In [166]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings
import xgboost as xgb
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")

In [167]:
# Thanks and credited to https://www.kaggle.com/gemartin who created this wonderful mem reducer
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() 
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() 
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [168]:
train = pd.read_csv('train_V2.csv')
train  = reduce_mem_usage(train)

test = pd.read_csv('test_V2.csv')
test  = reduce_mem_usage(test)

Memory usage of dataframe is 1031696192.00 MB
Memory usage after optimization is: 523044550.00 MB
Decreased by 49.3%
Memory usage of dataframe is 433255056.00 MB
Memory usage after optimization is: 236250902.00 MB
Decreased by 45.5%


In [169]:
def reduce_match_type(df):
    mapper = lambda x: 'solo' if ('solo' in x) else 'duo' if ('duo' in x) or ('crash' in x) else 'squad'
    df['matchType'] = df['matchType'].apply(mapper)
    return df

def headshotKills_over_kills(df):
    df['headshotKills_over_kills'] = df['headshotKills'] / df['kills']
    df['headshotKills_over_kills'].fillna(0, inplace=True)
    return df

def total_dist(df):
    df['totalDistance'] = df['rideDistance'] + df['walkDistance'] + df['swimDistance']
    return df

def items(df):
    df['items'] = df['heals'] + df['boosts']
    return df

def players_in_team(df):
    agg = df.groupby(['groupId']).size().to_frame('players_in_team')
    return df.merge(agg, how='left', on=['groupId'])

def killPlace_over_maxPlace(df):
    df['killPlace_over_maxPlace'] = df['killPlace'] / df['maxPlace']
    df['killPlace_over_maxPlace'].fillna(0, inplace=True)
    df['killPlace_over_maxPlace'].replace(np.inf, 0, inplace=True)
    return df

def walkDistance_over_heals(df):
    df['walkDistance_over_heals'] = df['walkDistance'] / df['heals']
    df['walkDistance_over_heals'].fillna(0, inplace=True)
    df['walkDistance_over_heals'].replace(np.inf, 0, inplace=True)
    return df

def walkDistance_over_kills(df):
    df['walkDistance_over_kills'] = df['walkDistance'] / df['kills']
    df['walkDistance_over_kills'].fillna(0, inplace=True)
    df['walkDistance_over_kills'].replace(np.inf, 0, inplace=True)
    return df

def teamwork(df):
    df['teamwork'] = df['assists'] + df['revives']
    return df

def normalize(df):
    df['playersJoined'] = df.groupby('matchId')['matchId'].transform('count')

    # Create normalized features
    df['killsNorm'] = df['kills']*((100-df['playersJoined'])/100 + 1)
    df['damageDealtNorm'] = df['damageDealt']*((100-df['playersJoined'])/100 + 1)
    df['maxPlaceNorm'] = df['maxPlace']*((100-df['playersJoined'])/100 + 1)
    df['matchDurationNorm'] = df['matchDuration']*((100-df['playersJoined'])/100 + 1)
    
    return df

def other(df):
    return df.drop(['groupId', 'matchId'], axis=1)


functions = [headshotKills_over_kills, 
             total_dist,
             items,
             players_in_team,
             killPlace_over_maxPlace,
             walkDistance_over_heals,
             walkDistance_over_kills,
             teamwork,
             reduce_match_type,
             normalize,
             other]

def apply_feature_engineering(functions, df):

    for function in functions:
        df = function(df)
        
    return df


def get_solo(df):
    return df.loc[df['matchType'] == 'solo'].drop(['matchType'], axis=1)

def get_duo(df):
    return df.loc[df['matchType'] == 'duo'].drop(['matchType'], axis=1)

def get_squad(df):
    return df.loc[df['matchType'] == 'squad'].drop(['matchType'], axis=1)

In [170]:
train_fixed = apply_feature_engineering(functions, train)

#TODO use better parameters
#TODO train on full data not subset
train_solo = get_solo(train_fixed)
solo_subset = train_solo.sample(frac=0.05, random_state=42)

train_duo = get_duo(train_fixed)
duo_subset = train_solo.sample(frac=0.05, random_state=42)

train_squad = get_squad(train_fixed)
squad_subset = train_solo.sample(frac=0.05, random_state=42)

#~~~~~~~~~~~~~~~~~~~~~~

x_train_solo = solo_subset.drop(['Id', 'winPlacePerc'], axis=1)
y_train_solo = solo_subset['winPlacePerc']

x_train_duo = duo_subset.drop(['Id', 'winPlacePerc'], axis=1)
y_train_duo = duo_subset['winPlacePerc']

x_train_squad = squad_subset.drop(['Id', 'winPlacePerc'], axis=1)
y_train_squad = squad_subset['winPlacePerc']

#~~~~~~~~~~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~

test_fixed = apply_feature_engineering(functions, test)

x_test_solo = get_solo(test_fixed)
Id_solo = x_test_solo['Id']
x_test_solo.drop(['Id'], axis=1, inplace=True)

x_test_duo = get_duo(test_fixed)
Id_duo = x_test_duo['Id']
x_test_duo.drop(['Id'], axis=1, inplace=True)

x_test_squad = get_squad(test_fixed)
Id_squad = x_test_squad['Id']
x_test_squad.drop(['Id'], axis=1, inplace=True)

In [171]:
xgbregressor_solo = xgb.XGBRegressor(learning_rate=0.11,
                                     max_depth = 5,
                                     subsample = 0.85,
                                     nthread=8,
                                     colsample_bytree = 0.7,
                                     n_estimators=20000)

xgbregressor_duo = xgb.XGBRegressor(learning_rate=0.11,
                                     max_depth = 5,
                                     subsample = 0.85,
                                     nthread=8,
                                    colsample_bytree = 0.7,
                                     n_estimators=20000)

xgbregressor_squad = xgb.XGBRegressor(learning_rate=0.11,
                                     max_depth = 5,
                                     subsample = 0.85,
                                     nthread=8,
                                      colsample_bytree = 0.7,
                                     n_estimators=20000)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

X_train1, X_test1, y_train1, y_test1 = train_test_split(x_train_solo, y_train_solo, test_size=0.33, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(x_train_duo, y_train_duo, test_size=0.33, random_state=42)
X_train3, X_test3, y_train3, y_test3 = train_test_split(x_train_squad, y_train_squad, test_size=0.33, random_state=42)


xgbregressor_solo.fit(X_train1, y_train1, eval_set = [(X_test1, y_test1)], eval_metric='mae', early_stopping_rounds=200)
y_pred_solo = xgbregressor.predict(x_test_solo)

xgbregressor_duo.fit(X_train2, y_train2, eval_set = [(X_test2, y_test2)], eval_metric='mae', early_stopping_rounds=200)
y_pred_duo = xgbregressor.predict(x_test_duo)

xgbregressor_squad.fit(X_train3, y_train3, eval_set = [(X_test3, y_test3)], eval_metric='mae', early_stopping_rounds=200)
y_pred_squad = xgbregressor.predict(x_test_squad)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[0]	validation_0-mae:0.231246
Will train until validation_0-mae hasn't improved in 200 rounds.
[1]	validation_0-mae:0.208661
[2]	validation_0-mae:0.188669
[3]	validation_0-mae:0.171477
[4]	validation_0-mae:0.155571
[5]	validation_0-mae:0.142098
[6]	validation_0-mae:0.130141
[7]	validation_0-mae:0.119655
[8]	validation_0-mae:0.110405
[9]	validation_0-mae:0.102272
[10]	validation_0-mae:0.0954
[11]	validation_0-mae:0.0896
[12]	validation_0-mae:0.084147
[13]	validation_0-mae:0.079489
[14]	validation_0-mae:0.07516
[15]	validation_0-mae:0.071577
[16]	validation_0-mae:0.068385
[17]	validation_0-mae:0.065752
[18]	validation_0-mae:0.06348
[19]	validation_0-mae:0.061145
[20]	validation_0-mae:0.059533
[21]	validation_0-mae:0.05816
[22]	validation_0-mae:0.05669
[23]	validation_0-mae:0.055342
[24]	validation_0-mae:0.054482
[25]	validation_0-mae:0.05351
[26]	validation_0-mae:0.052596
[27]	validation_0-mae:0.051952
[28]	validation_0-mae:0.051304
[29]	validation_0-mae:0.050805
[30]	validation_0-mae:0.

[259]	validation_0-mae:0.042951
[260]	validation_0-mae:0.042953
[261]	validation_0-mae:0.042947
[262]	validation_0-mae:0.042948
[263]	validation_0-mae:0.042956
[264]	validation_0-mae:0.042957
[265]	validation_0-mae:0.04296
[266]	validation_0-mae:0.042933
[267]	validation_0-mae:0.042927
[268]	validation_0-mae:0.042933
[269]	validation_0-mae:0.042935
[270]	validation_0-mae:0.042932
[271]	validation_0-mae:0.042937
[272]	validation_0-mae:0.042939
[273]	validation_0-mae:0.042941
[274]	validation_0-mae:0.042927
[275]	validation_0-mae:0.042916
[276]	validation_0-mae:0.042915
[277]	validation_0-mae:0.042905
[278]	validation_0-mae:0.042907
[279]	validation_0-mae:0.042909
[280]	validation_0-mae:0.042908
[281]	validation_0-mae:0.042908
[282]	validation_0-mae:0.042906
[283]	validation_0-mae:0.042905
[284]	validation_0-mae:0.042902
[285]	validation_0-mae:0.042894
[286]	validation_0-mae:0.042888
[287]	validation_0-mae:0.042892
[288]	validation_0-mae:0.042891
[289]	validation_0-mae:0.042889
[290]	val

[516]	validation_0-mae:0.042828
[517]	validation_0-mae:0.042819
[518]	validation_0-mae:0.042821
[519]	validation_0-mae:0.042822
[520]	validation_0-mae:0.042821
[521]	validation_0-mae:0.042821
[522]	validation_0-mae:0.04282
[523]	validation_0-mae:0.04282
[524]	validation_0-mae:0.042827
[525]	validation_0-mae:0.042834
[526]	validation_0-mae:0.042836
[527]	validation_0-mae:0.042835
[528]	validation_0-mae:0.042839
[529]	validation_0-mae:0.04284
[530]	validation_0-mae:0.04284
[531]	validation_0-mae:0.04284
[532]	validation_0-mae:0.042837
[533]	validation_0-mae:0.042838
[534]	validation_0-mae:0.042837
[535]	validation_0-mae:0.042841
[536]	validation_0-mae:0.042844
[537]	validation_0-mae:0.042843
[538]	validation_0-mae:0.042841
[539]	validation_0-mae:0.042839
[540]	validation_0-mae:0.042833
[541]	validation_0-mae:0.042837
[542]	validation_0-mae:0.042833
[543]	validation_0-mae:0.042821
[544]	validation_0-mae:0.042824
[545]	validation_0-mae:0.042821
[546]	validation_0-mae:0.042819
[547]	validat

[176]	validation_0-mae:0.043283
[177]	validation_0-mae:0.043281
[178]	validation_0-mae:0.043282
[179]	validation_0-mae:0.043289
[180]	validation_0-mae:0.043259
[181]	validation_0-mae:0.043258
[182]	validation_0-mae:0.043263
[183]	validation_0-mae:0.043262
[184]	validation_0-mae:0.043237
[185]	validation_0-mae:0.043229
[186]	validation_0-mae:0.043194
[187]	validation_0-mae:0.043176
[188]	validation_0-mae:0.043183
[189]	validation_0-mae:0.043161
[190]	validation_0-mae:0.04315
[191]	validation_0-mae:0.043139
[192]	validation_0-mae:0.043146
[193]	validation_0-mae:0.043115
[194]	validation_0-mae:0.043103
[195]	validation_0-mae:0.043096
[196]	validation_0-mae:0.043093
[197]	validation_0-mae:0.043102
[198]	validation_0-mae:0.043092
[199]	validation_0-mae:0.043081
[200]	validation_0-mae:0.043075
[201]	validation_0-mae:0.043068
[202]	validation_0-mae:0.043066
[203]	validation_0-mae:0.043078
[204]	validation_0-mae:0.043082
[205]	validation_0-mae:0.043083
[206]	validation_0-mae:0.043073
[207]	val

[433]	validation_0-mae:0.042808
[434]	validation_0-mae:0.042814
[435]	validation_0-mae:0.042816
[436]	validation_0-mae:0.042806
[437]	validation_0-mae:0.042809
[438]	validation_0-mae:0.042813
[439]	validation_0-mae:0.042812
[440]	validation_0-mae:0.042812
[441]	validation_0-mae:0.042812
[442]	validation_0-mae:0.042807
[443]	validation_0-mae:0.042812
[444]	validation_0-mae:0.042813
[445]	validation_0-mae:0.042817
[446]	validation_0-mae:0.042812
[447]	validation_0-mae:0.04281
[448]	validation_0-mae:0.04281
[449]	validation_0-mae:0.042804
[450]	validation_0-mae:0.042801
[451]	validation_0-mae:0.042801
[452]	validation_0-mae:0.042796
[453]	validation_0-mae:0.042796
[454]	validation_0-mae:0.042798
[455]	validation_0-mae:0.042796
[456]	validation_0-mae:0.042796
[457]	validation_0-mae:0.042801
[458]	validation_0-mae:0.042795
[459]	validation_0-mae:0.042792
[460]	validation_0-mae:0.042791
[461]	validation_0-mae:0.042793
[462]	validation_0-mae:0.042789
[463]	validation_0-mae:0.042791
[464]	vali

[93]	validation_0-mae:0.044556
[94]	validation_0-mae:0.04454
[95]	validation_0-mae:0.044541
[96]	validation_0-mae:0.044495
[97]	validation_0-mae:0.044438
[98]	validation_0-mae:0.044427
[99]	validation_0-mae:0.044422
[100]	validation_0-mae:0.044415
[101]	validation_0-mae:0.044376
[102]	validation_0-mae:0.044351
[103]	validation_0-mae:0.044338
[104]	validation_0-mae:0.04426
[105]	validation_0-mae:0.044194
[106]	validation_0-mae:0.044183
[107]	validation_0-mae:0.044176
[108]	validation_0-mae:0.044152
[109]	validation_0-mae:0.044145
[110]	validation_0-mae:0.044137
[111]	validation_0-mae:0.044119
[112]	validation_0-mae:0.044083
[113]	validation_0-mae:0.044076
[114]	validation_0-mae:0.044044
[115]	validation_0-mae:0.043993
[116]	validation_0-mae:0.043956
[117]	validation_0-mae:0.043944
[118]	validation_0-mae:0.043829
[119]	validation_0-mae:0.043792
[120]	validation_0-mae:0.043787
[121]	validation_0-mae:0.043768
[122]	validation_0-mae:0.043751
[123]	validation_0-mae:0.043751
[124]	validation_

[350]	validation_0-mae:0.04281
[351]	validation_0-mae:0.042807
[352]	validation_0-mae:0.042807
[353]	validation_0-mae:0.042812
[354]	validation_0-mae:0.042815
[355]	validation_0-mae:0.042805
[356]	validation_0-mae:0.042801
[357]	validation_0-mae:0.042798
[358]	validation_0-mae:0.042802
[359]	validation_0-mae:0.042798
[360]	validation_0-mae:0.042795
[361]	validation_0-mae:0.042803
[362]	validation_0-mae:0.042805
[363]	validation_0-mae:0.042806
[364]	validation_0-mae:0.042802
[365]	validation_0-mae:0.042804
[366]	validation_0-mae:0.042802
[367]	validation_0-mae:0.042803
[368]	validation_0-mae:0.04279
[369]	validation_0-mae:0.04279
[370]	validation_0-mae:0.042798
[371]	validation_0-mae:0.042788
[372]	validation_0-mae:0.042788
[373]	validation_0-mae:0.042788
[374]	validation_0-mae:0.042788
[375]	validation_0-mae:0.042783
[376]	validation_0-mae:0.042777
[377]	validation_0-mae:0.042779
[378]	validation_0-mae:0.042784
[379]	validation_0-mae:0.042795
[380]	validation_0-mae:0.042798
[381]	valid

In [178]:
data_solo= {'Id': Id_solo, 'winPlacePerc':y_pred_solo}
data_duo ={'Id': Id_duo, 'winPlacePerc':y_pred_duo}
data_squad ={'Id': Id_squad, 'winPlacePerc':y_pred_squad}

solos = pd.DataFrame(data=data_solo)
duos = pd.DataFrame(data=data_duo)
squads = pd.DataFrame(data=data_squad)


final = solos.append([duos, squads])


final.to_csv("sample3.csv", index=False)

In [175]:
train_subset = train_fixed.sample(frac=0.05, random_state=42)

train_subset = pd.get_dummies(train_subset, columns = ['matchType'])

x_train = train_subset.drop(['Id', 'winPlacePerc'], axis=1)
y_train = train_subset['winPlacePerc']

X_train4, X_test4, y_train4, y_test4 = train_test_split(x_train, y_train, test_size=0.33, random_state=42)

xgbregressor = xgb.XGBRegressor(learning_rate=0.11,
                                     max_depth = 5,
                                     subsample = 0.85,
                                     nthread=8,
                                     colsample_bytree = 0.7,
                                     n_estimators=20000)

[0]	validation_0-mae:0.243873
Will train until validation_0-mae hasn't improved in 200 rounds.
[1]	validation_0-mae:0.221982
[2]	validation_0-mae:0.203043
[3]	validation_0-mae:0.185699
[4]	validation_0-mae:0.170771
[5]	validation_0-mae:0.157466
[6]	validation_0-mae:0.146114
[7]	validation_0-mae:0.135982
[8]	validation_0-mae:0.127164
[9]	validation_0-mae:0.119423
[10]	validation_0-mae:0.112728
[11]	validation_0-mae:0.106763
[12]	validation_0-mae:0.101707
[13]	validation_0-mae:0.096856
[14]	validation_0-mae:0.093156
[15]	validation_0-mae:0.089385
[16]	validation_0-mae:0.085852
[17]	validation_0-mae:0.083369
[18]	validation_0-mae:0.080685
[19]	validation_0-mae:0.079179
[20]	validation_0-mae:0.077166
[21]	validation_0-mae:0.075353
[22]	validation_0-mae:0.073846
[23]	validation_0-mae:0.072543
[24]	validation_0-mae:0.071678
[25]	validation_0-mae:0.070225
[26]	validation_0-mae:0.069321
[27]	validation_0-mae:0.068556
[28]	validation_0-mae:0.067949
[29]	validation_0-mae:0.06744
[30]	validation_

[259]	validation_0-mae:0.056232
[260]	validation_0-mae:0.05623
[261]	validation_0-mae:0.056213
[262]	validation_0-mae:0.056213
[263]	validation_0-mae:0.056206
[264]	validation_0-mae:0.056203
[265]	validation_0-mae:0.0562
[266]	validation_0-mae:0.056197
[267]	validation_0-mae:0.056198
[268]	validation_0-mae:0.056189
[269]	validation_0-mae:0.056175
[270]	validation_0-mae:0.056173
[271]	validation_0-mae:0.056173
[272]	validation_0-mae:0.056165
[273]	validation_0-mae:0.056165
[274]	validation_0-mae:0.056157
[275]	validation_0-mae:0.056153
[276]	validation_0-mae:0.056139
[277]	validation_0-mae:0.056133
[278]	validation_0-mae:0.05613
[279]	validation_0-mae:0.056121
[280]	validation_0-mae:0.056118
[281]	validation_0-mae:0.056115
[282]	validation_0-mae:0.056116
[283]	validation_0-mae:0.056116
[284]	validation_0-mae:0.056114
[285]	validation_0-mae:0.056109
[286]	validation_0-mae:0.056104
[287]	validation_0-mae:0.056105
[288]	validation_0-mae:0.056103
[289]	validation_0-mae:0.056097
[290]	valida

[516]	validation_0-mae:0.055663
[517]	validation_0-mae:0.055659
[518]	validation_0-mae:0.055657
[519]	validation_0-mae:0.055655
[520]	validation_0-mae:0.055653
[521]	validation_0-mae:0.055652
[522]	validation_0-mae:0.055653
[523]	validation_0-mae:0.055653
[524]	validation_0-mae:0.055645
[525]	validation_0-mae:0.055644
[526]	validation_0-mae:0.055645
[527]	validation_0-mae:0.055643
[528]	validation_0-mae:0.055637
[529]	validation_0-mae:0.055636
[530]	validation_0-mae:0.055637
[531]	validation_0-mae:0.055639
[532]	validation_0-mae:0.05564
[533]	validation_0-mae:0.05564
[534]	validation_0-mae:0.055635
[535]	validation_0-mae:0.055636
[536]	validation_0-mae:0.055634
[537]	validation_0-mae:0.05563
[538]	validation_0-mae:0.055627
[539]	validation_0-mae:0.055628
[540]	validation_0-mae:0.055627
[541]	validation_0-mae:0.055626
[542]	validation_0-mae:0.055625
[543]	validation_0-mae:0.055623
[544]	validation_0-mae:0.055616
[545]	validation_0-mae:0.055616
[546]	validation_0-mae:0.055616
[547]	valid

[773]	validation_0-mae:0.055516
[774]	validation_0-mae:0.055516
[775]	validation_0-mae:0.055517
[776]	validation_0-mae:0.055517
[777]	validation_0-mae:0.055512
[778]	validation_0-mae:0.055514
[779]	validation_0-mae:0.055514
[780]	validation_0-mae:0.055513
[781]	validation_0-mae:0.055514
[782]	validation_0-mae:0.055514
[783]	validation_0-mae:0.055514
[784]	validation_0-mae:0.055514
[785]	validation_0-mae:0.05551
[786]	validation_0-mae:0.05551
[787]	validation_0-mae:0.05551
[788]	validation_0-mae:0.055511
[789]	validation_0-mae:0.055513
[790]	validation_0-mae:0.055512
[791]	validation_0-mae:0.055513
[792]	validation_0-mae:0.055512
[793]	validation_0-mae:0.055513
[794]	validation_0-mae:0.055514
[795]	validation_0-mae:0.055514
[796]	validation_0-mae:0.055516
[797]	validation_0-mae:0.055515
[798]	validation_0-mae:0.055514
[799]	validation_0-mae:0.055513
[800]	validation_0-mae:0.055512
[801]	validation_0-mae:0.05551
[802]	validation_0-mae:0.05551
[803]	validation_0-mae:0.055501
[804]	validat

[1030]	validation_0-mae:0.055478
[1031]	validation_0-mae:0.055476
[1032]	validation_0-mae:0.055477
[1033]	validation_0-mae:0.055478
[1034]	validation_0-mae:0.05548
[1035]	validation_0-mae:0.05548
[1036]	validation_0-mae:0.05548
[1037]	validation_0-mae:0.055482
[1038]	validation_0-mae:0.055482
[1039]	validation_0-mae:0.055483
[1040]	validation_0-mae:0.055483
[1041]	validation_0-mae:0.055484
[1042]	validation_0-mae:0.055485
[1043]	validation_0-mae:0.055486
[1044]	validation_0-mae:0.055488
[1045]	validation_0-mae:0.055488
[1046]	validation_0-mae:0.055487
[1047]	validation_0-mae:0.055487
[1048]	validation_0-mae:0.055487
[1049]	validation_0-mae:0.055487
[1050]	validation_0-mae:0.055486
[1051]	validation_0-mae:0.055483
[1052]	validation_0-mae:0.055484
[1053]	validation_0-mae:0.055485
[1054]	validation_0-mae:0.055485
[1055]	validation_0-mae:0.055487
[1056]	validation_0-mae:0.055489
[1057]	validation_0-mae:0.055489
[1058]	validation_0-mae:0.055489
[1059]	validation_0-mae:0.05549
[1060]	validat

ValueError: feature_names mismatch: ['assists', 'boosts', 'damageDealt', 'DBNOs', 'headshotKills', 'heals', 'killPlace', 'killPoints', 'kills', 'killStreaks', 'longestKill', 'matchDuration', 'maxPlace', 'numGroups', 'rankPoints', 'revives', 'rideDistance', 'roadKills', 'swimDistance', 'teamKills', 'vehicleDestroys', 'walkDistance', 'weaponsAcquired', 'winPoints', 'headshotKills_over_kills', 'totalDistance', 'items', 'players_in_team', 'killPlace_over_maxPlace', 'walkDistance_over_heals', 'walkDistance_over_kills', 'teamwork', 'playersJoined', 'killsNorm', 'damageDealtNorm', 'maxPlaceNorm', 'matchDurationNorm', 'matchType_duo', 'matchType_solo', 'matchType_squad'] ['assists', 'boosts', 'damageDealt', 'DBNOs', 'headshotKills', 'heals', 'killPlace', 'killPoints', 'kills', 'killStreaks', 'longestKill', 'matchDuration', 'maxPlace', 'numGroups', 'rankPoints', 'revives', 'rideDistance', 'roadKills', 'swimDistance', 'teamKills', 'vehicleDestroys', 'walkDistance', 'weaponsAcquired', 'winPoints', 'headshotKills_over_kills', 'totalDistance', 'items', 'players_in_team', 'killPlace_over_maxPlace', 'walkDistance_over_heals', 'walkDistance_over_kills', 'teamwork', 'playersJoined', 'killsNorm', 'damageDealtNorm', 'maxPlaceNorm', 'matchDurationNorm']
expected matchType_duo, matchType_solo, matchType_squad in input data

In [176]:

x_test = pd.get_dummies(test_fixed, columns = ['matchType'])
Id = x_test['Id']
x_test.drop(['Id'], axis=1, inplace=True)



xgbregressor.fit(X_train4, y_train4, eval_set = [(X_test4, y_test4)], eval_metric='mae', early_stopping_rounds=200)

y_pred = xgbregressor.predict(x_test)

data= {'Id': Id, 'winPlacePerc':y_pred}

final = pd.DataFrame(data=data)

final.to_csv("sample2.csv", index=False)

[0]	validation_0-mae:0.243873
Will train until validation_0-mae hasn't improved in 200 rounds.
[1]	validation_0-mae:0.221982
[2]	validation_0-mae:0.203043
[3]	validation_0-mae:0.185699
[4]	validation_0-mae:0.170771
[5]	validation_0-mae:0.157466
[6]	validation_0-mae:0.146114
[7]	validation_0-mae:0.135982
[8]	validation_0-mae:0.127164
[9]	validation_0-mae:0.119423
[10]	validation_0-mae:0.112728
[11]	validation_0-mae:0.106763
[12]	validation_0-mae:0.101707
[13]	validation_0-mae:0.096856
[14]	validation_0-mae:0.093156
[15]	validation_0-mae:0.089385
[16]	validation_0-mae:0.085852
[17]	validation_0-mae:0.083369
[18]	validation_0-mae:0.080685
[19]	validation_0-mae:0.079179
[20]	validation_0-mae:0.077166
[21]	validation_0-mae:0.075353
[22]	validation_0-mae:0.073846
[23]	validation_0-mae:0.072543
[24]	validation_0-mae:0.071678
[25]	validation_0-mae:0.070225
[26]	validation_0-mae:0.069321
[27]	validation_0-mae:0.068556
[28]	validation_0-mae:0.067949
[29]	validation_0-mae:0.06744
[30]	validation_

[259]	validation_0-mae:0.056232
[260]	validation_0-mae:0.05623
[261]	validation_0-mae:0.056213
[262]	validation_0-mae:0.056213
[263]	validation_0-mae:0.056206
[264]	validation_0-mae:0.056203
[265]	validation_0-mae:0.0562
[266]	validation_0-mae:0.056197
[267]	validation_0-mae:0.056198
[268]	validation_0-mae:0.056189
[269]	validation_0-mae:0.056175
[270]	validation_0-mae:0.056173
[271]	validation_0-mae:0.056173
[272]	validation_0-mae:0.056165
[273]	validation_0-mae:0.056165
[274]	validation_0-mae:0.056157
[275]	validation_0-mae:0.056153
[276]	validation_0-mae:0.056139
[277]	validation_0-mae:0.056133
[278]	validation_0-mae:0.05613
[279]	validation_0-mae:0.056121
[280]	validation_0-mae:0.056118
[281]	validation_0-mae:0.056115
[282]	validation_0-mae:0.056116
[283]	validation_0-mae:0.056116
[284]	validation_0-mae:0.056114
[285]	validation_0-mae:0.056109
[286]	validation_0-mae:0.056104
[287]	validation_0-mae:0.056105
[288]	validation_0-mae:0.056103
[289]	validation_0-mae:0.056097
[290]	valida

[516]	validation_0-mae:0.055663
[517]	validation_0-mae:0.055659
[518]	validation_0-mae:0.055657
[519]	validation_0-mae:0.055655
[520]	validation_0-mae:0.055653
[521]	validation_0-mae:0.055652
[522]	validation_0-mae:0.055653
[523]	validation_0-mae:0.055653
[524]	validation_0-mae:0.055645
[525]	validation_0-mae:0.055644
[526]	validation_0-mae:0.055645
[527]	validation_0-mae:0.055643
[528]	validation_0-mae:0.055637
[529]	validation_0-mae:0.055636
[530]	validation_0-mae:0.055637
[531]	validation_0-mae:0.055639
[532]	validation_0-mae:0.05564
[533]	validation_0-mae:0.05564
[534]	validation_0-mae:0.055635
[535]	validation_0-mae:0.055636
[536]	validation_0-mae:0.055634
[537]	validation_0-mae:0.05563
[538]	validation_0-mae:0.055627
[539]	validation_0-mae:0.055628
[540]	validation_0-mae:0.055627
[541]	validation_0-mae:0.055626
[542]	validation_0-mae:0.055625
[543]	validation_0-mae:0.055623
[544]	validation_0-mae:0.055616
[545]	validation_0-mae:0.055616
[546]	validation_0-mae:0.055616
[547]	valid

[773]	validation_0-mae:0.055516
[774]	validation_0-mae:0.055516
[775]	validation_0-mae:0.055517
[776]	validation_0-mae:0.055517
[777]	validation_0-mae:0.055512
[778]	validation_0-mae:0.055514
[779]	validation_0-mae:0.055514
[780]	validation_0-mae:0.055513
[781]	validation_0-mae:0.055514
[782]	validation_0-mae:0.055514
[783]	validation_0-mae:0.055514
[784]	validation_0-mae:0.055514
[785]	validation_0-mae:0.05551
[786]	validation_0-mae:0.05551
[787]	validation_0-mae:0.05551
[788]	validation_0-mae:0.055511
[789]	validation_0-mae:0.055513
[790]	validation_0-mae:0.055512
[791]	validation_0-mae:0.055513
[792]	validation_0-mae:0.055512
[793]	validation_0-mae:0.055513
[794]	validation_0-mae:0.055514
[795]	validation_0-mae:0.055514
[796]	validation_0-mae:0.055516
[797]	validation_0-mae:0.055515
[798]	validation_0-mae:0.055514
[799]	validation_0-mae:0.055513
[800]	validation_0-mae:0.055512
[801]	validation_0-mae:0.05551
[802]	validation_0-mae:0.05551
[803]	validation_0-mae:0.055501
[804]	validat

[1030]	validation_0-mae:0.055478
[1031]	validation_0-mae:0.055476
[1032]	validation_0-mae:0.055477
[1033]	validation_0-mae:0.055478
[1034]	validation_0-mae:0.05548
[1035]	validation_0-mae:0.05548
[1036]	validation_0-mae:0.05548
[1037]	validation_0-mae:0.055482
[1038]	validation_0-mae:0.055482
[1039]	validation_0-mae:0.055483
[1040]	validation_0-mae:0.055483
[1041]	validation_0-mae:0.055484
[1042]	validation_0-mae:0.055485
[1043]	validation_0-mae:0.055486
[1044]	validation_0-mae:0.055488
[1045]	validation_0-mae:0.055488
[1046]	validation_0-mae:0.055487
[1047]	validation_0-mae:0.055487
[1048]	validation_0-mae:0.055487
[1049]	validation_0-mae:0.055487
[1050]	validation_0-mae:0.055486
[1051]	validation_0-mae:0.055483
[1052]	validation_0-mae:0.055484
[1053]	validation_0-mae:0.055485
[1054]	validation_0-mae:0.055485
[1055]	validation_0-mae:0.055487
[1056]	validation_0-mae:0.055489
[1057]	validation_0-mae:0.055489
[1058]	validation_0-mae:0.055489
[1059]	validation_0-mae:0.05549
[1060]	validat