In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from plotly import tools
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import warnings
warnings.filterwarnings("ignore")
init_notebook_mode(connected=True)

from sklearn.model_selection import train_test_split, StratifiedKFold, KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import eli5
from sklearn import decomposition
import lightgbm as lgb
import xgboost as xgb

import os
from IPython.display import display_html

import json
from tqdm import tqdm_notebook

In [2]:
df_train_pickle = pd.read_pickle("./train_features_5.pkl")
df_test_pickle = pd.read_pickle("./test_features_5.pkl")

In [3]:
list(df_train_pickle.columns)

['game_time',
 'game_mode',
 'lobby_type',
 'objectives_len',
 'chat_len',
 'r1_hero_id',
 'r1_kills',
 'r1_deaths',
 'r1_assists',
 'r1_denies',
 'r1_gold',
 'r1_lh',
 'r1_xp',
 'r1_health',
 'r1_max_health',
 'r1_max_mana',
 'r1_level',
 'r1_x',
 'r1_y',
 'r1_stuns',
 'r1_creeps_stacked',
 'r1_camps_stacked',
 'r1_rune_pickups',
 'r1_firstblood_claimed',
 'r1_teamfight_participation',
 'r1_towers_killed',
 'r1_roshans_killed',
 'r1_obs_placed',
 'r1_sen_placed',
 'r1_ability_level',
 'r1_max_hero_hit',
 'r1_purchase_count',
 'r1_count_ability_use',
 'r1_damage_dealt',
 'r1_damage_received',
 'r2_hero_id',
 'r2_kills',
 'r2_deaths',
 'r2_assists',
 'r2_denies',
 'r2_gold',
 'r2_lh',
 'r2_xp',
 'r2_health',
 'r2_max_health',
 'r2_max_mana',
 'r2_level',
 'r2_x',
 'r2_y',
 'r2_stuns',
 'r2_creeps_stacked',
 'r2_camps_stacked',
 'r2_rune_pickups',
 'r2_firstblood_claimed',
 'r2_teamfight_participation',
 'r2_towers_killed',
 'r2_roshans_killed',
 'r2_obs_placed',
 'r2_sen_placed',
 'r2_a

In [7]:
df_train_1 = pd.read_pickle("./train_features_1.pkl")
df_test_1 = pd.read_pickle("./test_features_1.pkl")

In [8]:
df_train_1.head(3)

Unnamed: 0_level_0,game_time,game_mode,lobby_type,objectives_len,chat_len,r1_hero_id,r1_kills,r1_deaths,r1_assists,r1_denies,...,d5_rune_pickups,d5_firstblood_claimed,d5_teamfight_participation,d5_towers_killed,d5_roshans_killed,d5_obs_placed,d5_sen_placed,radiant_tower_kills,dire_tower_kills,diff_tower_kills
match_id_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a400b8f29dece5f4d266f49f1ae2e98a,155,22,7,1,11,11,0,0,0,0,...,0,0,0.0,0,0,0,0,0.0,0.0,0.0
b9c57c450ce74a2af79c9ce96fac144d,658,4,0,3,10,15,7,2,0,7,...,0,0,0.0,0,0,0,0,2.0,0.0,2.0
6db558535151ea18ca70a6892197db41,21,23,0,0,0,101,0,0,0,0,...,0,0,0.0,0,0,0,0,0.0,0.0,0.0


In [10]:
np.any(np.isnan(df_train_pickle))

True

In [16]:
df_train_pickle[df_train_pickle==np.inf]=np.nan
df_train_pickle = df_train_pickle.fillna(0)

In [17]:
np.any(np.isnan(df_train_pickle))

False

In [12]:
np.any(np.isnan(df_train_1))

False

In [18]:
df_test_pickle[df_test_pickle==np.inf]=np.nan
df_test_pickle = df_test_pickle.fillna(0)

In [19]:
np.any(np.isnan(df_test_pickle))

False

In [20]:
df_train_heroes = pd.read_pickle("./train_heroes_id.pkl")
df_test_heroes = pd.read_pickle("./test_heroes_id.pkl")

In [21]:
np.any(np.isnan(df_train_heroes))

False

In [22]:
np.any(np.isnan(df_test_heroes))

False

In [24]:
X_train = df_train_pickle.copy()
X_test = df_test_pickle.copy()

In [25]:
heroes_ids = [f'{t}{i}_hero_id' for t in ['r', 'd'] for i in range(1, 6)]
heroes_ids

['r1_hero_id',
 'r2_hero_id',
 'r3_hero_id',
 'r4_hero_id',
 'r5_hero_id',
 'd1_hero_id',
 'd2_hero_id',
 'd3_hero_id',
 'd4_hero_id',
 'd5_hero_id']

In [26]:
X_train.drop(columns = heroes_ids, inplace = True)
print(np.any(np.isnan(X_train)))
X_train.head(3)

False


Unnamed: 0,game_time,game_mode,lobby_type,objectives_len,chat_len,r1_kills,r1_deaths,r1_assists,r1_denies,r1_gold,...,total_damage_received_ratio,r_std_damage_received,d_std_damage_received,std_damage_received_ratio,r_mean_damage_received,d_mean_damage_received,mean_damage_received_ratio,radiant_tower_kills,dire_tower_kills,diff_tower_kills
0,155.0,22.0,7.0,1.0,11.0,0.0,0.0,0.0,0.0,543.0,...,2.727974,375.188353,244.929582,1.531821,848.4,311.0,2.727974,0.0,0.0,0.0
1,658.0,4.0,0.0,3.0,10.0,7.0,2.0,0.0,7.0,5257.0,...,0.866879,2286.08635,2169.544929,1.053717,4240.6,4891.8,0.866879,2.0,0.0,2.0
2,21.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,176.0,...,0.971429,30.410524,31.304952,0.971429,13.6,14.0,0.971429,0.0,0.0,0.0


In [27]:
X_test.drop(columns = heroes_ids, inplace = True)
print(np.any(np.isnan(X_test)))
X_test.head(3)

False


Unnamed: 0,game_time,game_mode,lobby_type,objectives_len,chat_len,r1_kills,r1_deaths,r1_assists,r1_denies,r1_gold,...,total_damage_received_ratio,r_std_damage_received,d_std_damage_received,std_damage_received_ratio,r_mean_damage_received,d_mean_damage_received,mean_damage_received_ratio,radiant_tower_kills,dire_tower_kills,diff_tower_kills
0,23.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,115.0,...,0.390821,50.415275,194.104869,0.259732,56.2,143.8,0.390821,0.0,0.0,0.0
1,1044.0,22.0,7.0,12.0,6.0,3.0,5.0,7.0,1.0,5864.0,...,1.045873,2383.98448,2904.398389,0.820819,10328.0,9875.0,1.045873,7.0,2.0,5.0
2,1091.0,22.0,7.0,6.0,1.0,3.0,1.0,7.0,1.0,4351.0,...,0.809324,3028.941284,2068.178716,1.464545,7374.4,9111.8,0.809324,4.0,1.0,3.0


In [28]:
columns = list(X_train.columns)

In [29]:
columns

['game_time',
 'game_mode',
 'lobby_type',
 'objectives_len',
 'chat_len',
 'r1_kills',
 'r1_deaths',
 'r1_assists',
 'r1_denies',
 'r1_gold',
 'r1_lh',
 'r1_xp',
 'r1_health',
 'r1_max_health',
 'r1_max_mana',
 'r1_level',
 'r1_x',
 'r1_y',
 'r1_stuns',
 'r1_creeps_stacked',
 'r1_camps_stacked',
 'r1_rune_pickups',
 'r1_firstblood_claimed',
 'r1_teamfight_participation',
 'r1_towers_killed',
 'r1_roshans_killed',
 'r1_obs_placed',
 'r1_sen_placed',
 'r1_ability_level',
 'r1_max_hero_hit',
 'r1_purchase_count',
 'r1_count_ability_use',
 'r1_damage_dealt',
 'r1_damage_received',
 'r2_kills',
 'r2_deaths',
 'r2_assists',
 'r2_denies',
 'r2_gold',
 'r2_lh',
 'r2_xp',
 'r2_health',
 'r2_max_health',
 'r2_max_mana',
 'r2_level',
 'r2_x',
 'r2_y',
 'r2_stuns',
 'r2_creeps_stacked',
 'r2_camps_stacked',
 'r2_rune_pickups',
 'r2_firstblood_claimed',
 'r2_teamfight_participation',
 'r2_towers_killed',
 'r2_roshans_killed',
 'r2_obs_placed',
 'r2_sen_placed',
 'r2_ability_level',
 'r2_max_hero_h

In [30]:
columns.index('d5_damage_received')

294

In [31]:
columns_to_drop = columns[5:295]
columns_to_drop

['r1_kills',
 'r1_deaths',
 'r1_assists',
 'r1_denies',
 'r1_gold',
 'r1_lh',
 'r1_xp',
 'r1_health',
 'r1_max_health',
 'r1_max_mana',
 'r1_level',
 'r1_x',
 'r1_y',
 'r1_stuns',
 'r1_creeps_stacked',
 'r1_camps_stacked',
 'r1_rune_pickups',
 'r1_firstblood_claimed',
 'r1_teamfight_participation',
 'r1_towers_killed',
 'r1_roshans_killed',
 'r1_obs_placed',
 'r1_sen_placed',
 'r1_ability_level',
 'r1_max_hero_hit',
 'r1_purchase_count',
 'r1_count_ability_use',
 'r1_damage_dealt',
 'r1_damage_received',
 'r2_kills',
 'r2_deaths',
 'r2_assists',
 'r2_denies',
 'r2_gold',
 'r2_lh',
 'r2_xp',
 'r2_health',
 'r2_max_health',
 'r2_max_mana',
 'r2_level',
 'r2_x',
 'r2_y',
 'r2_stuns',
 'r2_creeps_stacked',
 'r2_camps_stacked',
 'r2_rune_pickups',
 'r2_firstblood_claimed',
 'r2_teamfight_participation',
 'r2_towers_killed',
 'r2_roshans_killed',
 'r2_obs_placed',
 'r2_sen_placed',
 'r2_ability_level',
 'r2_max_hero_hit',
 'r2_purchase_count',
 'r2_count_ability_use',
 'r2_damage_dealt',
 'r

In [36]:
list(X_train.columns)

['game_time',
 'game_mode',
 'lobby_type',
 'objectives_len',
 'chat_len',
 'r_total_kills',
 'd_total_kills',
 'total_kills_ratio',
 'r_std_kills',
 'd_std_kills',
 'std_kills_ratio',
 'r_mean_kills',
 'd_mean_kills',
 'mean_kills_ratio',
 'r_total_deaths',
 'd_total_deaths',
 'total_deaths_ratio',
 'r_std_deaths',
 'd_std_deaths',
 'std_deaths_ratio',
 'r_mean_deaths',
 'd_mean_deaths',
 'mean_deaths_ratio',
 'r_total_assists',
 'd_total_assists',
 'total_assists_ratio',
 'r_std_assists',
 'd_std_assists',
 'std_assists_ratio',
 'r_mean_assists',
 'd_mean_assists',
 'mean_assists_ratio',
 'r_total_denies',
 'd_total_denies',
 'total_denies_ratio',
 'r_std_denies',
 'd_std_denies',
 'std_denies_ratio',
 'r_mean_denies',
 'd_mean_denies',
 'mean_denies_ratio',
 'r_total_gold',
 'd_total_gold',
 'total_gold_ratio',
 'r_std_gold',
 'd_std_gold',
 'std_gold_ratio',
 'r_mean_gold',
 'd_mean_gold',
 'mean_gold_ratio',
 'r_total_lh',
 'd_total_lh',
 'total_lh_ratio',
 'r_std_lh',
 'd_std_lh'

In [37]:
print(np.any(np.isnan(X_train)))

False


In [38]:
X_test.drop(columns = columns_to_drop, inplace = True)

print(np.any(np.isnan(X_test)))

False


In [39]:
X_train['r_R^2'] = X_train.r_mean_x**2 + X_train.r_mean_y**2
X_train['d_R^2'] = X_train.d_mean_x**2 + X_train.d_mean_y**2

In [69]:
X_test['r_R^2'] = X_test.r_mean_x**2 + X_test.r_mean_y**2
X_test['d_R^2'] = X_test.d_mean_x**2 + X_test.d_mean_y**2

In [40]:
X_train['x_diff'] = np.abs(X_train.r_mean_x - X_train.d_mean_x) 
X_train['y_diff'] = np.abs(X_train.r_mean_y - X_train.d_mean_y) 

In [70]:
X_test['x_diff'] = np.abs(X_test.r_mean_x - X_test.d_mean_x) 
X_test['y_diff'] = np.abs(X_test.r_mean_y - X_test.d_mean_y) 

In [41]:
X_train.head(3)

Unnamed: 0,game_time,game_mode,lobby_type,objectives_len,chat_len,r_total_kills,d_total_kills,total_kills_ratio,r_std_kills,d_std_kills,...,r_mean_damage_received,d_mean_damage_received,mean_damage_received_ratio,radiant_tower_kills,dire_tower_kills,diff_tower_kills,r_R^2,d_R^2,x_diff,y_diff
0,155.0,22.0,7.0,1.0,11.0,0.0,1.0,0.0,0.0,0.447214,...,848.4,311.0,2.727974,0.0,0.0,0.0,28324.0,32584.0,10.8,6.4
1,658.0,4.0,0.0,3.0,10.0,16.0,3.0,5.333333,2.48998,0.547723,...,4240.6,4891.8,0.866879,2.0,0.0,2.0,33903.68,39379.36,11.6,8.8
2,21.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,13.6,14.0,0.971429,0.0,0.0,0.0,27191.84,47012.48,43.2,30.0


In [42]:
df_train_heroes.head(3)

Unnamed: 0_level_0,id001,id002,id003,id004,id005,id006,id007,id008,id009,id010,...,id107,id108,id109,id110,id111,id112,id113,id114,id119,id120
match_id_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a400b8f29dece5f4d266f49f1ae2e98a,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
b9c57c450ce74a2af79c9ce96fac144d,-1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6db558535151ea18ca70a6892197db41,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
df_train_heroes.index = X_train.index
df_test_heroes.index = X_test.index

In [44]:
df_train_heroes.head(5)

Unnamed: 0,id001,id002,id003,id004,id005,id006,id007,id008,id009,id010,...,id107,id108,id109,id110,id111,id112,id113,id114,id119,id120
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,-1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,-1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,-1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [45]:
print(np.any(np.isnan(df_train_heroes)))

False


In [50]:
X_train_full = pd.merge(X_train, df_train_heroes, left_index = True, right_index = True)
X_train_full.head(3)
print(np.any(np.isnan(X_train_full)))

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [52]:
hero_id_columns = list(df_train_heroes.columns)
hero_id_columns

['id001',
 'id002',
 'id003',
 'id004',
 'id005',
 'id006',
 'id007',
 'id008',
 'id009',
 'id010',
 'id011',
 'id012',
 'id013',
 'id014',
 'id015',
 'id016',
 'id017',
 'id018',
 'id019',
 'id020',
 'id021',
 'id022',
 'id023',
 'id025',
 'id026',
 'id027',
 'id028',
 'id029',
 'id030',
 'id031',
 'id032',
 'id033',
 'id034',
 'id035',
 'id036',
 'id037',
 'id038',
 'id039',
 'id040',
 'id041',
 'id042',
 'id043',
 'id044',
 'id045',
 'id046',
 'id047',
 'id048',
 'id049',
 'id050',
 'id051',
 'id052',
 'id053',
 'id054',
 'id055',
 'id056',
 'id057',
 'id058',
 'id059',
 'id060',
 'id061',
 'id062',
 'id063',
 'id064',
 'id065',
 'id066',
 'id067',
 'id068',
 'id069',
 'id070',
 'id071',
 'id072',
 'id073',
 'id074',
 'id075',
 'id076',
 'id077',
 'id078',
 'id079',
 'id080',
 'id081',
 'id082',
 'id083',
 'id084',
 'id085',
 'id086',
 'id087',
 'id088',
 'id089',
 'id090',
 'id091',
 'id092',
 'id093',
 'id094',
 'id095',
 'id096',
 'id097',
 'id098',
 'id099',
 'id100',
 'id101',


In [56]:
train_columns = list(X_train.columns)

In [58]:
X_train_matrix = X_train.values
X_train_matrix

array([[1.550000e+02, 2.200000e+01, 7.000000e+00, ..., 3.258400e+04,
        1.080000e+01, 6.400000e+00],
       [6.580000e+02, 4.000000e+00, 0.000000e+00, ..., 3.937936e+04,
        1.160000e+01, 8.800000e+00],
       [2.100000e+01, 2.300000e+01, 0.000000e+00, ..., 4.701248e+04,
        4.320000e+01, 3.000000e+01],
       ...,
       [6.430000e+02, 2.200000e+01, 7.000000e+00, ..., 3.085776e+04,
        3.200000e+00, 8.000000e+00],
       [2.405000e+03, 2.200000e+01, 7.000000e+00, ..., 3.492560e+04,
        1.160000e+01, 1.400000e+01],
       [1.775000e+03, 2.200000e+01, 0.000000e+00, ..., 3.821600e+04,
        2.680000e+01, 3.800000e+01]])

In [59]:
heroes_train_matrix = df_train_heroes.values
heroes_train_matrix

array([[ 0,  0,  0, ...,  0,  0,  0],
       [-1,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ...,  0,  0, -1],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]], dtype=int64)

In [61]:
X_full_matrix = np.concatenate((X_train_matrix,heroes_train_matrix),axis=1)
X_full_matrix.shape

(39675, 388)

In [62]:
print(np.any(np.isnan(X_full_matrix)))

False


In [63]:
all_columns = train_columns + hero_id_columns

In [66]:
X_train_full = pd.DataFrame(data = X_full_matrix, columns = all_columns)
X_train_full.head(3)

Unnamed: 0,game_time,game_mode,lobby_type,objectives_len,chat_len,r_total_kills,d_total_kills,total_kills_ratio,r_std_kills,d_std_kills,...,id107,id108,id109,id110,id111,id112,id113,id114,id119,id120
0,155.0,22.0,7.0,1.0,11.0,0.0,1.0,0.0,0.0,0.447214,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,658.0,4.0,0.0,3.0,10.0,16.0,3.0,5.333333,2.48998,0.547723,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,21.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
print(np.any(np.isnan(X_train_full)))

False


In [71]:
X_test_matrix = X_test.values
heroes_test_matrix = df_test_heroes.values
X_full_test_matrix = np.concatenate((X_test_matrix, heroes_test_matrix),axis=1)
X_test_full = pd.DataFrame(data = X_full_test_matrix, columns = all_columns)
X_test_full.head(3)
print(np.any(np.isnan(X_test_full)))

False


In [73]:
print(np.any(np.isinf(X_train_full)))

False


In [74]:
pd.to_pickle(X_train_full, "./train_features_7.pkl")
pd.to_pickle(X_test_full, "./test_features_7.pkl")

In [75]:
X_train_full_cat = X_train_full.copy()
X_test_full_cat = X_test_full.copy()

In [77]:
X_train_full_cat[hero_id_columns].astype('category')
X_test_full_cat[hero_id_columns].astype('category');

In [78]:
print(np.any(np.isinf(X_train_full_cat)))

False


In [79]:
pd.to_pickle(X_train_full_cat, "./train_features_7_cat.pkl")
pd.to_pickle(X_test_full_cat, "./test_features_7_cat.pkl")