In [1]:
import pandas as pd
from config.constants import EXPORTS_DIR
from converters.data2frames import clean_export_dataframe

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier

In [3]:
from sklearn.model_selection import cross_val_score

In [5]:
soloq_df = clean_export_dataframe(pd.read_excel(EXPORTS_DIR + 'patch_8-18_dataset.xlsx'))

In [38]:
soloq_drop_cols = [ 'account_id', 'account_type', 'id', 'substitute']

In [40]:
soloq_df.drop(soloq_drop_cols, axis=1, inplace=True)

In [6]:
slo_df = pd.read_excel(EXPORTS_DIR + 'slo_dataset.xlsx')

In [7]:
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

In [41]:
newdf = slo_df.select_dtypes(include=numerics)
newdf2 = soloq_df.select_dtypes(include=numerics)

In [42]:
perkp_df = slo_df.perkPrimaryStyle_name.str.get_dummies()
perks_df = slo_df.perkSubStyle_name.str.get_dummies()

perkp_df2 = soloq_df.perkPrimaryStyle_name.str.get_dummies()
perks_df2 = soloq_df.perkSubStyle_name.str.get_dummies()

In [43]:
role_dum = slo_df.role.str.get_dummies()
lane_dum = slo_df.lane.str.get_dummies()

role_dum2 = soloq_df.role.str.get_dummies()
lane_dum2 = soloq_df.lane.str.get_dummies()

In [44]:
champ_df = slo_df.champ_name.str.get_dummies()

champ_df2 = soloq_df.champ_name.str.get_dummies()

In [45]:
all_num_df = pd.concat([newdf, perkp_df, perks_df, champ_df, role_dum, lane_dum], axis=1)

all_num_df2 = pd.concat([newdf2, perkp_df2, perks_df2, champ_df2, role_dum2, lane_dum2], axis=1)

## Fill NA values 

In [47]:
fill_0_cols = all_num_df.T.ix[all_num_df.isnull().any().tolist()].T.columns.tolist()

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.


In [48]:
all_num_df[fill_0_cols] = all_num_df[fill_0_cols].fillna(0).astype(int)

In [49]:
all_num_df2[fill_0_cols] = all_num_df2[fill_0_cols].fillna(0).astype(int)

In [76]:
fill_mean_cols = [col for col in all_num_df2.columns if 'tt' in col]

In [78]:
all_num_df2[fill_mean_cols] = all_num_df2[fill_mean_cols].fillna(all_num_df2[fill_mean_cols].mean())

In [80]:
fill_0_cols_2 = all_num_df2.T.ix[all_num_df2.isnull().any().tolist()].T.columns.tolist()

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.


In [81]:
all_num_df2[fill_0_cols_2] = all_num_df2[fill_0_cols_2].fillna(0)

## Fill missing cols 

In [51]:
missing_cols = set(all_num_df2.columns) - set(all_num_df.columns)

In [57]:
all_num_df = all_num_df.assign(**{col: 0 for col in missing_cols})

In [84]:
all_num_df.shape[1], all_num_df2.shape[1]

(271, 271)

##  Predict

In [83]:
trainers = all_num_df
predictors = all_num_df2

In [85]:
labels = slo_df.position

In [86]:
model1 = DecisionTreeClassifier()
model2 = BernoulliNB()
model3 = MLPClassifier()
model4 = SVC()
model5 = BaggingClassifier()
model6 = GradientBoostingClassifier() # 99%
model7 = RandomForestClassifier() # 

In [87]:
models = [model1, model2, model3, model4, model5, model6, model7]

In [88]:
models_results = pd.concat([pd.DataFrame(cross_val_score(estimator=model, cv=5, X=trainers, y=labels)).T for model in models])

In [89]:
models_results['model'] = ['DecisionTreeClassifier', 'BernoulliNB', 'MLPClassifier', 'SVC', 'BaggingClassifier', 'GradientBoostingClassifier', 'RandomForestClassifier']

In [90]:
models_results['mean'] = models_results[[0, 1, 2, 3, 4]].T.mean()

In [91]:
models_results['std'] = models_results[[0, 1, 2, 3, 4]].T.std()

In [92]:
models_results.reset_index(drop=True)

Unnamed: 0,0,1,2,3,4,model,mean,std
0,0.965,0.9425,0.9675,0.965,0.965,DecisionTreeClassifier,0.961,0.010398
1,0.95,0.945,0.95,0.935,0.95,BernoulliNB,0.946,0.006519
2,0.2675,0.455,0.3825,0.46,0.29,MLPClassifier,0.371,0.089976
3,0.2,0.2,0.2,0.2,0.2,SVC,0.2,0.0
4,0.9625,0.955,0.9775,0.9625,0.96,BaggingClassifier,0.9635,0.008404
5,0.9875,0.9975,0.9975,1.0,0.9925,GradientBoostingClassifier,0.995,0.005
6,0.965,0.965,0.98,0.9675,0.955,RandomForestClassifier,0.9665,0.008944


In [93]:
model6.fit(X=trainers, y=labels)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_split=1e-07, min_samples_leaf=1,
              min_samples_split=2, min_weight_fraction_leaf=0.0,
              n_estimators=100, presort='auto', random_state=None,
              subsample=1.0, verbose=0, warm_start=False)

In [94]:
predicts = model6.predict(X=predictors)

In [95]:
soloq_df['position'] = predicts

In [96]:
soloq_df.position.unique()

array(['JUNG', 'TOP', 'MID', 'ADC', 'SUPP'], dtype=object)

In [98]:
soloq_df[['gameId', 'summonerName', 'champ_name', 'lane', 'role', 'position']]

Unnamed: 0,gameId,summonerName,champ_name,lane,role,position
0,3770407781,D1 WARRIOR,Kog'Maw,MIDDLE,DUO_SUPPORT,JUNG
1,3770407781,Kyrîel,Braum,MIDDLE,DUO_SUPPORT,TOP
2,3770407781,ChosenØne,Graves,JUNGLE,NONE,TOP
3,3770407781,Decak,Cho'Gath,MIDDLE,DUO_SUPPORT,JUNG
4,3770407781,MidLurk3r,Cassiopeia,MIDDLE,DUO,TOP
5,3770407781,ordno,Galio,MIDDLE,SOLO,TOP
6,3770407781,Archie2b,Gragas,JUNGLE,NONE,TOP
7,3770407781,ºˆºˆºˆºˆºˆºˆºˆº,Alistar,BOTTOM,DUO_SUPPORT,TOP
8,3770407781,Nawada,Kai'Sa,BOTTOM,DUO_CARRY,TOP
9,3770407781,DXZOREN,Illaoi,TOP,SOLO,TOP


# Predictions with only champion, role and lane info 