In [None]:
import pandas as pd
import numpy as np
from dataset_functions import *
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV,StratifiedKFold
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# K-Nearest Neighbors

In [None]:
# Load the feature table and the target table through the project helper
# (get_dataset comes from dataset_functions; its filtering is reported in the cell output).
df, target = get_dataset()
# Label used for every model in this notebook: the 'radiant_win' entry of the target table.
win = target['radiant_win']

Filtering Df:  (game_mode == 2 or game_mode == 22) and game_time > 0 

Dropped:  ['lobby_type', 'chat_len', 'game_mode', 'match_id_hash'] 

Dataframe Shape:  (32153, 242) 

Target shape: (32153, 6)


In [None]:
# Candidate neighbourhood sizes: every odd k starting at 1 and stopping before
# ceil(sqrt(number of rows in df)).
params = {
    'n_neighbors': np.arange(1,np.ceil(np.sqrt(df.shape[0])).astype(int),2),
}

model = KNeighborsClassifier()

# Every metric below is recorded for each candidate; "roc_auc" picks the winner.
scorings = ["roc_auc","accuracy","recall","precision","f1"]

# shuffle=True without a random_state draws different folds on every run, which
# makes the scores and the selected k irreproducible. The seed is pinned to 42
# (the same value the PCA step uses) so a fresh Restart & Run All repeats the folds.
best_model = GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring=scorings,
    refit="roc_auc",
    cv=StratifiedKFold(n_splits=5,shuffle=True,random_state=42),
    return_train_score=True,
    verbose=1,
)

In [None]:
def pca_tranform(df:pd.DataFrame, variance_ratio:float):
    """Standardise ``df`` and reduce it with PCA.

    The frame is passed through a freshly fitted ``StandardScaler``; the scaled
    values are then handed to ``PCA(n_components=variance_ratio)`` with a fixed
    ``random_state`` of 42.

    Args:
        df: Feature table to reduce.
        variance_ratio: Forwarded to ``PCA`` as ``n_components``. The notebook
            passes 0.95 and 0.99, i.e. the share of variance to retain.

    Returns:
        The result of ``PCA.fit_transform`` on the standardised data.

    NOTE(review): both the scaler and the PCA are fitted on every row passed in,
        so a cross-validation run on the output has already seen all rows here.
    """
    standardized = StandardScaler().fit_transform(df)
    reducer = PCA(random_state=42, n_components=variance_ratio)
    return reducer.fit_transform(standardized)

In [None]:
# Build the "mean position" feature set, starting from a copy of the loaded frame.
# NOTE(review): the three transforms come from dataset_functions and are not visible
# here; the .copy() calls keep each stage's input from being modified by the next one.
df_tt = teamstats_teamheros_transform(df.copy())
df_mp = team_mean_position_transform(df_tt.copy())
# NOTE(review): 0.01 is presumably a selection threshold measured against `win` — confirm in dataset_functions.
df_mp_selected = feature_selection_transform(df_mp,win,0.01)

# Two PCA reductions of the selected features (n_components 0.95 and 0.99).
df_mp_selected_pca_95 = pca_tranform(df_mp_selected,0.95)
df_mp_selected_pca_99 = pca_tranform(df_mp_selected,0.99)

# Report how many rows and components each reduction produced.
print('PCA 0.95 shape: ',df_mp_selected_pca_95.shape)
print('PCA 0.99 shape: ',df_mp_selected_pca_99.shape)

Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3_denies', 'r3_gold', 'r

In [None]:
# Build the "weighted mean position" feature set. df_tt is recomputed here with the
# same call as in the mean-position cell, again from a copy of the loaded frame.
# NOTE(review): the transforms come from dataset_functions and are not visible here.
df_tt = teamstats_teamheros_transform(df.copy())
df_wmp = team_weighted_mean_position_transform(df_tt.copy())
# NOTE(review): 0.01 is presumably a selection threshold measured against `win` — confirm in dataset_functions.
df_wmp_selected = feature_selection_transform(df_wmp,win,0.01)

# Two PCA reductions of the selected features (n_components 0.95 and 0.99).
df_wmp_selected_pca_95 = pca_tranform(df_wmp_selected,0.95)
df_wmp_selected_pca_99 = pca_tranform(df_wmp_selected,0.99)

Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3_denies', 'r3_gold', 'r

In [None]:
# Report how many rows and components each weighted-mean-position reduction produced.
print('PCA 0.95 shape: ',df_wmp_selected_pca_95.shape)
print('PCA 0.99 shape: ',df_wmp_selected_pca_99.shape)

PCA 0.95 shape:  (32153, 11)
PCA 0.99 shape:  (32153, 18)


# Mean Position Dataset, PCA Retaining 95% of the Variance

In [None]:
# Rescale every PCA component with a MinMaxScaler, then run the KNN grid
# search on the rescaled mean-position data against `win`.
# NOTE(review): the scaler is fitted on all rows before GridSearchCV splits them
# into folds, so validation folds influence the scaling — consider a Pipeline.
df_mp_selected_pca_95 = MinMaxScaler().fit_transform(X=df_mp_selected_pca_95)
best_model.fit(X=df_mp_selected_pca_95, y=win)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [None]:
# Display the raw cross-validation results dict of the search fitted in the previous cell
# (one array per key, e.g. 'mean_fit_time', with one entry per candidate).
best_model.cv_results_

{'mean_fit_time': array([0.06739264, 0.04364471, 0.0493701 , 0.05712814, 0.04343157,
        0.05097003, 0.0499846 , 0.04976292, 0.04907274, 0.04965434,
        0.04332209, 0.04432154, 0.04385552, 0.04668465, 0.0589643 ,
        0.05087137, 0.04905634, 0.04456   , 0.04381189, 0.04468875,
        0.05304828, 0.05045166, 0.04389052, 0.04377007, 0.04959722,
        0.04461145, 0.04870586, 0.05148687, 0.05771837, 0.04908032,
        0.04418292, 0.04346023, 0.04928546, 0.04916544, 0.0484046 ,
        0.04317107, 0.04939313, 0.04916801, 0.04951224, 0.04998794,
        0.05057693, 0.04809508, 0.04822526, 0.04777641, 0.04789238,
        0.04857535, 0.04370108, 0.04905934, 0.05033283, 0.04410844,
        0.04800048, 0.0505456 , 0.05447512, 0.04853067, 0.04886012,
        0.04914083, 0.0482676 , 0.04802017, 0.04886842, 0.04888158,
        0.04848132, 0.04566321, 0.04393978, 0.05495195, 0.04415927,
        0.05444646, 0.04840794, 0.0436008 , 0.04764414, 0.04282761,
        0.04294643, 0.0584096 ,

In [None]:
# Summarise the grid search on the mean-position / 0.95 data: for the candidate
# that won on ROC AUC, report the mean cross-validated test and train score of
# every tracked metric.
cv_results: dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

# best_index_ is the cv_results_ row that GridSearchCV selected through
# refit="roc_auc" (the candidate ranked first on rank_test_roc_auc). It is the
# same for every metric, so it is looked up once rather than on each iteration.
best_test_position = best_model.best_index_

results = [
    {
        "Scoring": scoring,
        "Mean Test Score": cv_results[f"mean_test_{scoring}"][best_test_position],
        "Mean Train Score": cv_results[f"mean_train_{scoring}"][best_test_position],
    }
    for scoring in scorings
]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'n_neighbors': 169} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.788106,0.795585
1,accuracy,0.704413,0.710735
2,recall,0.789781,0.796743
3,precision,0.692647,0.697534
4,f1,0.738003,0.74384


# Mean Position Dataset, PCA Retaining 99% of the Variance

In [None]:
# Rescale every PCA component with a MinMaxScaler, then run the KNN grid
# search on the rescaled mean-position data against `win`.
# NOTE(review): the scaler is fitted on all rows before GridSearchCV splits them
# into folds, so validation folds influence the scaling — consider a Pipeline.
df_mp_selected_pca_99 = MinMaxScaler().fit_transform(X=df_mp_selected_pca_99)
best_model.fit(X=df_mp_selected_pca_99, y=win)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [None]:
# Display the raw cross-validation results dict of the search fitted in the previous cell
# (one array per key, e.g. 'mean_fit_time', with one entry per candidate).
best_model.cv_results_

{'mean_fit_time': array([0.00515833, 0.00481005, 0.00525184, 0.00545931, 0.00463209,
        0.00497999, 0.00514841, 0.00472999, 0.0062448 , 0.00476441,
        0.00551825, 0.00468888, 0.00570478, 0.00473356, 0.00497165,
        0.00551181, 0.00485659, 0.00465565, 0.00578036, 0.00506744,
        0.00486717, 0.0047709 , 0.00476198, 0.00474153, 0.00588841,
        0.006354  , 0.00717196, 0.00682306, 0.00640559, 0.00553555,
        0.00463281, 0.00465217, 0.00460935, 0.0046463 , 0.00592804,
        0.00503731, 0.00480208, 0.00633855, 0.00560203, 0.00474734,
        0.00540676, 0.00571828, 0.00533051, 0.00526166, 0.00522246,
        0.00520477, 0.00553603, 0.00521994, 0.00484238, 0.00508509,
        0.00531621, 0.00531526, 0.00540619, 0.00533867, 0.00523438,
        0.00525012, 0.00514431, 0.0052917 , 0.00531378, 0.00484271,
        0.00561695, 0.00477443, 0.00515847, 0.00528674, 0.00564642,
        0.00508614, 0.00488067, 0.00515018, 0.0051187 , 0.00483556,
        0.00525527, 0.00515895,

In [None]:
# Summarise the grid search on the mean-position / 0.99 data: for the candidate
# that won on ROC AUC, report the mean cross-validated test and train score of
# every tracked metric.
cv_results: dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

# best_index_ is the cv_results_ row that GridSearchCV selected through
# refit="roc_auc" (the candidate ranked first on rank_test_roc_auc). It is the
# same for every metric, so it is looked up once rather than on each iteration.
best_test_position = best_model.best_index_

results = [
    {
        "Scoring": scoring,
        "Mean Test Score": cv_results[f"mean_test_{scoring}"][best_test_position],
        "Mean Train Score": cv_results[f"mean_train_{scoring}"][best_test_position],
    }
    for scoring in scorings
]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'n_neighbors': 163} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.789683,0.797052
1,accuracy,0.704164,0.711162
2,recall,0.792967,0.799295
3,precision,0.691263,0.697174
4,f1,0.738622,0.74473


# Weighted Mean Position Dataset, PCA Retaining 95% of the Variance

In [None]:
# Rescale every PCA component with a MinMaxScaler, then run the KNN grid
# search on the rescaled weighted-mean-position data against `win`.
# NOTE(review): the scaler is fitted on all rows before GridSearchCV splits them
# into folds, so validation folds influence the scaling — consider a Pipeline.
df_wmp_selected_pca_95 = MinMaxScaler().fit_transform(X=df_wmp_selected_pca_95)
best_model.fit(X=df_wmp_selected_pca_95, y=win)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [None]:
# Summarise the grid search on the weighted-mean-position / 0.95 data: for the
# candidate that won on ROC AUC, report the mean cross-validated test and train
# score of every tracked metric.
cv_results: dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

# best_index_ is the cv_results_ row that GridSearchCV selected through
# refit="roc_auc" (the candidate ranked first on rank_test_roc_auc). It is the
# same for every metric, so it is looked up once rather than on each iteration.
best_test_position = best_model.best_index_

results = [
    {
        "Scoring": scoring,
        "Mean Test Score": cv_results[f"mean_test_{scoring}"][best_test_position],
        "Mean Train Score": cv_results[f"mean_train_{scoring}"][best_test_position],
    }
    for scoring in scorings
]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'n_neighbors': 179} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.787346,0.795258
1,accuracy,0.704164,0.710517
2,recall,0.798041,0.803425
3,precision,0.689615,0.694998
4,f1,0.739869,0.745285


# Weighted Mean Position Dataset, PCA Retaining 99% of the Variance

In [None]:
# Rescale every PCA component with a MinMaxScaler, then run the KNN grid
# search on the rescaled weighted-mean-position data against `win`.
# NOTE(review): the scaler is fitted on all rows before GridSearchCV splits them
# into folds, so validation folds influence the scaling — consider a Pipeline.
df_wmp_selected_pca_99 = MinMaxScaler().fit_transform(X=df_wmp_selected_pca_99)
best_model.fit(X=df_wmp_selected_pca_99, y=win)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [None]:
# Summarise the grid search on the weighted-mean-position / 0.99 data: for the
# candidate that won on ROC AUC, report the mean cross-validated test and train
# score of every tracked metric.
cv_results: dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

# best_index_ is the cv_results_ row that GridSearchCV selected through
# refit="roc_auc" (the candidate ranked first on rank_test_roc_auc). It is the
# same for every metric, so it is looked up once rather than on each iteration.
best_test_position = best_model.best_index_

results = [
    {
        "Scoring": scoring,
        "Mean Test Score": cv_results[f"mean_test_{scoring}"][best_test_position],
        "Mean Train Score": cv_results[f"mean_train_{scoring}"][best_test_position],
    }
    for scoring in scorings
]

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'n_neighbors': 163} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.78725,0.7954
1,accuracy,0.704476,0.710548
2,recall,0.7981,0.803572
3,precision,0.689929,0.694993
4,f1,0.74007,0.745342
