In [20]:
import pandas as pd
import numpy as np
from dataset_functions import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV,StratifiedKFold

# Random Forest

In [75]:
# Hyper-parameter grid handed to the random-forest search.
# The trailing comments preserve the alternative value lists noted by the author.
params = dict(
    n_estimators=[50, 100, 150],  # [100,200,250,300]
    class_weight=['balanced'],
    criterion=['entropy', 'gini'],
    random_state=[42],
    max_depth=[5, 10],  # [None,10,20,30]
    min_samples_leaf=[1, 2, 5],
)

# Base estimator created with library defaults.
model = RandomForestClassifier()

In [22]:
# Load the feature table and the target table.
# NOTE(review): get_dataset presumably comes from the star-imported
# dataset_functions module — confirm; it prints its own filtering summary.
df, target = get_dataset()
# Label column used for training; presumably a binary "radiant side won" flag — confirm.
win = target['radiant_win']

Filtering Df:  (game_mode == 2 or game_mode == 22) and game_time > 0 

Dropped:  ['lobby_type', 'chat_len', 'game_mode', 'match_id_hash'] 

Dataframe Shape:  (32153, 242) 

Target shape: (32153, 6)


In [23]:
# Build the model input in one expression: transform a copy of the raw frame
# (so `df` itself is not handed to the helper), then run feature selection
# against the `win` label.
# NOTE(review): 0.01 is the third positional argument of
# feature_selection_transform — presumably a selection threshold; confirm.
df_tt = feature_selection_transform(
    teamstats_teamheros_transform(df.copy()),
    win,
    0.01,
)

Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3_denies', 'r3_gold', 'r

In [76]:
# Metrics computed for every candidate; the search is refit on ROC AUC.
scorings = ["roc_auc", "accuracy", "recall", "precision", "f1"]

# Exhaustive search over `params` with 5-fold stratified cross-validation.
# The splitter shuffles, so it is seeded (42, the same seed the grid uses for
# the forest): without a random_state the fold assignment differs on every
# run, and the reported scores / selected parameters are not reproducible.
best_model = GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring=scorings,
    refit="roc_auc",
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    return_train_score=True,  # needed for the train-vs-test comparison
    verbose=1,
    n_jobs=-1,
)

best_model.fit(df_tt, win)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


In [77]:
# Raw cross-validation results of the grid search: a dict keyed by statistic
# name, each value holding one entry per parameter candidate.
best_model.cv_results_

{'mean_fit_time': array([ 2.44647288,  5.68972254,  8.4864717 ,  2.68554611,  5.21862044,
         8.32209654,  3.21418319,  5.00444789,  6.97545557,  4.80961952,
         9.68719006, 14.73885841,  4.65248623,  9.37137623, 14.96166129,
         4.81041265,  9.5212286 , 14.38967347,  1.91039596,  4.05768166,
         6.0280942 ,  2.0772511 ,  3.99111876,  6.11628318,  2.32505269,
         4.23118291,  5.87145629,  4.34942975,  7.84851551, 12.08416371,
         3.90610442,  8.08629885, 12.00192666,  4.0029232 ,  8.60749564,
        11.44273453]),
 'std_fit_time': array([0.35142534, 0.34530075, 0.96137763, 0.51809951, 0.30583193,
        0.54166412, 0.8594713 , 0.86972726, 0.58670979, 0.64215525,
        0.65900069, 1.15822424, 0.30562178, 0.56217476, 0.76545793,
        0.4269425 , 0.87794787, 0.82494365, 0.17439275, 0.36948825,
        0.41507998, 0.25448133, 0.34937319, 0.26223855, 0.17452923,
        0.42518875, 0.60268177, 0.53440172, 0.43997073, 0.92464724,
        0.33930567, 0.406

In [78]:
# Summarise train vs. test performance of the selected candidate for every metric.
cv_results: dict = best_model.cv_results_

print("Best Parameters:\n", best_model.best_params_, '\n')

# Row of cv_results_ belonging to the best candidate under the refit metric.
# best_index_ is exactly the argmin of that metric's rank array, so it stays
# correct if `refit` is changed, and it is looked up once rather than being
# recomputed on every loop iteration.
best_test_position = best_model.best_index_

results = []

for scoring in scorings:

    mean_test_score_list = cv_results[f"mean_test_{scoring}"]

    mean_train_score_list = cv_results[f"mean_train_{scoring}"]

    results.append({
        "Scoring": scoring,
        "Mean Test Score": mean_test_score_list[best_test_position],
        "Mean Train Score": mean_train_score_list[best_test_position],
    })

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 5, 'n_estimators': 150, 'random_state': 42} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.800248,0.907278
1,accuracy,0.716139,0.819527
2,recall,0.764824,0.858413
3,precision,0.716056,0.810464
4,f1,0.739589,0.833743


In [79]:
# Depth reached by every tree of the refit best forest.
depths = list(
    map(
        lambda fitted_tree: fitted_tree.get_depth(),
        best_model.best_estimator_.estimators_,
    )
)
# Largest, smallest and average depth, one value per line.
print(*(summarise(depths) for summarise in (np.max, np.min, np.mean)), sep="\n")

10
10
10.0


In [80]:
# Pair each input feature name of the best forest with its importance score.
feature_importance = dict(
    zip(
        best_model.best_estimator_.feature_names_in_,
        best_model.best_estimator_.feature_importances_,
    )
)

# Order from most to least important: ascending sort, then flip the whole list
# (the flip, rather than a descending sort, keeps the ordering of tied scores).
feature_importance = dict(
    sorted(feature_importance.items(), key=lambda pair: pair[1])[::-1]
)
feature_names = [*feature_importance]
print("Most Important:\n", feature_names[:10])
print("Least Important:\n", feature_names[-10:])


Most Important:
 ['d_kills', 'd_towers_killed', 'r_towers_killed', 'r_kills', 'd_deaths', 'r_deaths', 'd_gold', 'r_gold', 'd_assists', 'r_rune_pickups']
Least Important:
 ['d5_y', 'r_health', 'd5_x', 'd3_x', 'd4_y', 'd1_y', 'd_level', 'd_max_mana', 'r_level', 'game_time']
