In [1]:
import pandas as pd
import warnings
import numpy as np
import re
from sklearn.ensemble import RandomForestClassifier

seed = 10

def get_dataset():

    featureset_path = "train_features.csv"
    targetset_path = "train_targets.csv"

    df = pd.read_csv(featureset_path)
    target = pd.read_csv(targetset_path)

    #print("Features: ",df.columns,"\n")
    #print("Target Columns: ",target.columns,"\n")

    column_to_drop = ["lobby_type","chat_len","game_mode","match_id_hash"] # "match_id_hash","objectives_len"

    filter = "(game_mode == 2 or game_mode == 22) and game_time > 0" # 2 standard ranked or 22 captain mode

    df = df.query(filter)

    df = df.drop(labels=column_to_drop,axis=1)

    tf_toreplace = ["r1_teamfight_participation",
                    "r2_teamfight_participation",
                    "r3_teamfight_participation",
                    "r4_teamfight_participation",
                    "r5_teamfight_participation",
                    "d1_teamfight_participation",
                    "d2_teamfight_participation",
                    "d3_teamfight_participation",
                    "d4_teamfight_participation",
                    "d5_teamfight_participation"]

    for label in tf_toreplace:
        df.loc[df[label] > 1.0, label] = 1


    print("Filtering Df: ", filter, "\n")

    print("Dropped: ",column_to_drop,"\n")

    print("Dataframe Shape: ",df.shape,"\n")

    target = target.loc[df.index]
    print(f"Target shape: {target.shape}")
    return df,target


def get_hero_id_labels(df: pd.DataFrame) -> list[str]:
    hero_id_labels = [s for s in df.columns if s.endswith('_hero_id')]
    print("Hero Id Labels:",hero_id_labels,"\n")
    return hero_id_labels

def get_single_hero_labels(df: pd.DataFrame) -> list[str]:
    single_hero_labels = [s for s in df.columns if re.match(r"^(d|r)\d",s)]
    print("Single Player Labels:",single_hero_labels,"\n")
    return single_hero_labels

def drop_heros_labels(df:pd.DataFrame) -> pd.DataFrame:
    hero_id_labels = get_hero_id_labels(df)
    if (len(hero_id_labels) == 0):
        for label in df.columns:
            if re.match(r"^(d|r)_\d+$", label):  #regex: r_1 d_2 r_124 etc...
                df = df.drop(label,axis=1)
            elif re.match(r"^(d|r)\d_heroid\d+$",label):      #regex: r1_hero_id_12 d3_hero_id_101 ecc..
                df = df.drop(label,axis=1)
    else:
        df = df.drop(labels=hero_id_labels,axis=1)

    print("Dropped Dataframe Shape:",df.shape)

    return df


def playerstats_playerheros_transform(df: pd.DataFrame):

    features_toonehot = ["r1_hero_id",
                         "r2_hero_id",
                         "r3_hero_id",
                         "r4_hero_id",
                         "r5_hero_id",
                         "d1_hero_id",
                         "d2_hero_id",
                         "d3_hero_id",
                         "d4_hero_id",
                         "d5_hero_id"]
    df = pd.get_dummies(df,columns=features_toonehot)

    #target = target.loc[df.index]
    #print(target.shape)
    #df = df.drop('match_id_hash',axis=1)

    return df

def playerstats_teamheros_transform(df: pd.DataFrame):
    warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
    hero_id_labels = get_hero_id_labels(df)

    hero_id_set = {i: set() for i in range(len(hero_id_labels))}

    hero_id_set_tot = set()

    for n,label in enumerate(hero_id_labels):
        for id in df[label]:
            hero_id_set[n].add(id)

    for i in range(len(hero_id_labels)):
        #print(f"{i}. {len(hero_id_set[i])}")
        hero_id_set_tot = hero_id_set_tot.union(hero_id_set[i])

    print("Numbers of Heros: ",len(hero_id_set_tot),"\n")

    for hero_id in hero_id_set_tot:
        df[f"r_{hero_id}"] = 0
        df[f"r_{hero_id}"] = (
            (df["r1_hero_id"] == hero_id) |
            (df["r2_hero_id"] == hero_id) |
            (df["r3_hero_id"] == hero_id) |
            (df["r4_hero_id"] == hero_id) |
            (df["r5_hero_id"] == hero_id)
        ).astype(int)
        df[f"d_{hero_id}"] = (
            (df["d1_hero_id"] == hero_id) |
            (df["d2_hero_id"] == hero_id) |
            (df["d3_hero_id"] == hero_id) |
            (df["d4_hero_id"] == hero_id) |
            (df["d5_hero_id"] == hero_id)
        ).astype(int)

    df = df.drop(labels=hero_id_labels,axis=1) #removed ri_hero_id and di_hero_id

    print("Dataframe Shape:",df.shape,"\n")

    #print(df.iloc[0]["match_id_hash"])
    #print(df.iloc[0][df.iloc[0] == 1][-11:])

    print("NaN Count: ",pd.isna(df).sum().sum(),"\n")

    df = df.copy()

    """ i = 0
    for v in df['d_32']:
        if v == 1:
            i += 1
    print(f"Total: {i}") """

    #target = target.loc[df.index]
    #print(target.shape)


    return df

def teamstats_teamheros_transform(df: pd.DataFrame):
    #we handle PerformanceWarning by doing the copy of the dataframe, this ignore is for quality of outputs
    warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
    hero_id_labels = get_hero_id_labels(df)

    hero_id_set = {i: set() for i in range(len(hero_id_labels))}

    hero_id_set_tot = set()

    for n,label in enumerate(hero_id_labels):
        for id in df[label]:
            hero_id_set[n].add(id)

    for i in range(len(hero_id_labels)):
        #print(f"{i}. {len(hero_id_set[i])}")
        hero_id_set_tot = hero_id_set_tot.union(hero_id_set[i])

    print("Numbers of Heros: ",len(hero_id_set_tot),"\n")

    for hero_id in hero_id_set_tot:
        df[f"r_{hero_id}"] = (
            (df["r1_hero_id"] == hero_id) |
            (df["r2_hero_id"] == hero_id) |
            (df["r3_hero_id"] == hero_id) |
            (df["r4_hero_id"] == hero_id) |
            (df["r5_hero_id"] == hero_id)
        ).astype(int)
        df[f"d_{hero_id}"] = (
            (df["d1_hero_id"] == hero_id) |
            (df["d2_hero_id"] == hero_id) |
            (df["d3_hero_id"] == hero_id) |
            (df["d4_hero_id"] == hero_id) |
            (df["d5_hero_id"] == hero_id)
        ).astype(int)

    df = df.drop(labels=hero_id_labels,axis=1) #removed ri_hero_id and di_hero_id

    #print("Dataframe Shape:",df.shape,"\n")

    #print(df.iloc[0]["match_id_hash"])
    #print(df.iloc[0][df.iloc[0] == 1][-11:])

    print("NaN Count: ",pd.isna(df).sum().sum(),"\n")

    df = df.copy()

    """ i = 0
    for v in df['d_32']:
        if v == 1:
            i += 1
    print(f"Total: {i}") """

    single_hero_labels = get_single_hero_labels(df)
    single_hero_labels2 = single_hero_labels.copy()
    for label in single_hero_labels:
        if re.match(r".*(_x|_y)$",label):
            single_hero_labels2.remove(label)
            continue
        new_label = label[0]+label[2:] #r1_gold -> r_gold
        if not (new_label in df.columns):
            df[new_label] = df[label]
        else:
            df[new_label] += df[label]
    single_hero_labels = single_hero_labels2.copy()
    df = df.drop(labels=single_hero_labels,axis=1).copy()
    #print("New Dataframe Colums:",df.columns,"\n")
    print("New Dataframe Shape:",df.shape,"\n")

    #print(df.query("d_firstblood_claimed == 0 and r_firstblood_claimed == 0").shape)

    #for label in df.columns:
    #    if re.match(r"^(d|r)_\d*$",label): #regex to drop all d_numbers to drop heroes
    #        df = df.drop(label,axis=1)
    #print(df.shape)

    #target = target.loc[df.index]
    #print(target.shape)


    return df

def team_mean_position_transform (df: pd.DataFrame):
    labels_radiant_x = ["r1_x", "r2_x", "r3_x", "r4_x", "r5_x"]
    labels_radiant_y = ["r1_y", "r2_y", "r3_y", "r4_y", "r5_y"]
    labels_dire_x = ["d1_x", "d2_x", "d3_x", "d4_x", "d5_x"]
    labels_dire_y = ["d1_y", "d2_y", "d3_y", "d4_y", "d5_y"]

    #calculate average x and y for Radiant team
    df['radiant_avg_x'] = df[labels_radiant_x].mean(axis=1)
    df['radiant_avg_y'] = df[labels_radiant_y].mean(axis=1)

    #calculate average x and y for Dire team
    df['dire_avg_x'] = df[labels_dire_x].mean(axis=1)
    df['dire_avg_y'] = df[labels_dire_y].mean(axis=1)

    #drop the original x and y columns
    df = df.drop(labels=labels_radiant_x + labels_radiant_y + labels_dire_x + labels_dire_y, axis=1)

    return df

def team_weighted_mean_position_transform(df: pd.DataFrame):

    labels_radiant_x = ["r1_x", "r2_x", "r3_x", "r4_x", "r5_x"]
    labels_radiant_y = ["r1_y", "r2_y", "r3_y", "r4_y", "r5_y"]
    labels_dire_x = ["d1_x", "d2_x", "d3_x", "d4_x", "d5_x"]
    labels_dire_y = ["d1_y", "d2_y", "d3_y", "d4_y", "d5_y"]

    df_Weighted = df.copy(deep=True)
    df_Weighted  = get_average_distances(df_Weighted)

    distances_radiant = ["distance_r1", "distance_r2", "distance_r3", "distance_r4", "distance_r5"]
    distances_dire = ["distance_d1", "distance_d2", "distance_d3", "distance_d4", "distance_d5"]

    # Replace zero distances with 1 to avoid division by zero, only relevant for specific case of all players in the same position
    df_Weighted[distances_radiant] = df_Weighted[distances_radiant].replace(0, 1)
    df_Weighted[distances_dire] = df_Weighted[distances_dire].replace(0, 1)

    #calculate weights as the inverse of distances
    weights_radiant = 1 / df_Weighted[distances_radiant]
    weights_dire = 1 / df_Weighted[distances_dire]

    df['radiant_Weighted_avg_x'] = 0
    df['radiant_Weighted_avg_y'] = 0
    df['dire_Weighted_avg_x'] = 0
    df['dire_Weighted_avg_y'] = 0
    for i in range(5):
        #calculate weighted average x and y for Radiant team
        df['radiant_Weighted_avg_x'] += df[labels_radiant_x[i]] * weights_radiant.iloc[:, i]
        df['radiant_Weighted_avg_y'] += df[labels_radiant_y[i]] * weights_radiant.iloc[:, i]

        #calculate weighted average x and y for Dire team
        df['dire_Weighted_avg_x'] += df[labels_dire_x[i]] * weights_dire.iloc[:, i]
        df['dire_Weighted_avg_y'] += df[labels_dire_y[i]] * weights_dire.iloc[:, i]

    #normalize by the sum of weights
    df['radiant_Weighted_avg_x'] /= weights_radiant.sum(axis=1)
    df['radiant_Weighted_avg_y'] /= weights_radiant.sum(axis=1)
    df['dire_Weighted_avg_x'] /= weights_dire.sum(axis=1)
    df['dire_Weighted_avg_y'] /= weights_dire.sum(axis=1)

    #drop the original x and y columns
    df = df.drop(labels=labels_radiant_x + labels_radiant_y + labels_dire_x + labels_dire_y, axis=1)
    return df

#this is ok, tested
def get_average_distances(df: pd.DataFrame):
    labels_radiant_x = ["r1_x", "r2_x", "r3_x", "r4_x", "r5_x"]
    labels_radiant_y = ["r1_y", "r2_y", "r3_y", "r4_y", "r5_y"]
    labels_dire_x = ["d1_x", "d2_x", "d3_x", "d4_x", "d5_x"]
    labels_dire_y = ["d1_y", "d2_y", "d3_y", "d4_y", "d5_y"]

    radiant_distances = calculate_distances(df, labels_radiant_x, labels_radiant_y)
    dire_distances = calculate_distances(df, labels_dire_x, labels_dire_y)

    for label in radiant_distances:
        truncated_label = label[:-2]
        df[f'distance_{truncated_label}'] = radiant_distances[label]

    for label in dire_distances:
        truncated_label = label[:-2]
        df[f'distance_{truncated_label}'] = dire_distances[label]

    return df
#this is ok, tested
def calculate_distances(df: pd.DataFrame, x_labels, y_labels):
    distances = {label: [] for label in x_labels}
    for i in range(len(x_labels)):
        for j in range(len(x_labels)):
            if i != j:
                dist = np.sqrt((df[x_labels[i]] - df[x_labels[j]])**2 + (df[y_labels[i]] - df[y_labels[j]])**2)
                distances[x_labels[i]].append(dist)
    return {label: np.mean(distances[label], axis=0) for label in distances}


def feature_selection_transform(df: pd.DataFrame,target: pd.DataFrame, threshold: float) -> pd.DataFrame:
    feature_selector = RandomForestClassifier(max_depth=10,random_state=seed)

    feature_selector.fit(df,target)

    feature_importance = {
        name: value
        for name,value in zip(feature_selector.feature_names_in_,feature_selector.feature_importances_)
    }

    feature_importance = dict(reversed(sorted(feature_importance.items(), key=lambda item: item[1])))
    feature_names = list(feature_importance.keys())

    n_selected_features = np.sum(np.array(list(feature_importance.values())) > threshold)

    df_reduced = df[feature_names[:n_selected_features]]
    print("Shape Tranformation:\n",df.shape,"->", df_reduced.shape)

    return df_reduced

In [2]:
import pandas as pd
import numpy as np
# from dataset_functions import *
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV,StratifiedKFold

# AdaBoost

In [3]:
params = {
    'estimator__max_depth': [1,2,3],
    'n_estimators': [50,75,100],
    'learning_rate': [1,1.5,2],
}

scorings = ["roc_auc","accuracy","recall","precision","f1"]

model = AdaBoostClassifier(estimator=DecisionTreeClassifier(),random_state=42)

best_model = GridSearchCV(estimator=model,param_grid=params,scoring=scorings,refit="roc_auc",cv=StratifiedKFold(n_splits=5,shuffle=True),return_train_score=True,verbose=1)

In [4]:
df, target = get_dataset()
win = target['radiant_win']

Filtering Df:  (game_mode == 2 or game_mode == 22) and game_time > 0 

Dropped:  ['lobby_type', 'chat_len', 'game_mode', 'match_id_hash'] 

Dataframe Shape:  (32153, 242) 

Target shape: (32153, 6)


In [5]:
df_tt = teamstats_teamheros_transform(df.copy())
df_mp = team_mean_position_transform(df_tt.copy())
df_wmp = team_weighted_mean_position_transform(df_tt.copy())

df_tt = feature_selection_transform(df_tt,win,0.01)
df_mp = feature_selection_transform(df_mp,win,0.01)
df_wmp = feature_selection_transform(df_wmp,win,0.01)

Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3_denies', 'r3_gold', 'r

# TT

In [7]:
best_model.fit(df_tt,win)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [8]:
best_model.cv_results_

{'mean_fit_time': array([ 4.67180333,  7.15392742,  9.23449521,  4.68169417,  6.94529796,
         9.38585505,  4.75335822,  7.08554072,  9.42038555,  9.04789677,
        13.98245387, 17.76096082,  9.03514862, 13.3932375 , 17.90156865,
         8.79347243, 13.36965041, 17.66200156, 13.55353417, 19.54463291,
        25.945681  , 13.02978945, 19.58951411, 25.94042468, 12.86246595,
        19.37387547, 26.02471132]),
 'std_fit_time': array([0.30335395, 0.60269659, 0.26678435, 0.40863649, 0.30124921,
        0.28977336, 0.38975444, 0.33609843, 0.28753073, 0.30913791,
        0.85295384, 0.14148286, 0.19238348, 0.04884665, 0.36508092,
        0.26891896, 0.16993291, 0.15947262, 0.52964742, 0.25287581,
        0.25738528, 0.06546438, 0.30899673, 0.06444165, 0.04398567,
        0.36881655, 0.20766091]),
 'mean_score_time': array([0.11371164, 0.1539444 , 0.18797545, 0.1105689 , 0.1496799 ,
        0.19206901, 0.09754429, 0.15182776, 0.19413471, 0.11106801,
        0.18034649, 0.21276035, 0.113

In [9]:
cv_results:dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

results = []

for scoring in scorings:

    mean_test_roc_label = f"mean_test_{scoring}"

    mean_test_score_list = cv_results[f"mean_test_{scoring}"]

    best_test_position = np.argmin(best_model.cv_results_["rank_test_roc_auc"])

    mean_train_roc_label = f"mean_train_{scoring}"

    mean_train_score_list = cv_results[f"mean_train_{scoring}"]

    results.append({"Scoring": scoring, "Mean Test Score": mean_test_score_list[best_test_position], "Mean Train Score": mean_train_score_list[best_test_position]})

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'estimator__max_depth': 2, 'learning_rate': 1.5, 'n_estimators': 100} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.789926,0.810736
1,accuracy,0.707585,0.723136
2,recall,0.752964,0.768585
3,precision,0.709934,0.723471
4,f1,0.730784,0.745307


In [10]:
feature_importance = {
    name: value
    for name,value in zip(best_model.best_estimator_.feature_names_in_,best_model.best_estimator_.feature_importances_)
}

feature_importance = dict(reversed(sorted(feature_importance.items(), key=lambda item: item[1])))
feature_names = list(feature_importance.keys())
print("Most Important:\n", feature_names[:10])
print("Least Important:\n", feature_names[-10:])


Most Important:
 ['d_kills', 'r_kills', 'r_towers_killed', 'd_gold', 'r_denies', 'r_gold', 'd_lh', 'r_deaths', 'r_lh', 'd1_x']
Least Important:
 ['d3_y', 'd2_y', 'd4_y', 'd5_x', 'd1_y', 'game_time', 'd_level', 'd3_x', 'r3_x', 'r2_x']


# MP

In [11]:
best_model.fit(df_mp,win)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [12]:
best_model.cv_results_

{'mean_fit_time': array([ 3.63919683,  5.3198082 ,  7.29004483,  3.58614373,  5.29441047,
         7.25890999,  3.53956652,  5.33056803,  7.27928314,  6.77799168,
        10.42961149, 13.77922702,  6.84743843, 10.36682734, 13.55049934,
         6.88081145, 10.40345917, 13.63704643,  9.98282609, 15.01268706,
        20.13202133, 10.11902142, 15.12736025, 20.39961872, 10.18829317,
        15.23002539, 20.45044804]),
 'std_fit_time': array([0.29414268, 0.17832395, 0.32243996, 0.17445563, 0.19280456,
        0.29815085, 0.16365377, 0.22687763, 0.26525723, 0.35966425,
        0.11701405, 0.12057478, 0.3389535 , 0.01560468, 0.04254139,
        0.32544201, 0.03247565, 0.06078147, 0.25842567, 0.3820056 ,
        0.37903867, 0.29058335, 0.22316259, 0.3137829 , 0.33491373,
        0.12414629, 0.33954925]),
 'mean_score_time': array([0.0898407 , 0.13949075, 0.16592193, 0.09830642, 0.1436831 ,
        0.16537137, 0.0979619 , 0.15882392, 0.16762309, 0.10280414,
        0.12442966, 0.17736888, 0.092

In [13]:
cv_results:dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

results = []

for scoring in scorings:

    mean_test_roc_label = f"mean_test_{scoring}"

    mean_test_score_list = cv_results[f"mean_test_{scoring}"]

    best_test_position = np.argmin(best_model.cv_results_["rank_test_roc_auc"])

    mean_train_roc_label = f"mean_train_{scoring}"

    mean_train_score_list = cv_results[f"mean_train_{scoring}"]

    results.append({"Scoring": scoring, "Mean Test Score": mean_test_score_list[best_test_position], "Mean Train Score": mean_train_score_list[best_test_position]})

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'estimator__max_depth': 2, 'learning_rate': 1.5, 'n_estimators': 100} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.795377,0.81388
1,accuracy,0.710074,0.72585
2,recall,0.756859,0.773585
3,precision,0.71167,0.724935
4,f1,0.733423,0.74839


In [14]:
feature_importance = {
    name: value
    for name,value in zip(best_model.best_estimator_.feature_names_in_,best_model.best_estimator_.feature_importances_)
}

feature_importance = dict(reversed(sorted(feature_importance.items(), key=lambda item: item[1])))
feature_names = list(feature_importance.keys())
print("Most Important:\n", feature_names[:10])
print("Least Important:\n", feature_names[-10:])


Most Important:
 ['radiant_avg_y', 'dire_avg_x', 'd_kills', 'radiant_avg_x', 'd_gold', 'd_health', 'r_health', 'r_gold', 'r_lh', 'r_towers_killed']
Least Important:
 ['d_rune_pickups', 'd_deaths', 'r_xp', 'r_max_health', 'r_assists', 'd_level', 'd_max_mana', 'r_level', 'game_time', 'd_max_health']


# WMP

In [19]:
best_model.fit(df_wmp,win)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [20]:
best_model.cv_results_

{'mean_fit_time': array([ 4.47199931,  6.51895366,  8.63074555,  4.31181726,  6.32724962,
         8.58922405,  4.3131567 ,  6.41771173,  8.47595625,  8.26504154,
        12.05791874, 16.0965251 ,  8.03122602, 12.11533508, 16.22368765,
         8.0555975 , 12.1950357 , 16.45166125, 12.22539244, 18.25124354,
        24.11033978, 12.16727486, 18.36015878, 24.53360476, 12.28664064,
        18.4825954 , 24.35104475]),
 'std_fit_time': array([0.53922222, 0.28012408, 0.2751586 , 0.2474305 , 0.36568745,
        0.27458689, 0.24969572, 0.26033608, 0.36254507, 0.29667351,
        0.05773972, 0.13165696, 0.23114098, 0.07956439, 0.35424247,
        0.29681297, 0.07776103, 0.21621839, 0.03968897, 0.0999676 ,
        0.15827514, 0.05493255, 0.33535864, 0.11325163, 0.09184254,
        0.38082684, 0.26356437]),
 'mean_score_time': array([0.10190387, 0.13118496, 0.16099448, 0.09158931, 0.1156672 ,
        0.16931715, 0.09346924, 0.12857299, 0.16894994, 0.08937764,
        0.12252345, 0.18458223, 0.099

In [21]:
cv_results:dict = best_model.cv_results_

print("Best Parameters:\n",best_model.best_params_,'\n')

results = []

for scoring in scorings:

    mean_test_roc_label = f"mean_test_{scoring}"

    mean_test_score_list = cv_results[f"mean_test_{scoring}"]

    best_test_position = np.argmin(best_model.cv_results_["rank_test_roc_auc"])

    mean_train_roc_label = f"mean_train_{scoring}"

    mean_train_score_list = cv_results[f"mean_train_{scoring}"]

    results.append({"Scoring": scoring, "Mean Test Score": mean_test_score_list[best_test_position], "Mean Train Score": mean_train_score_list[best_test_position]})

df_results = pd.DataFrame(results)

df_results

Best Parameters:
 {'estimator__max_depth': 2, 'learning_rate': 1, 'n_estimators': 100} 



Unnamed: 0,Scoring,Mean Test Score,Mean Train Score
0,roc_auc,0.79345,0.807899
1,accuracy,0.710478,0.721457
2,recall,0.762465,0.773998
3,precision,0.709853,0.719072
4,f1,0.735189,0.745515


In [22]:
feature_importance = {
    name: value
    for name,value in zip(best_model.best_estimator_.feature_names_in_,best_model.best_estimator_.feature_importances_)
}

feature_importance = dict(reversed(sorted(feature_importance.items(), key=lambda item: item[1])))
feature_names = list(feature_importance.keys())
print("Most Important:\n", feature_names[:10])
print("Least Important:\n", feature_names[-10:])


Most Important:
 ['r_kills', 'radiant_Weighted_avg_y', 'radiant_Weighted_avg_x', 'd_kills', 'dire_Weighted_avg_x', 'd_towers_killed', 'd_health', 'r_towers_killed', 'dire_Weighted_avg_y', 'r_lh']
Least Important:
 ['r_rune_pickups', 'd_rune_pickups', 'r_assists', 'r_xp', 'd_level', 'r_max_mana', 'r_level', 'd_max_mana', 'game_time', 'd_max_health']
