In [1]:
import pandas as pd
import numpy as np
from dataset_functions import *
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
seed=10
np.random.seed(seed=10)

# Feature Selection

In [2]:
def fit_feature_selector(X_train,Y_train):
    feature_selector = RandomForestClassifier(max_depth=10,class_weight="balanced",random_state=seed)

    feature_selector.fit(X_train,Y_train)
    return feature_selector

def print_results(model:RandomForestClassifier,X_test,Y_test):
    Y_pred = model.predict(X_test)
    print("Accouracy:",accuracy_score(Y_pred,Y_test))
    print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

# Data Loading

In [3]:
print("Retriving Dataset")
df, target = get_dataset()

Retriving Dataset
Filtering Df:  (game_mode == 2 or game_mode == 22) and game_time > 0 

Dropped:  ['lobby_type', 'chat_len', 'game_mode', 'match_id_hash'] 

Dataframe Shape:  (32153, 242) 

Target shape: (32153, 6)


In [4]:
print("Team Stats-Team Heros:")
df_tt = teamstats_teamheros_transform(df.copy())
print("Team Mean Position:")
df_mp = team_mean_position_transform(df_tt.copy())
print(df_mp.shape)
print("Team Weighted Mean Position:")
df_wmp = team_weighted_mean_position_transform(df_tt.copy())
print(df_wmp.shape)

Team Stats-Team Heros:
Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3

In [5]:
win = target["radiant_win"].astype(int)

# Team Stats - Team Heros

In [12]:
X_train,X_test,Y_train,Y_test = train_test_split(df_tt,win,test_size=0.2)

In [7]:
feature_selector = fit_feature_selector(X_train,Y_train)

In [8]:
print_results(feature_selector,X_test,Y_test)

Accouracy: 0.7107759290934536
Roc-Auc: 0.7106534120114317


In [10]:
X_train_reduced = feature_selection_transform(X_train,Y_train,threshold=0.01)
feature_selector = fit_feature_selector(X_train_reduced,Y_train)
print_results(feature_selector,X_test[X_train_reduced.columns],Y_test)

Shape Tranformation:
 (25722, 294) -> (25722, 46)
Accouracy: 0.7148188462136527
Roc-Auc: 0.7149490517584697


# Team Mean Positions

In [13]:
X_train,X_test,Y_train,Y_test = train_test_split(df_mp,win,test_size=0.2)

In [14]:
feature_selector = fit_feature_selector(X_train,Y_train)

In [15]:
print_results(feature_selector,X_test,Y_test)

Accouracy: 0.7070440055978853
Roc-Auc: 0.7062911758168482


In [16]:
X_train_reduced = feature_selection_transform(X_train,Y_train,threshold=0.01)
feature_selector = fit_feature_selector(X_train_reduced,Y_train)
print_results(feature_selector,X_test[X_train_reduced.columns],Y_test)

Shape Tranformation:
 (25722, 298) -> (25722, 47)
Accouracy: 0.7089099673456695
Roc-Auc: 0.7079233241873035


# Team Weighted Mean Positions

In [17]:
X_train,X_test,Y_train,Y_test = train_test_split(df_wmp,win,test_size=0.2)

In [18]:
feature_selector = fit_feature_selector(X_train,Y_train)

In [19]:
print_results(feature_selector,X_test,Y_test)

Accouracy: 0.708754470533354
Roc-Auc: 0.7097208416536147


In [20]:
X_train_reduced = feature_selection_transform(X_train,Y_train,threshold=0.01)
feature_selector = fit_feature_selector(X_train_reduced,Y_train)
print_results(feature_selector,X_test[X_train_reduced.columns],Y_test)

Shape Tranformation:
 (25722, 298) -> (25722, 48)
Accouracy: 0.710931425905769
Roc-Auc: 0.7114360378785207


# Sequential Feature Selection

In [100]:
from sklearn.feature_selection import SequentialFeatureSelector

df_tt_dropped_heros = drop_heros_labels(df_tt.copy())

sf = SequentialFeatureSelector(estimator=feature_selector,tol=0.01,direction='forward')

Hero Id Labels: [] 

Dropped Dataframe Shape: (32153, 64)


In [101]:
sf.fit(df_tt_dropped_heros,win)

In [103]:
sf.get_feature_names_out()
sf.scoring

In [108]:
model = RandomForestClassifier(**feature_selector.get_params())
X_train,X_test,Y_train,Y_test = train_test_split(df_tt_dropped_heros,win,test_size=0.2)

In [109]:
model.fit(sf.transform(X_train),Y_train)

In [111]:
Y_pred  = model.predict(sf.transform(X_test))
print(accuracy_score(Y_pred,Y_test))
print(roc_auc_score(Y_pred,Y_test))

0.6927382988648733
0.69179933403221
