In [59]:
import pandas as pd
import numpy as np
from dataset_functions import *
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
seed=10

# Feature Selection

In [60]:
def fit_feature_selector(X_train,Y_train):
    feature_selector = RandomForestClassifier(max_depth=10,class_weight="balanced",random_state=seed)

    feature_selector.fit(X_train,Y_train)
    return feature_selector

def print_results(model:RandomForestClassifier,X_test,Y_test):
    Y_pred = model.predict(X_test)
    print("Accouracy:",accuracy_score(Y_pred,Y_test))
    print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

# Data Loading

In [61]:
print("Retriving Dataset")
df, target = get_dataset()

Retriving Dataset
Filtering Df:  (game_mode == 2 or game_mode == 22) and game_time > 0 

Dropped:  ['lobby_type', 'chat_len', 'game_mode', 'match_id_hash'] 

Dataframe Shape:  (32153, 242) 

Target shape: (32153, 6)


In [62]:
print("Team Stats-Team Heros:")
df_tt = teamstats_teamheros_transform(df.copy())
print("Team Mean Position:")
df_mp = team_mean_position_transform(df_tt.copy())
print(df_mp.shape)
print("Team Weighted Mean Position:")
df_wmp = team_weighted_mean_position_transform(df_tt.copy())
print(df_wmp.shape)

Team Stats-Team Heros:
Hero Id Labels: ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id'] 

Numbers of Heros:  115 

NaN Count:  0 

Single Player Labels: ['r1_kills', 'r1_deaths', 'r1_assists', 'r1_denies', 'r1_gold', 'r1_lh', 'r1_xp', 'r1_health', 'r1_max_health', 'r1_max_mana', 'r1_level', 'r1_x', 'r1_y', 'r1_stuns', 'r1_creeps_stacked', 'r1_camps_stacked', 'r1_rune_pickups', 'r1_firstblood_claimed', 'r1_teamfight_participation', 'r1_towers_killed', 'r1_roshans_killed', 'r1_obs_placed', 'r1_sen_placed', 'r2_kills', 'r2_deaths', 'r2_assists', 'r2_denies', 'r2_gold', 'r2_lh', 'r2_xp', 'r2_health', 'r2_max_health', 'r2_max_mana', 'r2_level', 'r2_x', 'r2_y', 'r2_stuns', 'r2_creeps_stacked', 'r2_camps_stacked', 'r2_rune_pickups', 'r2_firstblood_claimed', 'r2_teamfight_participation', 'r2_towers_killed', 'r2_roshans_killed', 'r2_obs_placed', 'r2_sen_placed', 'r3_kills', 'r3_deaths', 'r3_assists', 'r3

In [63]:
win = target["radiant_win"].astype(int)

# Team Stats - Team Heros

In [64]:
X_train,X_test,Y_train,Y_test = train_test_split(df_tt,win,test_size=0.2,random_state=seed)

In [65]:
feature_selector = fit_feature_selector(X_train,Y_train)

In [66]:
print_results(feature_selector,X_test,Y_test)

Accouracy: 0.7107759290934536
Roc-Auc: 0.7106534120114317


In [67]:
X_train_reduced = feature_selection_transform(X_train,Y_train,threshold=0.01)
feature_selector = fit_feature_selector(X_train_reduced,Y_train)
print_results(feature_selector,X_test[X_train_reduced.columns],Y_test)

Shape Tranformation:
 (25722, 294) -> (25722, 46)
Accouracy: 0.7188617633338517
Roc-Auc: 0.7191383583899008


# Team Mean Positions

In [68]:
X_train,X_test,Y_train,Y_test = train_test_split(df_mp,win,test_size=0.2,random_state=seed)

In [69]:
feature_selector = fit_feature_selector(X_train,Y_train)

In [70]:
print_results(feature_selector,X_test,Y_test)

Accouracy: 0.7174622920230135
Roc-Auc: 0.7172626297857931


In [71]:
X_train_reduced = feature_selection_transform(X_train,Y_train,threshold=0.01)
feature_selector = fit_feature_selector(X_train_reduced,Y_train)
print_results(feature_selector,X_test[X_train_reduced.columns],Y_test)

Shape Tranformation:
 (25722, 278) -> (25722, 31)
Accouracy: 0.7213497123308972
Roc-Auc: 0.7211851467369682


# Team Weighted Mean Positions

In [72]:
X_train,X_test,Y_train,Y_test = train_test_split(df_wmp,win,test_size=0.2)

In [73]:
feature_selector = fit_feature_selector(X_train,Y_train)

In [74]:
print_results(feature_selector,X_test,Y_test)

Accouracy: 0.712330897216607
Roc-Auc: 0.7111524034454015


In [75]:
X_train_reduced = feature_selection_transform(X_train,Y_train,threshold=0.01)
feature_selector = fit_feature_selector(X_train_reduced,Y_train)
print_results(feature_selector,X_test[X_train_reduced.columns],Y_test)

Shape Tranformation:
 (25722, 278) -> (25722, 32)
Accouracy: 0.712797387653553
Roc-Auc: 0.7117144151118376


# Sequential Feature Selection

In [19]:
from sklearn.feature_selection import SequentialFeatureSelector

df_tt_dropped_heros = drop_heros_labels(df_tt.copy())

sf = SequentialFeatureSelector(estimator=feature_selector,tol=0.01,direction='forward')

Hero Id Labels: [] 

Dropped Dataframe Shape: (32153, 64)


In [20]:
sf.fit(df_tt_dropped_heros,win)

In [26]:
sf.get_feature_names_out()

array(['r_kills', 'r_towers_killed', 'd_kills', 'd_towers_killed'],
      dtype=object)

In [27]:
model = RandomForestClassifier(**feature_selector.get_params())
X_train,X_test,Y_train,Y_test = train_test_split(df_tt_dropped_heros,win,test_size=0.2)

In [28]:
model.fit(sf.transform(X_train),Y_train)

In [29]:
Y_pred  = model.predict(sf.transform(X_test))
print(accuracy_score(Y_pred,Y_test))
print(roc_auc_score(Y_pred,Y_test))

0.6830974965013217
0.6829183049366535


# Feature Selection with PCA

In [79]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

pca_95 = PCA(n_components=0.95)
pca_99 = PCA(n_components=0.99)


In [80]:
df_tt = feature_selection_transform(df_tt,win,0.01)
df_mp = feature_selection_transform(df_mp,win,0.01)
df_wmp = feature_selection_transform(df_wmp,win,0.01)

Shape Tranformation:
 (32153, 294) -> (32153, 47)
Shape Tranformation:
 (32153, 278) -> (32153, 31)
Shape Tranformation:
 (32153, 278) -> (32153, 31)


## Team Stats - Team Heros

In [81]:
df_tt_95 = pca_95.fit_transform(scaler.fit_transform(df_tt))

model = RandomForestClassifier(max_depth=10,random_state=seed)
X_train,X_test,Y_train,Y_test = train_test_split(df_tt_95,win,test_size=0.2)

model.fit(X_train,Y_train)

Y_pred  = model.predict(X_test)
print("Accuracy:",accuracy_score(Y_pred,Y_test))
print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

print("Shape:",df_tt_95.shape)

Accuracy: 0.710464935468823
Roc-Auc: 0.7126663546145278
Shape: (32153, 25)


In [82]:
df_tt_99 = pca_99.fit_transform(scaler.fit_transform(df_tt))

model = RandomForestClassifier(max_depth=10,random_state=seed)
X_train,X_test,Y_train,Y_test = train_test_split(df_tt_99,win,test_size=0.2)

model.fit(X_train,Y_train)

Y_pred  = model.predict(X_test)
print("Accuracy:",accuracy_score(Y_pred,Y_test))
print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

print("Shape:",df_tt_99.shape)

Accuracy: 0.7176177888353289
Roc-Auc: 0.7209414486337664
Shape: (32153, 32)


## Team Mean Positions

In [83]:
df_mp_95 = pca_95.fit_transform(scaler.fit_transform(df_mp))

model = RandomForestClassifier(max_depth=10,random_state=seed)
X_train,X_test,Y_train,Y_test = train_test_split(df_mp_95,win,test_size=0.2)

model.fit(X_train,Y_train)

Y_pred  = model.predict(X_test)
print("Accuracy:",accuracy_score(Y_pred,Y_test))
print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

print("Shape:",df_mp_95.shape)

Accuracy: 0.7061110247239931
Roc-Auc: 0.7099359239507557
Shape: (32153, 11)


In [84]:
df_mp_99 = pca_99.fit_transform(scaler.fit_transform(df_mp))

model = RandomForestClassifier(max_depth=10,random_state=seed)
X_train,X_test,Y_train,Y_test = train_test_split(df_mp_99,win,test_size=0.2)

model.fit(X_train,Y_train)

Y_pred  = model.predict(X_test)
print("Accuracy:",accuracy_score(Y_pred,Y_test))
print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

print("Shape:",df_mp_99.shape)

Accuracy: 0.7138858653397605
Roc-Auc: 0.7159971603181395
Shape: (32153, 18)


## Team Weighted Mean Positions

In [85]:
df_wmp_95 = pca_95.fit_transform(scaler.fit_transform(df_wmp))

model = RandomForestClassifier(max_depth=10,random_state=seed)
X_train,X_test,Y_train,Y_test = train_test_split(df_wmp_95,win,test_size=0.2)

model.fit(X_train,Y_train)

Y_pred  = model.predict(X_test)
print("Accuracy:",accuracy_score(Y_pred,Y_test))
print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

print("Shape:",df_wmp_95.shape)

Accuracy: 0.7188617633338517
Roc-Auc: 0.7177715106126797
Shape: (32153, 11)


In [86]:
df_wmp_99 = pca_99.fit_transform(scaler.fit_transform(df_wmp))

model = RandomForestClassifier(max_depth=10,random_state=seed)
X_train,X_test,Y_train,Y_test = train_test_split(df_wmp_99,win,test_size=0.2)

model.fit(X_train,Y_train)

Y_pred  = model.predict(X_test)
print("Accuracy:",accuracy_score(Y_pred,Y_test))
print("Roc-Auc:",roc_auc_score(Y_pred,Y_test))

print("Shape:",df_wmp_99.shape)

Accuracy: 0.715440833462914
Roc-Auc: 0.7183761150096812
Shape: (32153, 18)
