In [62]:
import pandas as pd
import numpy as np  
import torch 
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
import json


In [63]:
df = pd.read_csv("combined_league_matches.csv")
#Some useful stuff we might wanna use
champion_ids = [col for col in df.columns if 'champion' in col]
all_games = pd.concat([df[col] for col in champion_ids]) 
games_per_champ = all_games.value_counts() #maybe useful, find total number of games per champ id

Basic Random Forest

In [72]:
df = pd.read_csv("combined_league_matches.csv")

encoder = LabelEncoder()

for col in ['red_champion_0', 'red_champion_1', 'red_champion_2', 'red_champion_3', 'red_champion_4',
            'blue_champion_0', 'blue_champion_1', 'blue_champion_2', 'blue_champion_3', 'blue_champion_4']:
    df[col] = encoder.fit_transform(df[col])

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

#if we want to test without mastery just uncomment below
#df = df.drop(columns=['red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
#            'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'])


In [73]:
X = df.drop(columns=['target'])  # Features (character data)
y = df['target']  # win/loss from blue perspective

In [74]:
#splitting data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) #can use random_state

In [67]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 1000,10)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10,14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,6,8]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [68]:
model = RandomForestClassifier()#can use random_state
randomizedModel = RandomizedSearchCV(estimator = model, param_distributions= random_grid, n_iter=100, random_state=100,n_jobs=1)
randomizedModel.fit(X_train, y_train)


# to see best parameters
randomizedModel.best_params_

# to make new classifier with the best estimator
best_random_grid = randomizedModel.best_estimator_
best_random_grid.fit(X_train, y_train)

#Predict
y_pred = best_random_grid.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

KeyboardInterrupt: 

Trying to scale the mastery levels and using random forest

In [77]:
#randomizedModel.best_params_
model = RandomForestClassifier(n_estimators= 600, min_samples_split= 5, min_samples_leaf= 1, max_features= 'sqrt', max_depth=10, criterion='gini')
model.fit(X_train, y_train)

#Predict
y_pred = model.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))

0.5600221279734464
              precision    recall  f1-score   support

           0       0.59      0.25      0.36      2592
           1       0.55      0.84      0.67      2831

    accuracy                           0.56      5423
   macro avg       0.57      0.55      0.51      5423
weighted avg       0.57      0.56      0.52      5423



In [78]:
df = pd.read_csv("combined_league_matches.csv")
champion_ids = [col for col in df.columns if 'champion' in col]
all_games = pd.concat([df[col] for col in champion_ids]) 
games_per_champ = all_games.value_counts() #maybe useful, find total number of games per champ id


encoder = LabelEncoder()

for col in ['red_champion_0', 'red_champion_1', 'red_champion_2', 'red_champion_3', 'red_champion_4',
            'blue_champion_0', 'blue_champion_1', 'blue_champion_2', 'blue_champion_3', 'blue_champion_4']:
    df[col] = encoder.fit_transform(df[col])

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

#if we want to test without mastery just uncomment below
#df = df.drop(columns=['red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
#            'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'])

#scale all mastery values
mastery_columns = [
    'red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
    'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'
]

scaler = StandardScaler() #can also try minmaxscaler and keep within certain range

df[mastery_columns] = scaler.fit_transform(df[mastery_columns])

In [82]:
X = df.drop(columns=['target'])
y = df['target']

# Split the data and proceed with model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

model = RandomForestClassifier(n_estimators= 600, min_samples_split= 5, min_samples_leaf= 1, max_features= 'sqrt', max_depth=10, criterion='gini')
model.fit(X_train, y_train)

#Predict
y_pred = model.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)
print(classification_report(y_test, y_pred))

0.5637905604719764
              precision    recall  f1-score   support

           0       0.61      0.26      0.37      1305
           1       0.55      0.84      0.67      1407

    accuracy                           0.56      2712
   macro avg       0.58      0.55      0.52      2712
weighted avg       0.58      0.56      0.52      2712



#Using mean Winrate

In [81]:
df = pd.read_csv("combined_league_matches.csv")

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

with open("champions_with_mean_winrate.json", "r") as file:
    win_rate_data = json.load(file)
win_rates = {int(k): float(v.strip('%')) for k, v in win_rate_data.items()}

for col in df.columns:
    if "champion" in col:
        df[col] = df[col].map(win_rates)

X = df.drop(columns=['target'])
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.5633413239904113
Confusion Matrix:
 [[ 729 1882]
 [ 486 2326]]
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.28      0.38      2611
           1       0.55      0.83      0.66      2812

    accuracy                           0.56      5423
   macro avg       0.58      0.55      0.52      5423
weighted avg       0.58      0.56      0.53      5423



#Using mean with outlier replaced winrate

In [83]:
df = pd.read_csv("combined_league_matches.csv")

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

with open("outlier_removed_champion_winrates.json", "r") as file:
    win_rate_data = json.load(file)
win_rates = {int(k): float(v) for k, v in win_rate_data.items()}
for col in df.columns:
    if "champion" in col:
        df[col] = df[col].map(win_rates)

X = df.drop(columns=['target'])
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.5705329153605015
Confusion Matrix:
 [[ 718 1869]
 [ 460 2376]]
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.28      0.38      2587
           1       0.56      0.84      0.67      2836

    accuracy                           0.57      5423
   macro avg       0.58      0.56      0.53      5423
weighted avg       0.58      0.57      0.53      5423



In [86]:
df = pd.read_csv("combined_league_matches.csv")

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

with open("champion_max_winrates.json", "r") as file:
    win_rate_data = json.load(file)
win_rates = {int(k): float(v) for k, v in win_rate_data.items()}

for col in df.columns:
    if "champion" in col:
        df[col] = df[col].map(win_rates)

X = df.drop(columns=['target'])
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.5661073206712152
Confusion Matrix:
 [[ 718 1886]
 [ 467 2352]]
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.28      0.38      2604
           1       0.55      0.83      0.67      2819

    accuracy                           0.57      5423
   macro avg       0.58      0.56      0.52      5423
weighted avg       0.58      0.57      0.53      5423

