In [12]:
import pandas as pd
import numpy as np  
import torch 
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier


In [4]:
df = pd.read_csv("combined_league_matches.csv")
#Some useful stuff we might wanna use
champion_ids = [col for col in df.columns if 'champion' in col]
all_games = pd.concat([df[col] for col in champion_ids]) 
games_per_champ = all_games.value_counts() #maybe useful, find total number of games per champ id

Basic Random Forest

In [5]:
df = pd.read_csv("combined_league_matches.csv")

encoder = LabelEncoder()

for col in ['red_champion_0', 'red_champion_1', 'red_champion_2', 'red_champion_3', 'red_champion_4',
            'blue_champion_0', 'blue_champion_1', 'blue_champion_2', 'blue_champion_3', 'blue_champion_4']:
    df[col] = encoder.fit_transform(df[col])

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

#if we want to test without mastery just uncomment below
#df = df.drop(columns=['red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
#            'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'])


In [6]:
X = df.drop(columns=['target'])  # Features (character data)
y = df['target']  # win/loss from blue perspective

In [7]:
#splitting data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1) #can use random_state

In [13]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt','log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 1000,10)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10,14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4,6,8]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
              'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [14]:
model = RandomForestClassifier()#can use random_state
randomizedModel = RandomizedSearchCV(estimator = model, param_distributions= random_grid, n_iter=100, random_state=100,n_jobs=1)
randomizedModel.fit(X_train, y_train)


# to see best parameters
randomizedModel.best_params_

# to make new classifier with the best estimator
best_random_grid = randomizedModel.best_estimator_
best_random_grid.fit(X_train, y_train)

#Predict
y_pred = best_random_grid.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Accuracy: 0.56
              precision    recall  f1-score   support

           0       0.61      0.27      0.37      2259
           1       0.55      0.84      0.66      2400

    accuracy                           0.56      4659
   macro avg       0.58      0.55      0.52      4659
weighted avg       0.58      0.56      0.52      4659



Trying to scale the mastery levels and using random forest

In [15]:
#randomizedModel.best_params_
model = RandomForestClassifier(n_estimators= 600, min_samples_split= 5, min_samples_leaf= 1, max_features= 'sqrt', max_depth=10, criterion='gini')
model.fit(X_train, y_train)

#Predict
y_pred = model.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

{'n_estimators': 600,
 'min_samples_split': 5,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'max_depth': 10,
 'criterion': 'gini'}

In [9]:
df = pd.read_csv("combined_league_matches.csv")
champion_ids = [col for col in df.columns if 'champion' in col]
all_games = pd.concat([df[col] for col in champion_ids]) 
games_per_champ = all_games.value_counts() #maybe useful, find total number of games per champ id


encoder = LabelEncoder()

for col in ['red_champion_0', 'red_champion_1', 'red_champion_2', 'red_champion_3', 'red_champion_4',
            'blue_champion_0', 'blue_champion_1', 'blue_champion_2', 'blue_champion_3', 'blue_champion_4']:
    df[col] = encoder.fit_transform(df[col])

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

#if we want to test without mastery just uncomment below
#df = df.drop(columns=['red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
#            'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'])

#scale all mastery values
mastery_columns = [
    'red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
    'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'
]

scaler = StandardScaler() #can also try minmaxscaler and keep within certain range

df[mastery_columns] = scaler.fit_transform(df[mastery_columns])

In [10]:
X = df.drop(columns=['target'])
y = df['target']

# Split the data and proceed with model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = RandomForestClassifier()#can use random_state
model.fit(X_train, y_train)

#Predict
y_pred = model.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.54      0.46      0.50      2259
           1       0.56      0.63      0.59      2400

    accuracy                           0.55      4659
   macro avg       0.55      0.55      0.55      4659
weighted avg       0.55      0.55      0.55      4659

