In [24]:
import pandas as pd
import numpy as np  
import torch 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

In [None]:
df = pd.read_csv("combined_league_matches.csv")
#Some useful stuff we might wanna use
champion_ids = [col for col in df.columns if 'champion' in col]
all_games = pd.concat([df[col] for col in champion_ids]) 
games_per_champ = all_games.value_counts() #maybe useful, find total number of games per champ id

Basic Random Forest

In [7]:
df = pd.read_csv("combined_league_matches.csv")

encoder = LabelEncoder()

for col in ['red_champion_0', 'red_champion_1', 'red_champion_2', 'red_champion_3', 'red_champion_4',
            'blue_champion_0', 'blue_champion_1', 'blue_champion_2', 'blue_champion_3', 'blue_champion_4']:
    df[col] = encoder.fit_transform(df[col])

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

#if we want to test without mastery just uncomment below
#df = df.drop(columns=['red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
#            'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'])


In [40]:
X = df.drop(columns=['target'])  # Features (character data)
y = df['target']  # win/loss from blue perspective

In [41]:
#splitting data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) #can use random_state

In [42]:
model = RandomForestClassifier()#can use random_state
model.fit(X_train, y_train)

#Predict
y_pred = model.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 0.56
              precision    recall  f1-score   support

           0       0.54      0.46      0.50      2231
           1       0.56      0.64      0.60      2428

    accuracy                           0.56      4659
   macro avg       0.55      0.55      0.55      4659
weighted avg       0.55      0.56      0.55      4659



Trying to scale the mastery levels and using random forest

In [18]:
df = pd.read_csv("combined_league_matches.csv")
champion_ids = [col for col in df.columns if 'champion' in col]
all_games = pd.concat([df[col] for col in champion_ids]) 
games_per_champ = all_games.value_counts() #maybe useful, find total number of games per champ id


encoder = LabelEncoder()

for col in ['red_champion_0', 'red_champion_1', 'red_champion_2', 'red_champion_3', 'red_champion_4',
            'blue_champion_0', 'blue_champion_1', 'blue_champion_2', 'blue_champion_3', 'blue_champion_4']:
    df[col] = encoder.fit_transform(df[col])

df['target'] = df['winner'].apply(lambda x: 1 if x == 'blue' else 0) #blueside win if 1
df = df.drop(columns=['match_id', 'winner'])

#if we want to test without mastery just uncomment below
#df = df.drop(columns=['red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
#            'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'])

#scale all mastery values
mastery_columns = [
    'red_mastery_0', 'red_mastery_1', 'red_mastery_2', 'red_mastery_3', 'red_mastery_4',
    'blue_mastery_0', 'blue_mastery_1', 'blue_mastery_2', 'blue_mastery_3', 'blue_mastery_4'
]

scaler = StandardScaler() #can also try minmaxscaler and keep within certain range

df[mastery_columns] = scaler.fit_transform(df[mastery_columns])

In [20]:
X = df.drop(columns=['target'])
y = df['target']

# Split the data and proceed with model training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = RandomForestClassifier()#can use random_state
model.fit(X_train, y_train)

#Predict
y_pred = model.predict(X_test)

#Eval
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.51      0.48      0.50      2158
           1       0.58      0.61      0.59      2501

    accuracy                           0.55      4659
   macro avg       0.55      0.54      0.54      4659
weighted avg       0.55      0.55      0.55      4659

