# Predict Winner from Early-game Performance using Logistic Regression

In [298]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

## Dataset

Source: https://oracleselixir.com/tools/downloads

In [299]:
# Read the raw Oracle's Elixir export and keep only rows flagged as complete.
raw_df = pd.read_csv('../data/2021_LoL_esports_match_data.csv', sep=',')
df_complete = raw_df[raw_df['datacompleteness'] == 'complete'].reset_index(drop=True)

# Keep one row per match: the team-level row of the blue side.
is_blue_team_row = (df_complete['position'] == 'team') & (df_complete['side'] == 'Blue')
df = df_complete[is_blue_team_row].reset_index(drop=True)

print('# of matches: {}'.format(len(df)))

# of matches: 7296


## Dataset preprocessing

Add a `winner` label for each match, derived from the blue side's `result` column (1 = blue won, otherwise red won):

In [300]:
# Each row of df is the blue team's row for a match, so result == 1 means
# blue won; every other value is labelled as a red win.
blue_won = df['result'] == 1
df['winner'] = np.where(blue_won, 'blue', 'red')
df[['winner', 'result']].head()

Unnamed: 0,winner,result
0,blue,1
1,red,0
2,red,0
3,red,0
4,red,0



For this experiment, we keep the gold and experience differences at 15 minutes (`golddiffat15`, `xpdiffat15`); the death difference at 15 minutes is not used for now:

In [301]:
# Preview the two 15-minute differentials used as numeric features.
# A death differential (deathsat15 - opp_deathsat15) was considered as a
# third feature but is not computed here.
early_game_features = ['golddiffat15', 'xpdiffat15']
df[early_game_features].head()

Unnamed: 0,golddiffat15,xpdiffat15
0,5018.0,4255.0
1,573.0,-1879.0
2,-579.0,-1643.0
3,951.0,-107.0
4,2145.0,-420.0


Collect the champion lineup of each match (one column per side and role):

In [302]:
# Row layout assumed for df_complete: each game occupies ROWS_PER_GAME
# consecutive rows -- five blue players, then five red players, then two more
# rows (presumably the two team-level rows; verify against the data source).
ROWS_PER_GAME = 12
PLAYERS_PER_TEAM = 5

champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]

# Map the dataset's position codes to the lineup column of each side.
champion_map_blue = {'top':'blueTopChamp', 'jng':'blueJungleChamp', 'mid':'blueMiddleChamp', 'bot':'blueADCChamp', 'sup':'blueSupportChamp'}
champion_map_red = {'top':'redTopChamp', 'jng':'redJungleChamp', 'mid':'redMiddleChamp', 'bot':'redADCChamp', 'sup':'redSupportChamp'}

# The positional lookup below is only meaningful when every match in df owns
# exactly one 12-row block of df_complete; fail loudly instead of silently
# pairing matches with the wrong players.
if len(df_complete) != ROWS_PER_GAME * len(df):
    raise ValueError(
        f'expected {ROWS_PER_GAME} rows per match in df_complete, '
        f'got {len(df_complete)} rows for {len(df)} matches'
    )

positions = df_complete['position'].to_numpy()
picks = df_complete['champion'].to_numpy()

# Build one {column: champion} record per match and create the frame once,
# instead of writing cell by cell through chained indexing (df[col][i] = ...),
# which pandas may apply to a temporary copy.
lineups = []
for game in range(len(df)):
    blue_start = game * ROWS_PER_GAME
    red_start = blue_start + PLAYERS_PER_TEAM
    lineup = {}
    for slot in range(PLAYERS_PER_TEAM):
        lineup[champion_map_blue[positions[blue_start + slot]]] = picks[blue_start + slot]
        lineup[champion_map_red[positions[red_start + slot]]] = picks[red_start + slot]
    lineups.append(lineup)

# Roles missing from a record end up as NaN in the corresponding column.
lineup_df = pd.DataFrame(lineups, columns=champion_columns, index=df.index)
df = df.assign(**{name: lineup_df[name] for name in champion_columns})

# Feature matrix (independent copy, so later edits never touch df) and target.
columns = champion_columns + ['golddiffat15', 'xpdiffat15']
x = df[columns].copy()
y = df['result']

Encode labels:

In [303]:
champion_label_encoder = LabelEncoder()

# Rebuild x from df so this cell can be re-run safely and never writes into a
# slice of df (the original assigned encoded columns into a view-like slice).
x = df[columns].copy()

# One shared vocabulary for all ten lineup slots, so a champion gets the same
# integer id regardless of side or role. Sorting makes the printed list
# deterministic; the encoder itself orders its classes the same way.
champions = set()
for champ in champion_columns :
    champions |= set(pd.unique(x[champ]))
champions = np.array(sorted(champions))
print(f'{len(champions)} champions: {champions}')

champion_label_encoder.fit(champions)
for champ in champion_columns :
    # Replace each champion name with its integer id.
    x[champ] = champion_label_encoder.transform(x[champ])

x.head()

155 champions: ['Quinn' 'Rell' 'Karma' 'Graves' 'Illaoi' 'Akshan' 'Alistar' 'Fizz'
 'Teemo' 'Vi' 'Malphite' 'Galio' 'Poppy' 'Miss Fortune' 'Lillia' 'Lucian'
 "Kai'Sa" 'Jarvan IV' 'Kled' 'Lux' 'Xayah' 'Fiora' 'Viktor' 'Twitch'
 'Karthus' 'Kayle' 'Udyr' 'Taliyah' 'Thresh' 'Bard' 'Kassadin' 'Samira'
 'Sejuani' "Cho'Gath" 'Morgana' 'Lissandra' 'Zilean' 'Janna' 'Camille'
 'Sylas' 'Warwick' 'Vex' 'Shen' 'Zyra' 'Zed' 'Jayce' 'Zac' 'Corki'
 'Fiddlesticks' 'Yone' 'Viego' 'Malzahar' 'Ashe' "Kog'Maw" 'Braum'
 'Trundle' 'Kindred' 'Draven' 'Kennen' 'Rengar' 'Brand' 'Aphelios' 'Amumu'
 'Yuumi' 'Skarner' 'Rakan' 'Azir' 'Tahm Kench' 'Renekton' 'Sion' 'Ornn'
 'Singed' 'Tryndamere' 'Cassiopeia' 'Shyvana' 'Senna' 'Katarina'
 'Mordekaiser' 'Vayne' 'Zoe' 'Riven' 'Caitlyn' 'Pantheon' 'Evelynn'
 'Shaco' 'Xerath' 'Sivir' 'Aatrox' 'Akali' 'Diana' 'Ahri' 'Garen' 'Varus'
 'Sett' 'Ivern' 'Olaf' 'Neeko' 'Urgot' 'Seraphine' 'Jinx' "Kha'Zix"
 'Kalista' 'Kayn' 'Lee Sin' 'Talon' 'Nidalee' 'Volibear' 'Leona' 'Maokai'
 

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiffat15,xpdiffat15
0,75,37,153,74,32,86,60,85,28,65,5018.0,4255.0
1,36,37,99,50,4,86,84,116,74,32,573.0,-1879.0
2,36,37,153,145,4,99,81,146,74,32,-579.0,-1643.0
3,52,87,116,101,65,0,81,137,50,32,951.0,-107.0
4,46,39,85,101,4,17,66,137,50,73,2145.0,-420.0


Standardize the features (zero mean, unit variance per column):

In [304]:
# Standardise every feature column and keep the fitted scaler, which is
# reused later to transform a single hand-written match.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split further down, so test rows influence the scaling
# statistics -- consider fitting on the training split only.
scaler = StandardScaler()
x = pd.DataFrame(scaler.fit_transform(x), columns = columns)

x.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiffat15,xpdiffat15
0,0.356081,-1.246656,1.447364,0.00794,-0.912481,0.650655,-0.566314,-0.060945,-1.022736,-0.054882,1.496092,1.875853
1,-0.61963,-1.246656,0.229419,-0.54377,-1.595501,0.650655,0.099456,0.619456,0.019262,-0.870246,0.089944,-0.846535
2,-0.61963,-1.246656,1.447364,1.640082,-1.595501,0.972483,0.016235,1.277908,0.019262,-0.870246,-0.274484,-0.741793
3,-0.219338,0.128378,0.612846,0.628614,-0.107494,-1.478358,0.016235,1.080372,-0.524389,-0.870246,0.209522,-0.060087
4,-0.369447,-1.191654,-0.086345,0.628614,-1.595501,-1.057507,-0.399871,1.080372,-0.524389,0.142782,0.587236,-0.199002


Finally, we split x and y into a training set (80%) and a test set (20%).

In [305]:
# Hold out 20% of the matches for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(x, y, test_size = 0.2, random_state = 0)
x_train, x_test, y_train, y_test = split_parts

(x_train.shape, x_test.shape)

((5836, 12), (1460, 12))

## Train & Test

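Train a logistic regression model and evaluate it on the held-out test set.

`SGDClassifier` with the logistic loss (`loss='log'`, spelled `loss='log_loss'` since scikit-learn 1.1) is equivalent to logistic regression optimized via stochastic gradient descent, as taught in the CS181 lectures.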

In [306]:
# Candidate regularisation strengths; GridSearchCV picks the best one using
# its default cross-validation and scoring.
parameters = {'alpha':[0.001, 0.005, 0.01, 0.05, 0.1, 0.5,  1, 2, 4, 8, 16]}

# loss='log_loss' selects logistic regression trained with SGD. The former
# spelling 'log' was deprecated in scikit-learn 1.1 and removed in 1.3, so it
# raises on current releases (this cell now requires scikit-learn >= 1.1).
model = SGDClassifier(loss = 'log_loss', random_state = 0)
model_LR_grid = GridSearchCV(model, param_grid = parameters)
model_LR_grid.fit(x_train, y_train)

# Best configuration found by the search, plus its learned weights
# (one coefficient per column of x_train, in column order).
model_LR = model_LR_grid.best_estimator_
print(model_LR)
print(model_LR.coef_)

# Evaluate on the held-out test split.
y_pred = model_LR.predict(x_test)

print(classification_report(y_test, y_pred))

SGDClassifier(alpha=0.1, loss='log', random_state=0)
[[-0.03749475 -0.02517247  0.02171632 -0.01049216  0.00431097  0.02789022
   0.02071576  0.04019872  0.01428512  0.00536357  0.68051888  0.51028488]]
              precision    recall  f1-score   support

           0       0.73      0.72      0.72       656
           1       0.77      0.78      0.78       804

    accuracy                           0.75      1460
   macro avg       0.75      0.75      0.75      1460
weighted avg       0.75      0.75      0.75      1460



Predict the winner of a single match:

In [307]:
def predict_match(lineup_blue, lineup_red, golddiff, xpdiff):
    """Predict the winner of one match with the trained model.

    Parameters
    ----------
    lineup_blue, lineup_red : list of str
        Five champion names per side, in the same role order as
        ``champion_columns`` (top, jungle, middle, ADC, support). Every name
        must be known to ``champion_label_encoder``; an unseen name makes the
        encoder raise.
    golddiff, xpdiff : number
        Gold / experience difference at 15 minutes (``golddiffat15`` and
        ``xpdiffat15`` features).

    Returns
    -------
    tuple
        ``(winner, red_proba, blue_proba)`` where ``winner`` is ``'blue'`` or
        ``'red'`` and the probabilities come from ``predict_proba``.
    """
    features = np.concatenate([
        champion_label_encoder.transform(lineup_blue),
        champion_label_encoder.transform(lineup_red),
        [golddiff, xpdiff],
    ])
    # Keep the training column names so the scaler and the model receive the
    # same feature layout they were fitted on.
    match_raw = pd.DataFrame([features], columns = columns)
    match_scaled = pd.DataFrame(scaler.transform(match_raw), columns = columns)

    winner = 'blue' if model_LR.predict(match_scaled)[0] else 'red'
    # Column 0 of predict_proba is class 0 (blue lost), column 1 is class 1.
    red_proba, blue_proba = model_LR.predict_proba(match_scaled)[0]
    return winner, red_proba, blue_proba


# Alternative scenarios -- swap one of these in for the active block below.
#
# S11 EDG vs DK, match 3, DK(red) wins
# golddiffat15 = 922
# xpdiffat15 = -219
# champion_lineup_blue = ['Jayce', 'Xin Zhao', 'Twisted Fate', 'Jhin', 'Leona']
# champion_lineup_red = ['Gragas', 'Lee Sin', 'Sylas', 'Aphelios', 'Braum']
#
# S11 EDG vs DK, match 4, EDG(blue) wins
# golddiffat15 = 2057
# xpdiffat15 = 1563
# champion_lineup_blue = ['Graves', 'Viego', 'Zoe', 'Lucian', 'Lulu']
# champion_lineup_red = ['Gwen', 'Talon', 'Orianna', 'Jhin', 'Nami']

# S11 DK vs EDG, match 5, EDG(red) wins
golddiffat15 = -795
xpdiffat15 = -1087
champion_lineup_blue = ['Graves', 'Trundle', 'Syndra', 'Ziggs', 'Leona']
champion_lineup_red = ['Kennen', 'Xin Zhao', 'Zoe', 'Aphelios', 'Rakan']

# Dedicated names are used for the result so the notebook-level training
# matrix `x` and the test predictions `y_pred` are not overwritten.
match_winner, match_red_proba, match_blue_proba = predict_match(
    champion_lineup_blue, champion_lineup_red, golddiffat15, xpdiffat15
)

print(f"model predicted winner : {match_winner}")
print(f"red wins: {match_red_proba * 100:.1f}% | blue wins: {match_blue_proba * 100:.1f}%")

model predicted winner : red
red wins: 57.0% | blue wins: 43.0%
