# Predict Winner from Early-game Performance using Logistic Regression

In [181]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

## Dataset

Source: https://oracleselixir.com/tools/downloads

In [182]:
df = pd.read_csv('../data/LeagueofLegends.csv', sep=',')
df.head()

Unnamed: 0,League,Year,Season,Type,blueTeamTag,bResult,rResult,redTeamTag,gamelength,golddiff,...,redMiddleChamp,goldredMiddle,redADC,redADCChamp,goldredADC,redSupport,redSupportChamp,goldredSupport,redBans,Address
0,NALCS,2015,Spring,Season,TSM,1,0,C9,40,"[0, 0, -14, -65, -268, -431, -488, -789, -494,...",...,Fizz,"[475, 475, 552, 842, 1178, 1378, 1635, 1949, 2...",Sneaky,Sivir,"[475, 475, 532, 762, 1097, 1469, 1726, 2112, 2...",LemonNation,Thresh,"[515, 515, 577, 722, 911, 1042, 1194, 1370, 14...","['Tristana', 'Leblanc', 'Nidalee']",http://matchhistory.na.leagueoflegends.com/en/...
1,NALCS,2015,Spring,Season,CST,0,1,DIG,38,"[0, 0, -26, -18, 147, 237, -152, 18, 88, -242,...",...,Azir,"[475, 475, 552, 786, 1097, 1389, 1660, 1955, 2...",CoreJJ,Corki,"[475, 475, 532, 868, 1220, 1445, 1732, 1979, 2...",KiWiKiD,Annie,"[515, 515, 583, 752, 900, 1066, 1236, 1417, 15...","['RekSai', 'Janna', 'Leblanc']",http://matchhistory.na.leagueoflegends.com/en/...
2,NALCS,2015,Spring,Season,WFX,1,0,GV,40,"[0, 0, 10, -60, 34, 37, 589, 1064, 1258, 913, ...",...,Azir,"[475, 475, 533, 801, 1006, 1233, 1385, 1720, 1...",Cop,Corki,"[475, 475, 533, 781, 1085, 1398, 1782, 1957, 2...",BunnyFuFuu,Janna,"[515, 515, 584, 721, 858, 1002, 1168, 1303, 14...","['Leblanc', 'Zed', 'RekSai']",http://matchhistory.na.leagueoflegends.com/en/...
3,NALCS,2015,Spring,Season,TIP,0,1,TL,41,"[0, 0, -15, 25, 228, -6, -243, 175, -346, 16, ...",...,Lulu,"[475, 475, 532, 771, 1046, 1288, 1534, 1776, 2...",KEITH,KogMaw,"[475, 475, 532, 766, 1161, 1438, 1776, 1936, 2...",Xpecial,Janna,"[515, 515, 583, 721, 870, 1059, 1205, 1342, 15...","['RekSai', 'Rumble', 'LeeSin']",http://matchhistory.na.leagueoflegends.com/en/...
4,NALCS,2015,Spring,Season,CLG,1,0,T8,35,"[40, 40, 44, -36, 113, 158, -121, -191, 23, 20...",...,Lulu,"[475, 475, 532, 807, 1042, 1338, 1646, 1951, 2...",Maplestreet8,Corki,"[475, 475, 532, 792, 1187, 1488, 1832, 2136, 2...",Dodo8,Annie,"[475, 475, 538, 671, 817, 948, 1104, 1240, 136...","['Rumble', 'Sivir', 'Rengar']",http://matchhistory.na.leagueoflegends.com/en/...


## Dataset preprocessing

Add Winner for each match by merging two original columns:

In [183]:
df['winner'] = np.where(df['bResult'] == 1, 'blue', 'red')

df['winner'].head()

0    blue
1     red
2    blue
3     red
4    blue
Name: winner, dtype: object


For this experiment, we reserve golddiff, expdiff, deathdiff at 15 min:

In [184]:
from ast import literal_eval
df['golddiff'] = df['golddiff'].apply(literal_eval)
df['golddiff'].head()
df['golddiff'] = df['golddiff'].apply(lambda x : x[14])
df['golddiff'].head()

0    -790
1    1394
2    2922
3    -335
4    -574
Name: golddiff, dtype: int64

In [185]:
from ast import literal_eval
df['bDragons'] = df['bDragons'].apply(literal_eval)
df['rDragons'] = df['rDragons'].apply(literal_eval)

def func(List,time = 15):
    Count = 0
    for recource in List:
        if recource[0] < time:
            Count += 1
    return Count

df['bDragons'] = df['bDragons'].apply(func,{"time":15})
df['rDragons'] = df['rDragons'].apply(func,{"time":15})
df['dragonDiff'] = df['bDragons'] - df['rDragons']

df['dragonDiff'].head()

0    0
1   -1
2    0
3   -1
4    1
Name: dragonDiff, dtype: int64

In [186]:
from ast import literal_eval
df['bTowers'] = df['bTowers'].apply(literal_eval)
df['rTowers'] = df['rTowers'].apply(literal_eval)


df['bTowers'] = df['bTowers'].apply(func,{"time":15})
df['rTowers'] = df['rTowers'].apply(func,{"time":15})
df['TowersDiff'] = df['bTowers'] - df['rTowers']

df['TowersDiff'].head()

0    0
1    0
2    0
3    0
4   -1
Name: TowersDiff, dtype: int64

In [188]:
from ast import literal_eval
df['bKills'] = df['bKills'].apply(literal_eval)
df['rKills'] = df['rKills'].apply(literal_eval)


df['bKills'] = df['bKills'].apply(func,{"time":15})
df['rKills'] = df['rKills'].apply(func,{"time":15})
df['KillsDiff'] = df['bKills'] - df['rKills']

df['KillsDiff'].head()

0    1
1    2
2    5
3   -2
4    0
Name: KillsDiff, dtype: int64

Get champion lineup stats:

In [189]:
df.columns

champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]
columns = ['golddiff','dragonDiff','TowersDiff','HeraldsDiff','KillsDiff', 'winner']
df = df[columns]

df.head()

Unnamed: 0,golddiff,dragonDiff,TowersDiff,KillsDiff,winner
0,-790,0,0,1,blue
1,1394,-1,0,2,red
2,2922,0,0,5,blue
3,-335,-1,0,-2,red
4,-574,1,-1,0,blue


Encode labels:

In [190]:


y = df['winner']
x = df[['golddiff','dragonDiff','TowersDiff','HeraldsDiff','KillsDiff']] 
x.head()

Unnamed: 0,golddiff,dragonDiff,TowersDiff,KillsDiff
0,-790,0,0,1
1,1394,-1,0,2
2,2922,0,0,5
3,-335,-1,0,-2
4,-574,1,-1,0


Normalize data:

In [191]:
scaler = StandardScaler()
scaler.fit(x)
x = scaler.transform(x)
columns = ['golddiff','dragonDiff','TowersDiff','HeraldsDiff','KillsDiff']
x = pd.DataFrame(x, columns = columns)

x.head()

Unnamed: 0,golddiff,dragonDiff,TowersDiff,KillsDiff
0,-0.432756,0.067537,-0.118261,0.290513
1,0.525309,-0.965859,-0.118261,0.633512
2,1.195603,0.067537,-0.118261,1.662506
3,-0.23316,-0.965859,-0.118261,-0.738482
4,-0.338003,1.100933,-0.948809,-0.052485


Finally, we get our x and y data for training.

In [192]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 0)

x_train.shape, x_test.shape

((5334, 4), (2286, 4))

## Train & Test

Train Logistic Regression model and test its accuracy.

Using `SGDClassifier(loss='log')` is equivalent to Logistic Regression optimized via Stochastic Gradient Descent taught in CS181 lecture.

In [193]:
parameters = {'alpha':[0.001, 0.005, 0.01, 0.05, 0.1, 0.5,  1, 2, 4, 8, 16]}
model = SGDClassifier(loss = 'log', random_state = 0)
model_LR_grid = GridSearchCV(model, param_grid = parameters)
model_LR_grid.fit(x_train, y_train)

model_LR = model_LR_grid.best_estimator_
print(model_LR)
print(model_LR.coef_)

y_pred = model_LR.predict(x_test)

print(classification_report(y_test, y_pred))

SGDClassifier(alpha=0.01, loss='log', random_state=0)
[[-0.86371889 -0.32403381 -0.13359875 -0.376122  ]]
              precision    recall  f1-score   support

        blue       0.75      0.75      0.75      1269
         red       0.69      0.69      0.69      1017

    accuracy                           0.72      2286
   macro avg       0.72      0.72      0.72      2286
weighted avg       0.72      0.72      0.72      2286



Test/predict single one match:

In [194]:
''' S11 EDG vs DK, match 3, DK(red) wins '''
# golddiffat15 = 922
# xpdiffat15 = -219
# champion_lineup_blue = ['Jayce', 'Xin Zhao', 'Twisted Fate', 'Jhin', 'Leona'] 
# champion_lineup_red = ['Gragas', 'Lee Sin', 'Sylas', 'Aphelios', 'Braum']

''' S11 EDG vs DK, match 4, EDG(blue) wins '''
# golddiffat15 = 2057
# xpdiffat15 = 1563
# champion_lineup_blue = ['Graves', 'Viego', 'Zoe', 'Lucian', 'Lulu'] 
# champion_lineup_red = ['Gwen', 'Talon', 'Orianna', 'Jhin', 'Nami'] 

''' S11 DK vs EDG, match 5, EDG(red) wins ''' 
golddiff = 0
dragonDiff = 0
TowersDiff = 0
HeraldsDiff = 0
KillsDiff = 0
champion_lineup_blue = ['Gnar', 'Rengar', 'Ahri', 'Caitlyn', 'Leona']
champion_lineup_red = ['Irelia', 'JarvanIV', 'Azir', 'Corki', 'Annie']

x = [np.concatenate([[golddiff,dragonDiff,TowersDiff,HeraldsDiff,KillsDiff]])]
x = scaler.transform(x)

y_pred = model_LR.predict_proba(x)

print(f"model predicted winner : {'blue' if model_LR.predict(x)[0] else 'red'}")
print(f"red wins: {y_pred[0][0] * 100:.1f}% | blue wins: {y_pred[0][1] * 100:.1f}%")

model predicted winner : blue
red wins: 53.0% | blue wins: 47.0%
