# Predict Winner from Early-game Performance (15min) using Logistic Regression

In [140]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import classification_report, accuracy_score
import warnings
# Suppress every warning category for the rest of the notebook.
warnings.filterwarnings('ignore')

# Game-time cutoff that count_item() compares event timestamps against.
PREDICT_TIME = 15

## Dataset

Source: https://www.kaggle.com/chuckephron/leagueoflegends

In [141]:
# Read the raw match table and give it a fresh 0..n-1 index in one expression.
df = pd.read_csv('../data/LeagueofLegends.csv', sep=',').reset_index(drop=True)
print(f'# of matches: {len(df)}')
df.head()

# of matches: 7620


Unnamed: 0,League,Year,Season,Type,blueTeamTag,bResult,rResult,redTeamTag,gamelength,golddiff,...,redMiddleChamp,goldredMiddle,redADC,redADCChamp,goldredADC,redSupport,redSupportChamp,goldredSupport,redBans,Address
0,NALCS,2015,Spring,Season,TSM,1,0,C9,40,"[0, 0, -14, -65, -268, -431, -488, -789, -494,...",...,Fizz,"[475, 475, 552, 842, 1178, 1378, 1635, 1949, 2...",Sneaky,Sivir,"[475, 475, 532, 762, 1097, 1469, 1726, 2112, 2...",LemonNation,Thresh,"[515, 515, 577, 722, 911, 1042, 1194, 1370, 14...","['Tristana', 'Leblanc', 'Nidalee']",http://matchhistory.na.leagueoflegends.com/en/...
1,NALCS,2015,Spring,Season,CST,0,1,DIG,38,"[0, 0, -26, -18, 147, 237, -152, 18, 88, -242,...",...,Azir,"[475, 475, 552, 786, 1097, 1389, 1660, 1955, 2...",CoreJJ,Corki,"[475, 475, 532, 868, 1220, 1445, 1732, 1979, 2...",KiWiKiD,Annie,"[515, 515, 583, 752, 900, 1066, 1236, 1417, 15...","['RekSai', 'Janna', 'Leblanc']",http://matchhistory.na.leagueoflegends.com/en/...
2,NALCS,2015,Spring,Season,WFX,1,0,GV,40,"[0, 0, 10, -60, 34, 37, 589, 1064, 1258, 913, ...",...,Azir,"[475, 475, 533, 801, 1006, 1233, 1385, 1720, 1...",Cop,Corki,"[475, 475, 533, 781, 1085, 1398, 1782, 1957, 2...",BunnyFuFuu,Janna,"[515, 515, 584, 721, 858, 1002, 1168, 1303, 14...","['Leblanc', 'Zed', 'RekSai']",http://matchhistory.na.leagueoflegends.com/en/...
3,NALCS,2015,Spring,Season,TIP,0,1,TL,41,"[0, 0, -15, 25, 228, -6, -243, 175, -346, 16, ...",...,Lulu,"[475, 475, 532, 771, 1046, 1288, 1534, 1776, 2...",KEITH,KogMaw,"[475, 475, 532, 766, 1161, 1438, 1776, 1936, 2...",Xpecial,Janna,"[515, 515, 583, 721, 870, 1059, 1205, 1342, 15...","['RekSai', 'Rumble', 'LeeSin']",http://matchhistory.na.leagueoflegends.com/en/...
4,NALCS,2015,Spring,Season,CLG,1,0,T8,35,"[40, 40, 44, -36, 113, 158, -121, -191, 23, 20...",...,Lulu,"[475, 475, 532, 807, 1042, 1338, 1646, 1951, 2...",Maplestreet8,Corki,"[475, 475, 532, 792, 1187, 1488, 1832, 2136, 2...",Dodo8,Annie,"[475, 475, 538, 671, 817, 948, 1104, 1240, 136...","['Rumble', 'Sivir', 'Rengar']",http://matchhistory.na.leagueoflegends.com/en/...


## Dataset preprocessing

Add a `winner` column for each match, derived from the original result column `bResult` (1 means blue won; anything else is labelled red):

In [142]:
# Label every match with the winning side: 'blue' where bResult is 1, else 'red'.
df['winner'] = np.where(df['bResult'].eq(1), 'blue', 'red')

df[['winner']].head()

Unnamed: 0,winner
0,blue
1,red
2,blue
3,red
4,blue



For this experiment, we keep the difference between the two sides in team gold, number of enemy towers destroyed, number of jungle monsters killed (dragons and heralds), and kills, all at 15 min:

In [143]:
from ast import literal_eval
# Parse each stringified gold-difference list and keep a single snapshot value.
# The index is derived from PREDICT_TIME (15 -> index 14, as before) so this
# feature follows the same constant that count_item() uses for the
# event-based features, instead of a separate hard-coded 14.
# NOTE(review): index PREDICT_TIME - 1 assumes the first list element is
# minute 1 — confirm against the dataset description.
df['golddiff'] = df['golddiff'].apply(lambda raw: literal_eval(raw)[PREDICT_TIME - 1])
df[['golddiff']].head()

Unnamed: 0,golddiff
0,-790
1,1394
2,2922
3,-335
4,-574


In [144]:
def count_item(items, time_limit=None):
    """Count the records whose first element is at or below a cutoff.

    Parameters
    ----------
    items : iterable of sequences
        Records to inspect; only ``item[0]`` of each record is read.
    time_limit : number, optional
        Inclusive cutoff. When omitted, the notebook-level ``PREDICT_TIME``
        is used (looked up at call time), which matches the previous
        single-argument behaviour.

    Returns
    -------
    int
        Number of records with ``item[0] <= time_limit``; 0 for empty input.
    """
    if time_limit is None:
        time_limit = PREDICT_TIME
    return sum(1 for item in items if item[0] <= time_limit)

# Turn each side's stringified dragon log into a count of entries accepted by
# count_item(), then take blue minus red.
for dragon_col in ('bDragons', 'rDragons'):
    df[dragon_col] = df[dragon_col].apply(literal_eval)

for dragon_col in ('bDragons', 'rDragons'):
    df[dragon_col] = df[dragon_col].apply(count_item)

df['dragondiff'] = df['bDragons'].sub(df['rDragons'])

df[['dragondiff']].tail()

Unnamed: 0,dragondiff
7615,-1
7616,-1
7617,-1
7618,1
7619,0


In [145]:
# Parse the stringified tower logs for both sides first ...
blue_tower_log = df['bTowers'].apply(literal_eval)
red_tower_log = df['rTowers'].apply(literal_eval)
df['bTowers'] = blue_tower_log
df['rTowers'] = red_tower_log

# ... then reduce each log to a count via count_item() and take blue minus red.
df['bTowers'] = df['bTowers'].apply(count_item)
df['rTowers'] = df['rTowers'].apply(count_item)
df['towerdiff'] = df['bTowers'].sub(df['rTowers'])

df[['towerdiff']].head()

Unnamed: 0,towerdiff
0,0
1,0
2,0
3,0
4,-1


In [146]:
# Herald feature: parse both sides' stringified logs, count the entries that
# count_item() accepts, and store blue minus red.
herald_cols = ('bHeralds', 'rHeralds')

for herald_col in herald_cols:
    df[herald_col] = df[herald_col].apply(literal_eval)

for herald_col in herald_cols:
    df[herald_col] = df[herald_col].apply(count_item)

df['heralddiff'] = df['bHeralds'] - df['rHeralds']

df[['heralddiff']].head()

Unnamed: 0,heralddiff
0,0
1,0
2,0
3,0
4,0


In [147]:
# Kill feature: same recipe as the other event-based features.
# Step 1: stringified logs -> Python lists.
df['bKills'], df['rKills'] = (
    df['bKills'].apply(literal_eval),
    df['rKills'].apply(literal_eval),
)

# Step 2: lists -> counts of entries accepted by count_item().
df['bKills'], df['rKills'] = (
    df['bKills'].apply(count_item),
    df['rKills'].apply(count_item),
)

# Step 3: blue minus red.
df['killdiff'] = df['bKills'] - df['rKills']

df[['killdiff']].head()

Unnamed: 0,killdiff
0,1
1,2
2,5
3,-2
4,0


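Select the engineered feature columns and the label (the champion lineup columns are listed for reference but are not used by this model):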

In [148]:
# Champion pick columns of the raw table. They are only listed here; they are
# not included in the feature selection below.
champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]

# Engineered difference features used as model inputs.
columns = ['golddiff','dragondiff','heralddiff','towerdiff', 'killdiff']

# Keep only the model inputs plus the label. (The former leading `df.columns`
# expression was dropped: it was not the cell's last expression, so its value
# was never displayed or used.)
df = df[columns + ['winner']]

df.head()

Unnamed: 0,golddiff,dragondiff,heralddiff,towerdiff,killdiff,winner
0,-790,0,0,0,1,blue
1,1394,-1,0,0,2,red
2,2922,0,0,0,5,blue
3,-335,-1,0,0,-2,red
4,-574,1,0,-1,0,blue


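Separate the label vector `y` from the feature matrix `x` (the labels are kept as the strings `'blue'` / `'red'`):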

In [149]:
# Model inputs (x) and target labels (y), taken from the trimmed table.
x, y = df[columns], df['winner']
x.head()

Unnamed: 0,golddiff,dragondiff,heralddiff,towerdiff,killdiff
0,-790,0,0,0,1
1,1394,-1,0,0,2
2,2922,0,0,0,5
3,-335,-1,0,0,-2
4,-574,1,0,-1,0


Standardize the features with `StandardScaler`:

In [150]:
# Fit the scaler and transform the features in a single call, then restore the
# column names that the transform's array output drops.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed in a later cell, so test rows contribute to the scaling statistics.
scaler = StandardScaler()
x = pd.DataFrame(scaler.fit_transform(x), columns = columns)

x.head()

Unnamed: 0,golddiff,dragondiff,heralddiff,towerdiff,killdiff
0,-0.432756,0.067396,-0.069947,-0.11784,0.29035
1,0.525309,-0.965918,-0.069947,-0.11784,0.633209
2,1.195603,0.067396,-0.069947,-0.11784,1.661786
3,-0.23316,-0.965918,-0.069947,-0.11784,-0.738226
4,-0.338003,1.10071,-0.069947,-0.948496,-0.052509


Finally, we get our x and y data for training.

In [151]:
# Optional ablation: uncomment to keep gold difference as the only feature.
# x = x[["golddiff"]]

# Hold out 30% of the matches for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

(x_train.shape, x_test.shape)

((5334, 5), (2286, 5))

## Train & Test

Train Logistic Regression model and test its accuracy.

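Using `SGDClassifier(loss='log')` is equivalent to logistic regression optimized via stochastic gradient descent, as taught in the CS181 lecture. (From scikit-learn 1.1 onward this loss is named `'log_loss'`; the `'log'` spelling was removed in 1.3.)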

In [152]:
# Candidate values of `alpha` to try in the grid search.
parameters = {'alpha': [0.001, 0.005, 0.01, 0.05, 0.1]}

# NOTE(review): loss='log' matches the scikit-learn version that produced the
# recorded output; newer releases spell this loss 'log_loss' — adjust when
# upgrading.
model = SGDClassifier(loss='log', random_state=0)

# GridSearchCV.fit returns the fitted search object, so build and fit in one step.
model_LR_grid = GridSearchCV(model, param_grid=parameters).fit(x_train, y_train)

# Best estimator found by the search (refit by GridSearchCV).
model_LR = model_LR_grid.best_estimator_
print(model_LR)

# NOTE(review): with two classes, coef_[0] is presumably expressed relative to
# model_LR.classes_[1] — check classes_ before interpreting the signs.
print("\nLearned weight:")
print(dict(zip(columns, model_LR.coef_[0])))

# Evaluate on the held-out matches.
y_pred = model_LR.predict(x_test)
print("\nMetrics:")
print(classification_report(y_test, y_pred))

print(f"Accuracy:{accuracy_score(y_test, y_pred) * 100:.2f}%")

SGDClassifier(alpha=0.001, loss='log', random_state=0)

Learned weight:
{'golddiff': -1.018653262302649, 'dragondiff': -0.39443061370544125, 'heralddiff': -0.0013503234704892969, 'towerdiff': -0.04361738178023362, 'killdiff': -0.3664231335562153}

Metrics:
              precision    recall  f1-score   support

        blue       0.75      0.75      0.75      1269
         red       0.69      0.69      0.69      1017

    accuracy                           0.72      2286
   macro avg       0.72      0.72      0.72      2286
weighted avg       0.72      0.72      0.72      2286

Accuracy:72.31%


Predict S11 World Championship Final: EDG vs DK

In [153]:
# Stat differences for the single game being scored, expressed with the same
# five features the model was trained on. Swap in one of the commented
# alternatives below to score a different game.

# S11 EDG vs DK, match 3, DK(red) wins
golddiff = 890
dragondiff = -1
heralddiff = -1
towerdiff = 0
killdiff = 0

# S11 EDG vs DK, match 4, EDG(blue) wins
# golddiff = 2057
# dragondiff = 1
# heralddiff = 2
# towerdiff = 1
# killdiff = 0

# S11 DK vs EDG, match 5, EDG(red) wins
# golddiff = -797
# dragondiff = 0
# heralddiff = 1
# towerdiff = 0
# killdiff = -3

match_stats = {
    'golddiff': golddiff,
    'dragondiff': dragondiff,
    'heralddiff': heralddiff,
    'towerdiff': towerdiff,
    'killdiff': killdiff,
}

# Build a one-row frame ordered by `columns`, so both the feature names and
# the feature order match the frame the scaler and the model were fitted on
# (a bare list relies on positional order only).
x = pd.DataFrame([match_stats])[columns]
x = pd.DataFrame(scaler.transform(x), columns=columns)

y_pred = model_LR.predict_proba(x)

# Look probabilities up by class label rather than assuming which column of
# predict_proba belongs to which side.
win_prob = dict(zip(model_LR.classes_, y_pred[0]))

print(f"model predicted winner : {model_LR.predict(x)[0]}")
print(f"red wins: {win_prob['red'] * 100:.1f}% | blue wins: {win_prob['blue'] * 100:.1f}%")

model predicted winner : blue
red wins: 46.8% | blue wins: 53.2%
