# Predict Winner from Champion Lineup using Logistic Regression

In [210]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings('ignore')

## Dataset

Source: https://oracleselixir.com/tools/downloads

In [211]:
df = pd.read_csv('../data/2021_LoL_esports_match_data.csv', sep=',')
df_complete = df[df['datacompleteness'] == 'complete']
df_complete.reset_index(drop = True, inplace = True)
df = df_complete[df_complete['position'] == 'team']
df = df[df['side'] == 'Blue']
df.reset_index(drop = True, inplace = True)

print('# of matches: {}'.format(len(df)))

# of matches: 7296


## Dataset preprocessing

Add Winner for each match by merging two original columns:

In [212]:
df['winner'] = np.where(df['result']==1, 'blue', 'red')
df[['winner', 'result']].head()

Unnamed: 0,winner,result
0,blue,1
1,red,0
2,red,0
3,red,0
4,red,0



For this experiment, we reserve golddiff, expdiff, deathdiff at 15 min:

In [213]:
df[['golddiffat15', 'xpdiffat15']].head()
# df['deathdiffat15'] = df['deathsat15'] - df['opp_deathsat15']
# print(df[['deathdiffat15', 'deathsat15', 'opp_deathsat15']].head())

Unnamed: 0,golddiffat15,xpdiffat15
0,5018.0,4255.0
1,573.0,-1879.0
2,-579.0,-1643.0
3,951.0,-107.0
4,2145.0,-420.0


Get champion lineup stats:

In [214]:
champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]
df[champion_columns] = np.NAN

champion_map_blue = {'top':'blueTopChamp', 'jng':'blueJungleChamp', 'mid':'blueMiddleChamp', 'bot':'blueADCChamp', 'sup':'blueSupportChamp'}
champion_map_red = {'top':'redTopChamp', 'jng':'redJungleChamp', 'mid':'redMiddleChamp', 'bot':'redADCChamp', 'sup':'redSupportChamp'}

for i in range(len(df)) :
    for j in range(5) :
        position = df_complete['position'][i * 12 + j]
        df[champion_map_blue[position]][i] = df_complete['champion'][i * 12 + j]
        position = df_complete['position'][i * 12 + 5 + j ]
        df[champion_map_red[position]][i] = df_complete['champion'][i * 12 + 5 + j]

df.head()

Unnamed: 0,gameid,datacompleteness,url,league,year,split,playoffs,date,game,patch,...,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp
0,ESPORTSTMNT03/1632489,complete,http://matchhistory.na.leagueoflegends.com/en/...,KeSPA,2021,,0,2021-01-02 07:40:39,1,10.25,...,Mordekaiser,Graves,Zoe,Miss Fortune,Galio,Ornn,Kindred,Orianna,Ezreal,Leona
1,ESPORTSTMNT03/1632500,complete,http://matchhistory.na.leagueoflegends.com/en/...,KeSPA,2021,,0,2021-01-02 08:53:45,2,10.25,...,Gragas,Graves,Rumble,Kai'Sa,Alistar,Ornn,Olaf,Syndra,Miss Fortune,Galio
2,ESPORTSTMNT03/1632502,complete,http://matchhistory.na.leagueoflegends.com/en/...,KeSPA,2021,,0,2021-01-02 09:50:49,3,10.25,...,Gragas,Graves,Zoe,Yasuo,Alistar,Rumble,Nidalee,Yone,Miss Fortune,Galio
3,6909-9183,complete,https://lpl.qq.com/es/stats.shtml?bmid=6909,LPL,2021,Spring,0,2021-01-09 09:22:04,1,11.01,...,Karma,Pantheon,Syndra,Samira,Leona,Aatrox,Nidalee,Viktor,Kai'Sa,Galio
4,6909-9184,complete,https://lpl.qq.com/es/stats.shtml?bmid=6909,LPL,2021,Spring,0,2021-01-09 10:16:20,2,11.01,...,Jax,Hecarim,Orianna,Samira,Alistar,Camille,Lillia,Viktor,Kai'Sa,Maokai


In [215]:

columns = champion_columns
x = df[columns]
y = df['result']

x.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp
0,Mordekaiser,Graves,Zoe,Miss Fortune,Galio,Ornn,Kindred,Orianna,Ezreal,Leona
1,Gragas,Graves,Rumble,Kai'Sa,Alistar,Ornn,Olaf,Syndra,Miss Fortune,Galio
2,Gragas,Graves,Zoe,Yasuo,Alistar,Rumble,Nidalee,Yone,Miss Fortune,Galio
3,Karma,Pantheon,Syndra,Samira,Leona,Aatrox,Nidalee,Viktor,Kai'Sa,Galio
4,Jax,Hecarim,Orianna,Samira,Alistar,Camille,Lillia,Viktor,Kai'Sa,Maokai


Encode labels:

In [216]:
champion_label_encoder = LabelEncoder()
champions = set()
for champ in champion_columns :
    champions |= set(pd.unique(x[champ]))
champions = np.array(list(champions))
print(f'{len(champions)} champions: {champions}')

champion_label_encoder.fit(champions)
for champ in champion_columns :
    x[champ] = champion_label_encoder.transform(x[champ])

x.head()

155 champions: ['Jax' 'Tahm Kench' "Rek'Sai" 'Annie' 'Vi' 'Kayle' 'Xerath' 'Sion' 'Riven'
 'Wukong' 'Zyra' 'Zoe' 'Yasuo' 'Kayn' 'Brand' 'Bard' 'Elise' 'Nami'
 'Taric' 'Singed' 'Vex' 'Lux' 'Rakan' 'Ivern' 'Gnar' 'Viego' 'Teemo'
 "Vel'Koz" 'Jayce' 'Lucian' 'Viktor' 'Volibear' 'Gangplank' 'Quinn'
 'Orianna' 'Ashe' 'Sivir' 'Sejuani' 'Qiyana' 'Fiora' 'Aphelios' 'Ahri'
 'Malzahar' 'Draven' 'Rumble' 'Vayne' 'Rell' 'Ezreal' 'Twisted Fate'
 'Graves' 'Fizz' 'Mordekaiser' 'Zac' 'Zed' 'Irelia' 'Lissandra' 'Talon'
 'Jarvan IV' 'Katarina' 'Hecarim' 'Kled' 'Ziggs' 'Karthus' 'Rengar'
 'Tryndamere' 'Fiddlesticks' 'Leona' 'Aatrox' "Kog'Maw" 'Poppy' 'Nasus'
 'Nocturne' 'Gragas' 'Dr. Mundo' 'Veigar' 'Lulu' 'Miss Fortune' 'Tristana'
 'Soraka' 'LeBlanc' 'Evelynn' 'Warwick' 'Yuumi' 'Ryze' 'Seraphine' 'Sett'
 'Syndra' 'Neeko' 'Lee Sin' 'Xayah' 'Braum' "Kai'Sa" "Cho'Gath"
 'Cassiopeia' 'Shaco' 'Ornn' 'Lillia' 'Aurelion Sol' 'Malphite' 'Akali'
 'Camille' 'Maokai' 'Garen' 'Kindred' 'Samira' 'Karma' 'Janna' 'Shyv

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp
0,75,37,153,74,32,86,60,85,28,65
1,36,37,99,50,4,86,84,116,74,32
2,36,37,153,145,4,99,81,146,74,32
3,52,87,116,101,65,0,81,137,50,32
4,46,39,85,101,4,17,66,137,50,73


Normalize data:

In [217]:
scaler = StandardScaler()
scaler.fit(x)
x = scaler.transform(x)
x = pd.DataFrame(x, columns = columns)
x.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp
0,0.356081,-1.246656,1.447364,0.00794,-0.912481,0.650655,-0.566314,-0.060945,-1.022736,-0.054882
1,-0.61963,-1.246656,0.229419,-0.54377,-1.595501,0.650655,0.099456,0.619456,0.019262,-0.870246
2,-0.61963,-1.246656,1.447364,1.640082,-1.595501,0.972483,0.016235,1.277908,0.019262,-0.870246
3,-0.219338,0.128378,0.612846,0.628614,-0.107494,-1.478358,0.016235,1.080372,-0.524389,-0.870246
4,-0.369447,-1.191654,-0.086345,0.628614,-1.595501,-1.057507,-0.399871,1.080372,-0.524389,0.142782


Finally, we get our x and y data for training.

In [218]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 10)

x_train.shape, x_test.shape

((5107, 10), (2189, 10))

## Train & Test

Train Logistic Regression model and test its accuracy.

Using `SGDClassifier(loss='log')` is equivalent to Logistic Regression optimized via Stochastic Gradient Descent taught in CS181 lecture.

In [219]:
parameters = {'alpha':[0.001, 0.005, 0.01, 0.05, 0.1, 0.5,  1, 2, 4, 8, 16]}
model = SGDClassifier(loss = 'log', random_state = 0)
model_LR_grid = GridSearchCV(model, param_grid = parameters)
model_LR_grid.fit(x_train, y_train)

model_LR = model_LR_grid.best_estimator_
print(model_LR)
print(model_LR.coef_)

y_pred = model_LR.predict(x_test)

print(classification_report(y_test, y_pred))
print(f"Accuracy:{accuracy_score(y_test, y_pred) * 100:.2f}%")

SGDClassifier(alpha=1, loss='log', random_state=0)
[[-0.01456206  0.00250467  0.0092145  -0.00251167  0.00143067  0.01248499
  -0.00251188  0.00457269  0.0051565  -0.00181853]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1005
           1       0.54      1.00      0.70      1184

    accuracy                           0.54      2189
   macro avg       0.27      0.50      0.35      2189
weighted avg       0.29      0.54      0.38      2189

Accuracy:54.09%


Predict S11 World Championship Final: EDG vs DK

In [220]:
''' S11 EDG vs DK, match 3, DK(red) wins '''
champion_lineup_blue = ['Jayce', 'Xin Zhao', 'Twisted Fate', 'Jhin', 'Leona'] 
champion_lineup_red = ['Gragas', 'Lee Sin', 'Sylas', 'Aphelios', 'Braum']

''' S11 EDG vs DK, match 4, EDG(blue) wins '''
# champion_lineup_blue = ['Graves', 'Viego', 'Zoe', 'Lucian', 'Lulu'] 
# champion_lineup_red = ['Gwen', 'Talon', 'Orianna', 'Jhin', 'Nami'] 

# ''' S11 DK vs EDG, match 5, EDG(red) wins ''' 
# champion_lineup_blue = ['Graves', 'Trundle', 'Syndra', 'Ziggs', 'Leona'] 
# champion_lineup_red = ['Kennen', 'Xin Zhao', 'Zoe', 'Aphelios', 'Rakan']

x = [np.concatenate([champion_label_encoder.transform(champion_lineup_blue), champion_label_encoder.transform(champion_lineup_red) ])]
x = scaler.transform(x)

y_pred = model_LR.predict_proba(x)
print(f"model predicted winner : {'blue' if model_LR.predict(x)[0] else 'red'}")
print(f"red wins: {y_pred[0][0] * 100:.1f}% | blue wins: {y_pred[0][1] * 100:.1f}%")

model predicted winner : blue
red wins: 47.8% | blue wins: 52.2%
