# Simple Expreiment: predict winner from champion lineup, gold & exp difference at 15 minute

In [84]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

## Dataset

Source: https://oracleselixir.com/tools/downloads

In [85]:
df = pd.read_csv('../data/2021_LoL_esports_match_data.csv', sep=',')
df_complete = df[df['datacompleteness'] == 'complete']
df_complete.reset_index(drop = True, inplace = True)
df = df_complete[df_complete['position'] == 'team']
df = df[df['side'] == 'Blue']
df.reset_index(drop = True, inplace = True)
#print(df)
print('# of matches: {}'.format(len(df)))

# of matches: 7268


## Dataset preprocessing

Add Winner for each match by merging two original columns:

In [86]:
df['winner'] = np.where(df['result']==1, 'blue', 'red')
df[['winner', 'result']].head()

Unnamed: 0,winner,result
0,blue,1
1,red,0
2,red,0
3,red,0
4,red,0



For this experiment, we reserve golddiff, expdiff, deathdiff at 15 min:

In [87]:
df[['golddiffat15', 'xpdiffat15']].head()
#df.head()
# df['deathdiffat15'] = df['deathsat15'] - df['opp_deathsat15']
# print(df[['deathdiffat15', 'deathsat15', 'opp_deathsat15']].head())

Unnamed: 0,golddiffat15,xpdiffat15
0,5018.0,4255.0
1,573.0,-1879.0
2,-579.0,-1643.0
3,951.0,-107.0
4,2145.0,-420.0


Get champion lineup stats:

In [88]:

champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]

for champion in champion_columns:
    df[champion] = np.NAN

# columns = champion_columns + ['golddiff', 'winner']
# df = df[columns]
champion_map_blue = {'top':'blueTopChamp', 'jng':'blueJungleChamp', 'mid':'blueMiddleChamp', 'bot':'blueADCChamp', 'sup':'blueSupportChamp'}
champion_map_red = {'top':'redTopChamp', 'jng':'redJungleChamp', 'mid':'redMiddleChamp', 'bot':'redADCChamp', 'sup':'redSupportChamp'}

for i in range(len(df)) :
    for j in range(5) :
        position = df_complete['position'][i * 12 + j]
        df[champion_map_blue[position]][i] = df_complete['champion'][i * 12 + j]
        position = df_complete['position'][i * 12 + 5 + j ]
        df[champion_map_red[position]][i] = df_complete['champion'][i * 12 + 5 + j]

columns =  champion_columns + ['winner']
df = df[columns]

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,winner
0,Mordekaiser,Graves,Zoe,Miss Fortune,Galio,Ornn,Kindred,Orianna,Ezreal,Leona,blue
1,Gragas,Graves,Rumble,Kai'Sa,Alistar,Ornn,Olaf,Syndra,Miss Fortune,Galio,red
2,Gragas,Graves,Zoe,Yasuo,Alistar,Rumble,Nidalee,Yone,Miss Fortune,Galio,red
3,Karma,Pantheon,Syndra,Samira,Leona,Aatrox,Nidalee,Viktor,Kai'Sa,Galio,red
4,Jax,Hecarim,Orianna,Samira,Alistar,Camille,Lillia,Viktor,Kai'Sa,Maokai,red


Encode labels:

In [89]:
champion_label_encoder = LabelEncoder()
champions = set()
for champ in champion_columns :
    champions |= set(pd.unique(df[champ]))
champions = np.array(list(champions))
print(champions)

champion_label_encoder.fit(champions)
for champ in champion_columns :
    df[champ] = champion_label_encoder.transform(df[champ])

label_encoder = LabelEncoder()
df['winner'] = label_encoder.fit_transform(df['winner'])

df.head()

['Yone' 'Swain' 'Kassadin' 'Brand' 'Seraphine' 'Rengar' 'Qiyana' 'Lux'
 'Miss Fortune' 'Quinn' 'Darius' 'Taliyah' 'Skarner' 'Amumu' "Kai'Sa"
 'Volibear' 'Jinx' 'Lucian' 'Kayle' 'Poppy' 'Gragas' 'Camille' 'Nidalee'
 'Karma' 'Corki' 'Bard' 'Sivir' 'Blitzcrank' 'Sett' 'Soraka' 'Gnar'
 'Urgot' 'Diana' 'Singed' 'Xerath' 'Orianna' 'Rumble' 'Garen' 'Jhin'
 'Nasus' 'Lissandra' 'Ziggs' 'Neeko' 'Heimerdinger' 'Twitch' 'Caitlyn'
 'Akshan' 'Anivia' 'Yuumi' 'Azir' 'Udyr' 'Tahm Kench' 'Nautilus' 'Rakan'
 'Fiora' 'Elise' 'Ahri' "Kog'Maw" 'Cassiopeia' 'Draven' 'Jayce' 'Thresh'
 'Samira' 'Viego' 'Kalista' 'Nunu & Willump' 'Yasuo' 'Syndra' 'Zyra'
 'Maokai' 'Shen' 'Morgana' 'Lulu' "Cho'Gath" 'Warwick' 'LeBlanc' "Vel'Koz"
 'Ashe' 'Trundle' 'Akali' 'Jarvan IV' 'Fizz' 'Ornn' 'Aurelion Sol'
 'Aatrox' 'Vayne' 'Braum' 'Alistar' 'Zac' 'Zed' 'Tristana' 'Zoe'
 'Nocturne' 'Shyvana' 'Annie' 'Yorick' 'Gangplank' 'Karthus' 'Kindred'
 'Malphite' "Kha'Zix" 'Pantheon' 'Vi' 'Illaoi' "Rek'Sai" 'Rell'
 'Tryndamere' 'Ryze' 

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,winner
0,75,37,153,74,32,86,60,85,28,65,0
1,36,37,99,50,4,86,84,116,74,32,1
2,36,37,153,145,4,99,81,146,74,32,1
3,52,87,116,101,65,0,81,137,50,32,1
4,46,39,85,101,4,17,66,137,50,73,1


Finally, we get our x and y data for training.

In [90]:
x = df.drop(['winner'], axis = 1)
y = df['winner']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

x_train.head(10)
# x_train = np.array(x_train).reshape(-1, 1)
# x_test = np.array(x_test).reshape(-1, 1)

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp
526,33,118,137,101,65,17,66,68,50,32
7060,145,64,67,48,92,37,144,63,24,4
3078,139,128,137,142,122,36,37,11,50,4
6596,0,38,68,51,122,96,118,100,130,79
5802,110,99,116,103,117,52,128,68,9,69
3732,33,39,18,142,92,35,66,145,50,95
5784,110,99,90,101,65,129,144,52,49,4
1647,110,37,100,50,4,96,81,11,101,95
4767,35,139,116,50,95,71,66,68,49,122
5645,35,128,137,130,92,68,139,11,123,65


## Train & Test

Train Logistic Regression model and test its accuracy.

In [101]:
# using SGDClassifier(loss='log') is equivalent to LogisticRegression which is fitted via Stochastic Gradient Descent, which is taught in lecture
from sklearn.naive_bayes import CategoricalNB

parameters = {'alpha':[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100]}
#prior = {'fit_prior':[False for _ in parameters['alpha']]}
model = CategoricalNB()
model_LR_grid = GridSearchCV(model, param_grid = parameters)
model_LR_grid.fit(x_train, y_train)

model_LR = model_LR_grid.best_estimator_
print(model_LR)
print(model_LR.coef_)

y_pred = model_LR.predict(x_test)

print(classification_report(y_test, y_pred))

ImportError: cannot import name 'CategoricalNB' from 'sklearn.naive_bayes' (C:\ProgramData\Anaconda3\lib\site-packages\sklearn\naive_bayes.py)

Test/predict single one match:

In [92]:
# EDG vs DK, match 3, DK(red) wins
golddiffat15 = 922
xpdiffat15 = -219
champion_lineup_blue = ['Jayce', 'Xin Zhao', 'Twisted Fate', 'Jhin', 'Leona'] 
champion_lineup_red = ['Gragas', 'Lee Sin', 'Sylas', 'Aphelios', 'Braum']

# EDG vs DK, match 4, EDG(blue) wins
# golddiffat15 = 2057
# xpdiffat15 = 1563
# champion_lineup_blue = ['Graves', 'Viego', 'Zoe', 'Lucian', 'Lulu'] 
# champion_lineup_red = ['Gwen', 'Talon', 'Orianna', 'Jhin', 'Nami'] 

# DK vs EDG, match 5, EDG(red) wins
# golddiffat15 = -795
# xpdiffat15 = -1087
# champion_lineup_blue = ['Graves', 'Trundle', 'Syndra', 'Ziggs', 'Leona'] 
# champion_lineup_red = ['Kennen', 'Xin Zhao', 'Zoe', 'Aphelios', 'Rakan']

# champion_lineup_blue = ['Gwen', 'Lee Sin','LeBlanc', 'Aphelios', 'Lulu']
# champion_lineup_red = ['Graves', 'Jarvan IV', 'Twisted Fate','Lucian','Nami']

# champion_lineup_blue = ['Gnar', 'Rengar', 'Ahri', 'Caitlyn', 'Leona']
# champion_lineup_red = ['Irelia', 'Jarvan IV', 'Azir', 'Corki', 'Annie']

# champion_lineup_blue = ['Jayce', 'Lee Sin', 'Twisted Fate', 'Miss Fortune', 'Leona']
# champion_lineup_red = ['Kennen', 'Jarvan IV', 'Azir', 'Vayne', 'Lulu']

x = [np.concatenate([champion_label_encoder.transform(champion_lineup_blue), champion_label_encoder.transform(champion_lineup_red) , [golddiffat15, xpdiffat15, 0]])]
x = scaler.transform(x)[:, :-1]
y_pred = model_LR.predict_proba(x)

print(model_LR.predict(x))
print("blue wins: {:.1f}% red wins: {:.1f}%".format(y_pred[0][0] * 100, y_pred[0][1] * 100))

ValueError: operands could not be broadcast together with shapes (1,13) (11,) (1,13) 