# Simple Expreiment: predict winner from champion lineup, gold & exp difference at 15 minute

In [142]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

## Dataset

Source: https://oracleselixir.com/tools/downloads

In [94]:
df = pd.read_csv('../data/2021_LoL_esports_match_data.csv', sep=',')
df_complete = df[df['datacompleteness'] == 'complete']
df_complete.reset_index(drop = True, inplace = True)
df = df_complete[df_complete['position'] == 'team']
df = df[df['side'] == 'Blue']
df.reset_index(drop = True, inplace = True)

print('# of matches: {}'.format(len(df)))

# of matches: 7296


## Dataset preprocessing

Add Winner for each match by merging two original columns:

In [95]:
df['winner'] = np.where(df['result']==1, 'blue', 'red')
df[['winner', 'result']].head()

Unnamed: 0,winner,result
0,blue,1
1,red,0
2,red,0
3,red,0
4,red,0



For this experiment, we reserve golddiff, expdiff, deathdiff at 15 min:

In [96]:
df[['golddiffat15', 'xpdiffat15']].head()
# df['deathdiffat15'] = df['deathsat15'] - df['opp_deathsat15']
# print(df[['deathdiffat15', 'deathsat15', 'opp_deathsat15']].head())

Unnamed: 0,golddiffat15,xpdiffat15
0,5018.0,4255.0
1,573.0,-1879.0
2,-579.0,-1643.0
3,951.0,-107.0
4,2145.0,-420.0


Get champion lineup stats:

In [97]:
champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]
df[champion_columns] = np.NAN
# columns = champion_columns + ['golddiff', 'winner']
# df = df[columns]
champion_map_blue = {'top':'blueTopChamp', 'jng':'blueJungleChamp', 'mid':'blueMiddleChamp', 'bot':'blueADCChamp', 'sup':'blueSupportChamp'}
champion_map_red = {'top':'redTopChamp', 'jng':'redJungleChamp', 'mid':'redMiddleChamp', 'bot':'redADCChamp', 'sup':'redSupportChamp'}

for i in range(len(df)) :
    for j in range(5) :
        position = df_complete['position'][i * 12 + j]
        df[champion_map_blue[position]][i] = df_complete['champion'][i * 12 + j]
        position = df_complete['position'][i * 12 + 5 + j ]
        df[champion_map_red[position]][i] = df_complete['champion'][i * 12 + 5 + j]

columns = champion_columns + ['golddiffat15', 'xpdiffat15', 'winner']
df = df[columns]

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiffat15,xpdiffat15,winner
0,Mordekaiser,Graves,Zoe,Miss Fortune,Galio,Ornn,Kindred,Orianna,Ezreal,Leona,5018.0,4255.0,blue
1,Gragas,Graves,Rumble,Kai'Sa,Alistar,Ornn,Olaf,Syndra,Miss Fortune,Galio,573.0,-1879.0,red
2,Gragas,Graves,Zoe,Yasuo,Alistar,Rumble,Nidalee,Yone,Miss Fortune,Galio,-579.0,-1643.0,red
3,Karma,Pantheon,Syndra,Samira,Leona,Aatrox,Nidalee,Viktor,Kai'Sa,Galio,951.0,-107.0,red
4,Jax,Hecarim,Orianna,Samira,Alistar,Camille,Lillia,Viktor,Kai'Sa,Maokai,2145.0,-420.0,red


Encode labels:

In [98]:
champion_label_encoder = LabelEncoder()
champions = set()
for champ in champion_columns :
    champions |= set(pd.unique(df[champ]))
champions = np.array(list(champions))
print(champions)

champion_label_encoder.fit(champions)
for champ in champion_columns :
    df[champ] = champion_label_encoder.transform(df[champ])

label_encoder = LabelEncoder()
df['winner'] = label_encoder.fit_transform(df['winner'])

df.head()

['Zyra' 'Shaco' 'Evelynn' 'Nami' 'Sett' 'Garen' 'Samira' 'Qiyana'
 'Skarner' 'Talon' 'Braum' 'Viego' 'Swain' 'Nasus' 'Renekton' 'Quinn'
 'Malphite' 'Shyvana' 'Nunu & Willump' 'Katarina' 'Ornn' 'Vi' 'Jax' 'Zoe'
 'Sivir' 'Aatrox' 'Amumu' 'Bard' 'Azir' 'Elise' 'Viktor' 'Karma' 'Jinx'
 'Yorick' 'Nidalee' 'Aphelios' 'Draven' 'Volibear' "Kai'Sa" 'Rell' 'Galio'
 'Udyr' 'Yone' 'Nocturne' 'Zilean' 'Neeko' 'Fizz' 'Sylas' 'Tryndamere'
 "Cho'Gath" 'Syndra' "Rek'Sai" "Kog'Maw" 'Caitlyn' 'Pantheon' 'Maokai'
 'Jarvan IV' 'Lee Sin' 'Rumble' 'Senna' 'Xayah' 'Gwen' 'Tristana' 'Singed'
 'Kalista' 'Illaoi' 'Morgana' 'Shen' 'Mordekaiser' 'Gragas' 'Vayne'
 'Akali' 'Graves' 'Taliyah' 'Jayce' 'Kayle' 'Vex' 'Gangplank' 'Akshan'
 'Cassiopeia' 'Ziggs' 'Jhin' 'Fiora' 'Karthus' 'Heimerdinger' 'Corki'
 'Blitzcrank' 'Vladimir' "Vel'Koz" 'Leona' 'Poppy' 'Warwick' 'Lulu'
 'Annie' 'Janna' 'Varus' 'Nautilus' 'Urgot' 'Taric' 'Kassadin' 'Ahri'
 'Yasuo' 'Kennen' 'Aurelion Sol' 'LeBlanc' 'Hecarim' 'Ashe' 'Yuumi'
 "Kha'Zix" 

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiffat15,xpdiffat15,winner
0,75,37,153,74,32,86,60,85,28,65,5018.0,4255.0,0
1,36,37,99,50,4,86,84,116,74,32,573.0,-1879.0,1
2,36,37,153,145,4,99,81,146,74,32,-579.0,-1643.0,1
3,52,87,116,101,65,0,81,137,50,32,951.0,-107.0,1
4,46,39,85,101,4,17,66,137,50,73,2145.0,-420.0,1


Normalize Data:

In [99]:
scaler = StandardScaler()
scaler.fit(df)
df = scaler.transform(df)
df = pd.DataFrame(df, columns = columns)

# for champ in champion_columns :
#     df[champ] = label_encoder.fit_transform(df[champ])
df['winner'] = label_encoder.fit_transform(df['winner'])

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiffat15,xpdiffat15,winner
0,0.356081,-1.246656,1.447364,0.00794,-0.912481,0.650655,-0.566314,-0.060945,-1.022736,-0.054882,1.496092,1.875853,0
1,-0.61963,-1.246656,0.229419,-0.54377,-1.595501,0.650655,0.099456,0.619456,0.019262,-0.870246,0.089944,-0.846535,1
2,-0.61963,-1.246656,1.447364,1.640082,-1.595501,0.972483,0.016235,1.277908,0.019262,-0.870246,-0.274484,-0.741793,1
3,-0.219338,0.128378,0.612846,0.628614,-0.107494,-1.478358,0.016235,1.080372,-0.524389,-0.870246,0.209522,-0.060087,1
4,-0.369447,-1.191654,-0.086345,0.628614,-1.595501,-1.057507,-0.399871,1.080372,-0.524389,0.142782,0.587236,-0.199002,1


Finally, we get our x and y data for training.

In [140]:
x = df.drop(['winner'], axis = 1)
y = df['winner']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

x_train.head(10)
# x_train = np.array(x_train).reshape(-1, 1)
# x_test = np.array(x_test).reshape(-1, 1)

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiffat15,xpdiffat15
1262,-0.644648,-0.449136,1.447364,-1.509262,1.282939,-1.057507,0.376861,1.080372,0.019262,-0.870246,-1.208015,-1.442584
4554,-0.644648,-1.191654,-1.281736,1.134348,-1.327172,-1.007995,1.320035,-0.060945,-0.524389,0.29103,0.880487,0.361098
1520,-1.520285,-1.246656,0.838392,-1.486275,1.282939,-0.958483,1.320035,-1.685127,0.676173,0.933439,-0.369071,0.01048
2455,0.631281,-0.009125,-1.755381,-0.566758,1.282939,-1.478358,-0.399871,0.773095,0.676173,1.229935,-0.654729,-0.846091
1467,1.957247,-0.449136,0.590292,0.00794,-0.107494,0.898215,1.042631,-1.531488,-0.569693,0.908731,0.101649,-0.185244
6934,0.130917,-1.246656,1.289483,0.00794,-1.571107,0.279316,-0.455352,0.597507,-1.022736,-0.054882,0.039962,-0.113345
5476,0.080881,-0.174129,-1.980927,0.674589,0.868248,0.700167,0.043976,0.619456,-0.547041,0.04395,-1.40225,-1.93478
5835,1.231719,0.458386,-0.55999,-0.566758,-1.327172,-0.611899,0.043976,-1.882662,1.129216,-0.870246,-1.699929,-1.507382
4494,2.007284,-0.641641,-0.086345,1.134348,-1.595501,0.279316,-1.148862,-1.685127,-0.524389,-0.870246,-0.78443,-1.332517
489,-1.094976,0.980899,-1.281736,-0.589746,-1.595501,-0.339584,0.016235,0.597507,1.559606,1.353475,0.435391,-0.259805


## Train & Test

Train Logistic Regression model and test its accuracy.

In [141]:
# using SGDClassifier(loss='log') is equivalent to LogisticRegression which is fitted via Stochastic Gradient Descent, which is taught in lecture

parameters = {'alpha':[0.001, 0.005, 0.01, 0.05, 0.1, 0.5,  1, 2, 4, 8, 16]}
model = SGDClassifier(loss = 'log', shuffle = False)
model_LR_grid = GridSearchCV(model, param_grid = parameters)
model_LR_grid.fit(x_train, y_train)

model_LR = model_LR_grid.best_estimator_
print(model_LR)
print(model_LR.coef_)

y_pred = model_LR.predict(x_test)

print(classification_report(y_test, y_pred))

SGDClassifier(alpha=0.05, loss='log', shuffle=False)
[[ 0.04193924  0.04148529 -0.0211742   0.01444786 -0.00790207 -0.03868626
  -0.02817047 -0.05437778 -0.02189349 -0.01149658 -0.85542176 -0.57836662]]
              precision    recall  f1-score   support

           0       0.78      0.77      0.78       804
           1       0.72      0.73      0.73       656

    accuracy                           0.75      1460
   macro avg       0.75      0.75      0.75      1460
weighted avg       0.75      0.75      0.75      1460



Test/predict single one match:

In [104]:
# EDG vs DK, match 3, DK(red) wins
golddiffat15 = 922
xpdiffat15 = -219
champion_lineup_blue = ['Jayce', 'Xin Zhao', 'Twisted Fate', 'Jhin', 'Leona'] 
champion_lineup_red = ['Gragas', 'Lee Sin', 'Sylas', 'Aphelios', 'Braum']

# EDG vs DK, match 4, EDG(blue) wins
# golddiffat15 = 2057
# xpdiffat15 = 1563
# champion_lineup_blue = ['Graves', 'Viego', 'Zoe', 'Lucian', 'Lulu'] 
# champion_lineup_red = ['Gwen', 'Talon', 'Orianna', 'Jhin', 'Nami'] 

# DK vs EDG, match 5, EDG(red) wins
# golddiffat15 = -795
# xpdiffat15 = -1087
# champion_lineup_blue = ['Graves', 'Trundle', 'Syndra', 'Ziggs', 'Leona'] 
# champion_lineup_red = ['Kennen', 'Xin Zhao', 'Zoe', 'Aphelios', 'Rakan']

# champion_lineup_blue = ['Gwen', 'Lee Sin','LeBlanc', 'Aphelios', 'Lulu']
# champion_lineup_red = ['Graves', 'Jarvan IV', 'Twisted Fate','Lucian','Nami']

# champion_lineup_blue = ['Gnar', 'Rengar', 'Ahri', 'Caitlyn', 'Leona']
# champion_lineup_red = ['Irelia', 'Jarvan IV', 'Azir', 'Corki', 'Annie']

# champion_lineup_blue = ['Jayce', 'Lee Sin', 'Twisted Fate', 'Miss Fortune', 'Leona']
# champion_lineup_red = ['Kennen', 'Jarvan IV', 'Azir', 'Vayne', 'Lulu']

x = [np.concatenate([champion_label_encoder.transform(champion_lineup_blue), champion_label_encoder.transform(champion_lineup_red) , [golddiffat15, xpdiffat15, 0]])]
x = scaler.transform(x)[:, :-1]

y_pred = model_LR.predict_proba(x)

print(model_LR.predict(x))
print("blue wins: {:.1f}% red wins: {:.1f}%".format(y_pred[0][0] * 100, y_pred[0][1] * 100))

  (0, 0)	0.018345807780519603
  (0, 1)	0.03358477291328404
  (0, 2)	-0.02088309338811777
  (0, 3)	0.011780974820784745
  (0, 4)	-0.012977328467729948
  (0, 5)	-0.03525768145209855
  (0, 6)	-0.026549202359458957
  (0, 7)	-0.04669256481417951
  (0, 8)	-0.0232982174724223
  (0, 9)	-0.005152774496172212
  (0, 10)	-0.8613291051534129
  (0, 11)	-0.5699417491846434
[0]
blue wins: 55.5% red wins: 44.5%
