# Simple Experiment: predict the winner from champion lineup & gold difference at 10 minutes

In [391]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

## Dataset

Source: https://www.kaggle.com/chuckephron/leagueoflegends

In [392]:
# Read the raw match table; the file is plain comma-separated, which is
# read_csv's default delimiter.
df = pd.read_csv('../data/LeagueofLegends.csv')

# Preview the first five rows.
df.head(5)

Unnamed: 0,League,Year,Season,Type,blueTeamTag,bResult,rResult,redTeamTag,gamelength,golddiff,...,redMiddleChamp,goldredMiddle,redADC,redADCChamp,goldredADC,redSupport,redSupportChamp,goldredSupport,redBans,Address
0,NALCS,2015,Spring,Season,TSM,1,0,C9,40,"[0, 0, -14, -65, -268, -431, -488, -789, -494,...",...,Fizz,"[475, 475, 552, 842, 1178, 1378, 1635, 1949, 2...",Sneaky,Sivir,"[475, 475, 532, 762, 1097, 1469, 1726, 2112, 2...",LemonNation,Thresh,"[515, 515, 577, 722, 911, 1042, 1194, 1370, 14...","['Tristana', 'Leblanc', 'Nidalee']",http://matchhistory.na.leagueoflegends.com/en/...
1,NALCS,2015,Spring,Season,CST,0,1,DIG,38,"[0, 0, -26, -18, 147, 237, -152, 18, 88, -242,...",...,Azir,"[475, 475, 552, 786, 1097, 1389, 1660, 1955, 2...",CoreJJ,Corki,"[475, 475, 532, 868, 1220, 1445, 1732, 1979, 2...",KiWiKiD,Annie,"[515, 515, 583, 752, 900, 1066, 1236, 1417, 15...","['RekSai', 'Janna', 'Leblanc']",http://matchhistory.na.leagueoflegends.com/en/...
2,NALCS,2015,Spring,Season,WFX,1,0,GV,40,"[0, 0, 10, -60, 34, 37, 589, 1064, 1258, 913, ...",...,Azir,"[475, 475, 533, 801, 1006, 1233, 1385, 1720, 1...",Cop,Corki,"[475, 475, 533, 781, 1085, 1398, 1782, 1957, 2...",BunnyFuFuu,Janna,"[515, 515, 584, 721, 858, 1002, 1168, 1303, 14...","['Leblanc', 'Zed', 'RekSai']",http://matchhistory.na.leagueoflegends.com/en/...
3,NALCS,2015,Spring,Season,TIP,0,1,TL,41,"[0, 0, -15, 25, 228, -6, -243, 175, -346, 16, ...",...,Lulu,"[475, 475, 532, 771, 1046, 1288, 1534, 1776, 2...",KEITH,KogMaw,"[475, 475, 532, 766, 1161, 1438, 1776, 1936, 2...",Xpecial,Janna,"[515, 515, 583, 721, 870, 1059, 1205, 1342, 15...","['RekSai', 'Rumble', 'LeeSin']",http://matchhistory.na.leagueoflegends.com/en/...
4,NALCS,2015,Spring,Season,CLG,1,0,T8,35,"[40, 40, 44, -36, 113, 158, -121, -191, 23, 20...",...,Lulu,"[475, 475, 532, 807, 1042, 1338, 1646, 1951, 2...",Maplestreet8,Corki,"[475, 475, 532, 792, 1187, 1488, 1832, 2136, 2...",Dodo8,Annie,"[475, 475, 538, 671, 817, 948, 1104, 1240, 136...","['Rumble', 'Sivir', 'Rengar']",http://matchhistory.na.leagueoflegends.com/en/...


## Dataset preprocessing

Add a `winner` column for each match by merging the two original result columns (`bResult` and `rResult`):

In [393]:
# Derive a single categorical target: 'blue' where bResult is 1, 'red' otherwise.
blue_won = df['bResult'] == 1
df['winner'] = np.where(blue_won, 'blue', 'red')

# The two result flags are not needed once `winner` exists.
df = df.drop(columns=['bResult', 'rResult'])

df['winner'].head()

0    blue
1     red
2    blue
3     red
4    blue
Name: winner, dtype: object

Parse the `golddiff` column, which is stored as a string, into a Python list:

In [394]:
from ast import literal_eval

# Each cell holds the text of a Python list literal (e.g. "[0, 0, -14, ...]");
# literal_eval turns that text into an actual list without executing code.
golddiff_raw = df['golddiff']
df['golddiff'] = golddiff_raw.apply(literal_eval)

df['golddiff'].head()

0    [0, 0, -14, -65, -268, -431, -488, -789, -494,...
1    [0, 0, -26, -18, 147, 237, -152, 18, 88, -242,...
2    [0, 0, 10, -60, 34, 37, 589, 1064, 1258, 913, ...
3    [0, 0, -15, 25, 228, -6, -243, 175, -346, 16, ...
4    [40, 40, 44, -36, 113, 158, -121, -191, 23, 20...
Name: golddiff, dtype: object


For this experiment, we only keep the gold difference at the 10-minute mark:

In [395]:
# Position of the single sample kept from each gold-difference list.
# NOTE(review): index 9 is the 10th entry of the list; if the series starts at
# minute 0 this is the 9-minute value rather than the 10-minute one — confirm.
GOLDDIFF_INDEX = 9


def _golddiff_sample(series_per_match):
    """Return the entry at GOLDDIFF_INDEX from one match's gold-difference list."""
    return series_per_match[GOLDDIFF_INDEX]


# Replace the list-valued column with the one scalar sample per match.
df['golddiff'] = df['golddiff'].apply(_golddiff_sample)

df['golddiff'].head()

0   -625
1   -242
2    913
3     16
4    205
Name: golddiff, dtype: int64

Get champion lineup stats:

In [396]:
# Champion picked for each role on the blue side, then on the red side.
champion_columns = [
    'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
    'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp', 'redSupportChamp',
]

# Model inputs followed by the target column.
columns = champion_columns + ['golddiff', 'winner']

# Take an explicit copy of the selected columns: the frame is modified
# column-by-column afterwards, and writing into a bare column selection can
# raise SettingWithCopyWarning.
df = df[columns].copy()

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiff,winner
0,Irelia,RekSai,Ahri,Jinx,Janna,Gnar,Elise,Fizz,Sivir,Thresh,-625,blue
1,Gnar,Rengar,Ahri,Caitlyn,Leona,Irelia,JarvanIV,Azir,Corki,Annie,-242,red
2,Renekton,Rengar,Fizz,Sivir,Annie,Sion,LeeSin,Azir,Corki,Janna,913,blue
3,Irelia,JarvanIV,Leblanc,Sivir,Thresh,Gnar,Nunu,Lulu,KogMaw,Janna,16,red
4,Gnar,JarvanIV,Lissandra,Tristana,Janna,Sion,RekSai,Lulu,Corki,Annie,205,blue


Encode labels:

In [397]:
# One encoder object is reused; it is refit from scratch on every column, so
# each champion column gets its own independent string -> integer mapping.
# NOTE(review): these integer codes carry no real ordering, yet a linear model
# will treat them as magnitudes — consider one-hot encoding instead.
label_encoder = LabelEncoder()

# Column-wise apply: every champion column is passed to fit_transform in turn.
df[champion_columns] = df[champion_columns].apply(label_encoder.fit_transform)

# Encode the target last, which leaves `label_encoder` fitted on the winner labels.
df['winner'] = label_encoder.fit_transform(df['winner'])

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiff,winner
0,21,41,0,12,18,16,7,15,22,45,-625,0
1,16,42,0,3,24,22,18,4,2,1,-242,1
2,53,42,15,21,2,64,25,4,2,14,913,0
3,21,20,33,21,48,16,35,36,15,14,16,1
4,16,20,35,22,18,64,42,36,2,1,205,0


Normalize Data:

In [398]:
# Standardise the model inputs only. The target must not go through the
# scaler: scaling it and then re-encoding the resulting floats would leave
# `label_encoder` fitted on scaled numbers instead of the winner labels.
feature_columns = [col for col in columns if col != 'winner']

# NOTE(review): the scaler is fit on every row, before any train/test split,
# so held-out rows influence the scaling statistics — consider fitting on the
# training rows only.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[feature_columns])

# Rebuild the frame from the scaled inputs and carry the encoded target over
# unchanged; column order stays features first, then `winner`.
target = df['winner']
df = pd.DataFrame(scaled_features, columns=feature_columns, index=df.index)
df['winner'] = target

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,golddiff,winner
0,21,41,0,12,18,16,7,15,22,45,-0.622309,0
1,16,42,0,3,24,22,18,4,2,1,-0.278625,1
2,53,42,15,21,2,64,25,4,2,14,0.757809,0
3,21,20,33,21,48,16,35,36,15,14,-0.04711,1
4,16,20,35,22,18,64,42,36,2,1,0.122488,0


Finally, we get our x and y data for training.

In [399]:
# Fixed seed so the split, and therefore the reported metrics, can be
# reproduced on a fresh kernel.
SEED = 42

# Inputs are every column except the target.
x = df.drop(columns=['winner'])
y = df['winner']

# Hold out 10% of the matches for evaluation. Stratifying on the target keeps
# the class proportions the same in the training and test splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=SEED, stratify=y
)

## Train & Test

Train Logistic Regression model and test its accuracy.

In [400]:
# Fit a logistic regression with default settings on the training split
# (fit returns the estimator itself, so it can be chained).
model_LR = LogisticRegression().fit(x_train, y_train)

# Predict on the held-out split and print per-class precision / recall / F1.
y_pred = model_LR.predict(x_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.67      0.73      0.70       413
           1       0.65      0.58      0.61       349

    accuracy                           0.66       762
   macro avg       0.66      0.66      0.66       762
weighted avg       0.66      0.66      0.66       762

