# Predict Winner from Champion Lineup using Naive Bayes

In [279]:
# import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

## Dataset

Source: https://oracleselixir.com/tools/downloads

In [280]:
df = pd.read_csv('../data/2021_LoL_esports_match_data.csv', sep=',')
df_complete = df[df['datacompleteness'] == 'complete']
df_complete.reset_index(drop = True, inplace = True)
df = df_complete[df_complete['position'] == 'team']
df = df[df['side'] == 'Blue']
df.reset_index(drop = True, inplace = True)
print(f'# of matches: {len(df)}')

# of matches: 7296


## Dataset preprocessing

Add winner column for each match by merging two original columns:

In [281]:
df['winner'] = np.where(df['result']==1, 'blue', 'red')
df[['winner', 'result']].head()

Unnamed: 0,winner,result
0,blue,1
1,red,0
2,red,0
3,red,0
4,red,0


Get champion lineup stats:

In [282]:
champion_columns = [
       'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
       'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp','redSupportChamp'
]

for champion in champion_columns:
    df[champion] = np.NAN

champion_map_blue = {'top':'blueTopChamp', 'jng':'blueJungleChamp', 'mid':'blueMiddleChamp', 'bot':'blueADCChamp', 'sup':'blueSupportChamp'}
champion_map_red = {'top':'redTopChamp', 'jng':'redJungleChamp', 'mid':'redMiddleChamp', 'bot':'redADCChamp', 'sup':'redSupportChamp'}

for i in range(len(df)) :
    for j in range(5) :
        position = df_complete['position'][i * 12 + j]
        df[champion_map_blue[position]][i] = df_complete['champion'][i * 12 + j]
        position = df_complete['position'][i * 12 + 5 + j ]
        df[champion_map_red[position]][i] = df_complete['champion'][i * 12 + 5 + j]

columns =  champion_columns + ['winner']
df = df[columns]

df.head()

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,winner
0,Mordekaiser,Graves,Zoe,Miss Fortune,Galio,Ornn,Kindred,Orianna,Ezreal,Leona,blue
1,Gragas,Graves,Rumble,Kai'Sa,Alistar,Ornn,Olaf,Syndra,Miss Fortune,Galio,red
2,Gragas,Graves,Zoe,Yasuo,Alistar,Rumble,Nidalee,Yone,Miss Fortune,Galio,red
3,Karma,Pantheon,Syndra,Samira,Leona,Aatrox,Nidalee,Viktor,Kai'Sa,Galio,red
4,Jax,Hecarim,Orianna,Samira,Alistar,Camille,Lillia,Viktor,Kai'Sa,Maokai,red


Encode champion labels:

In [283]:
champion_label_encoder = LabelEncoder()
champions = set()
for champ in champion_columns :
    champions |= set(pd.unique(df[champ]))
champions = np.array(list(champions))
print(f'{len(champions)} champions: {champions}')

champion_label_encoder.fit(champions)
for champ in champion_columns :
    df[champ] = champion_label_encoder.transform(df[champ])

df.head()

155 champions: ['Viktor' 'Amumu' 'Garen' 'Aatrox' 'Swain' 'Caitlyn' 'Jinx' 'Gwen' 'Sylas'
 'Malzahar' 'Teemo' 'Fiddlesticks' 'Corki' 'Twisted Fate' 'Gnar' 'Sion'
 'Neeko' 'Zilean' 'Samira' 'Lissandra' 'Shen' 'Bard' 'Rumble' 'Urgot'
 'Yuumi' 'Seraphine' 'Ivern' 'Veigar' 'Miss Fortune' 'Sett' 'Karthus'
 "Vel'Koz" 'Braum' 'Aphelios' 'Ornn' 'Lulu' 'Fiora' 'Irelia' 'Draven'
 'Lee Sin' 'Ekko' 'Fizz' "Kog'Maw" 'Zed' 'Kassadin' 'Rengar' 'Maokai'
 'Sivir' 'Evelynn' 'Singed' "Kha'Zix" 'Qiyana' 'Annie' 'Nocturne' 'Brand'
 'Renekton' 'Xayah' 'Rakan' 'Ahri' 'Quinn' 'Shaco' 'Vayne' 'Olaf' 'Ryze'
 'Shyvana' 'Katarina' 'Vladimir' 'Kayle' "Kai'Sa" 'Dr. Mundo'
 'Aurelion Sol' 'Zoe' 'Senna' 'Blitzcrank' 'Rell' 'Warwick' 'Pantheon'
 'Rammus' 'Nami' 'Thresh' 'Mordekaiser' 'Kled' 'Nunu & Willump' 'Talon'
 'Graves' 'Tristana' 'Varus' 'Gragas' 'Malphite' 'Lillia' 'Leona' 'Lux'
 'Vex' 'Xin Zhao' 'Akali' 'Darius' "Cho'Gath" 'Sejuani' 'Cassiopeia'
 'Skarner' 'Taric' 'Anivia' 'Kennen' 'Elise' 'Riven' 'Yasuo'
 'He

Unnamed: 0,blueTopChamp,blueJungleChamp,blueMiddleChamp,blueADCChamp,blueSupportChamp,redTopChamp,redJungleChamp,redMiddleChamp,redADCChamp,redSupportChamp,winner
0,75,37,153,74,32,86,60,85,28,65,blue
1,36,37,99,50,4,86,84,116,74,32,red
2,36,37,153,145,4,99,81,146,74,32,red
3,52,87,116,101,65,0,81,137,50,32,red
4,46,39,85,101,4,17,66,137,50,73,red


Finally, we get our data for training.

In [284]:
x = df.drop(['winner'], axis = 1)
y = df['winner']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

x_train.shape, x_test.shape

((5836, 10), (1460, 10))

## Train & Test

Train Naive Bayes model (with Laplace smoothing $k=3$ ) and test its accuracy.

### Sklearn toolkit Naive Bayes

Using `sklearn.naive_bayes.CategoricalNB` is equivalent to Naive Bayes taught in CS181 lecture.

In [285]:
from sklearn.naive_bayes import CategoricalNB

parameters = {'alpha': list(range(10))}
model = CategoricalNB(min_categories = len(champions))
model_NB_grid = GridSearchCV(model, param_grid = parameters)
model_NB_grid.fit(x_train, y_train)

model_NB = model_NB_grid.best_estimator_
print(model_NB)
print(f'Shape of NB parameters: {np.asarray(model_NB.coef_).shape}')

y_pred = model_NB.predict(x_test)

print(classification_report(y_test, y_pred))

CategoricalNB(alpha=3, min_categories=155)
Shape of NB parameters: (9, 2, 155)
              precision    recall  f1-score   support

        blue       0.57      0.70      0.62       804
         red       0.48      0.34      0.40       656

    accuracy                           0.54      1460
   macro avg       0.52      0.52      0.51      1460
weighted avg       0.53      0.54      0.52      1460



### Self-implementation Naive Bayes

In [286]:
class NaiveBayes():
	def __init__(self, x_train, y_train, k = 3):
		self.x_train = x_train
		self.y_train = y_train
		self.k = k
		self.counter = dict()
		self.sumWin = 0
		self.sumLoss = 0
		for pos in x_train:
			tempDict = dict()
			for i in range(len(x_train)):
				pick = x_train[pos][x_train.index[i]]
				if pick in tempDict:
					tempDict[pick][0] += 1
				else:
					tempDict[pick] = [1, 0]
				if y_train[y_train.index[i]] == "blue":
					tempDict[pick][1] += 1
					self.sumWin += 1
				else:
					self.sumLoss += 1
			self.counter[pos] = tempDict
	
	def predWin(self, x_test):
		ans = []
		for i in range(len(x_test)):
			probWin = 0.5
			probLoss = 0.5
			for pos in x_test:
				pick = x_test[pos][x_test.index[i]]
				if pick in self.counter[pos]:
					probWin *= (self.counter[pos][pick][1] + self.k) / (self.sumWin + self.k * len(self.counter[pos]))
					probLoss *= (self.counter[pos][pick][0] - self.counter[pos][pick][1] + self.k) / (self.sumLoss + self.k * len(self.counter[pos]))
			ans.append(probWin / (probWin + probLoss))
		return ans

	def test(self, x_test, y_test):
		predProb = self.predWin(x_test)
		corret = 0
		for i in range(len(x_test)):
			y_pred = "blue" if predProb[i]> 0.5 else "red"
			if y_pred == y_test[y_test.index[i]]:
				corret += 1
		return corret / len(y_test)
	
model = NaiveBayes(x_train, y_train, k = 3)
pred = model.predWin(x_test)
print(f'accuracy = {model.test(x_test, y_test) * 100 :.3f}%')

accuracy = 53.973%


Test/predict single one match:

In [287]:
''' S11 EDG vs DK, match 3, DK(red) wins '''
# champion_lineup_blue = ['Jayce', 'Xin Zhao', 'Twisted Fate', 'Jhin', 'Leona'] 
# champion_lineup_red = ['Gragas', 'Lee Sin', 'Sylas', 'Aphelios', 'Braum']

''' S11 EDG vs DK, match 4, EDG(blue) wins '''
# champion_lineup_blue = ['Graves', 'Viego', 'Zoe', 'Lucian', 'Lulu'] 
# champion_lineup_red = ['Gwen', 'Talon', 'Orianna', 'Jhin', 'Nami']

''' S11 DK vs EDG, match 5, EDG(red) wins ''' 
champion_lineup_blue = ['Graves', 'Trundle', 'Syndra', 'Ziggs', 'Leona'] 
champion_lineup_red = ['Kennen', 'Xin Zhao', 'Zoe', 'Aphelios', 'Rakan']

# champion_lineup_blue = ['Gwen', 'Lee Sin','LeBlanc', 'Aphelios', 'Lulu']
# champion_lineup_red = ['Graves', 'Jarvan IV', 'Twisted Fate','Lucian','Nami']

# champion_lineup_blue = ['Gnar', 'Rengar', 'Ahri', 'Caitlyn', 'Leona']
# champion_lineup_red = ['Irelia', 'Jarvan IV', 'Azir', 'Corki', 'Annie']

# champion_lineup_blue = ['Jayce', 'Lee Sin', 'Twisted Fate', 'Miss Fortune', 'Leona']
# champion_lineup_red = ['Kennen', 'Jarvan IV', 'Azir', 'Vayne', 'Lulu']

x = [np.concatenate([champion_label_encoder.transform(champion_lineup_blue), champion_label_encoder.transform(champion_lineup_red)])]
y_pred =model_NB.predict_proba(x)

print(f"model predicted winner : {model_NB.predict(x)[0]}")
print(f"blue wins: {y_pred[0][0] * 100:.1f}% | red wins: {y_pred[0][1] * 100:.1f}%")

model predicted winner : red
blue wins: 45.9% | red wins: 54.1%
