# League of Legends Analysis
# Vy Nguyen and Daniel Strub
# CPSC 322

In [2]:
import importlib
import os
import copy
import random
from tabulate import tabulate

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MySimpleLinearRegressor, MyNaiveBayesClassifier, MyDecisionTreeClassifier, MyRandomForestClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation

In [3]:
game_fname = os.path.join("input_data", "games_small.csv")
game_table = MyPyTable()
game_table.load_from_file(game_fname)

winner = game_table.get_column("winner")
first_Blood = game_table.get_column("firstBlood")
first_Tower = game_table.get_column("firstTower")
first_Inhibitor = game_table.get_column("firstInhibitor")
first_Baron = game_table.get_column("firstBaron")
first_Dragon = game_table.get_column("firstDragon")
first_RiftHerald = game_table.get_column("firstRiftHerald")

game_X = [[first_Blood[x], first_Tower[x], first_Inhibitor[x], first_Baron[x], first_Dragon[x], first_RiftHerald[x]] for x in range(len(winner))]
game_Y = [y for y in winner]


In [4]:
print(str(round(myutils.get_win_count(game_table, "winner", "firstBlood") * 100, 2)) + "% of games with first Blood are won")

print(str(round(myutils.get_win_count(game_table, "winner", "firstTower") * 100, 2)) + "% of games with first Tower are won")

print(str(round(myutils.get_win_count(game_table, "winner", "firstInhibitor") * 100, 2)) + "% of games with first Inhibitor are won")

print(str(round(myutils.get_win_count(game_table, "winner", "firstBaron") * 100, 2)) + "% of games with first Baron are won")

print(str(round(myutils.get_win_count(game_table, "winner", "firstDragon") * 100, 2)) + "% of games with first Dragon are won")

print(str(round(myutils.get_win_count(game_table, "winner", "firstRiftHerald") * 100, 2)) + "% of games with first Rift Herald are won")

58.56% of games with first Blood are won
68.92% of games with first Tower are won
79.83% of games with first Inhibitor are won
49.1% of games with first Baron are won
65.6% of games with first Dragon are won
34.0% of games with first Rift Herald are won


In [5]:
# KNNeightbors
test_size = 2000
X_train, X_test, y_train, y_test = myevaluation.train_test_split(copy.deepcopy(game_X), copy.deepcopy(game_Y), test_size=test_size, shuffle=True)

knn = MyKNeighborsClassifier()
knn.fit(X_train, y_train)
predictions = []
for i, x in enumerate(X_test):
    prediction = knn.predict([x])
    predictions.append(prediction[0])

acc = round(sum([int(x==y) for x,y in zip(predictions, y_test)])/len(predictions), 2)
print("KNN:" )
print("Accuracy = " + str(acc))
print("Error rate = " + str(1-acc))

KNN:
Accuracy = 0.48
Error rate = 0.52


In [6]:
headers = ["Team", "1", "2", "total", "recognition (%)"]
mat = myevaluation.confusion_matrix(y_test, predictions, [1.0, 2.0])
myutils.build_confusion_matrix(mat)
print("KNN Classifier (Train Test Split)")
print(tabulate(mat, headers))

KNN Classifier (Train Test Split)
  Team    1     2    total    recognition (%)
------  ---  ----  -------  -----------------
     1    0  1033     1033                  0
     2    0   967      967                100


In [7]:
test_size = 2000

X_train, X_test, y_train, y_test = myevaluation.train_test_split(copy.deepcopy(game_X), copy.deepcopy(game_Y), test_size=test_size, shuffle=True)

nb = MyNaiveBayesClassifier()
nb.fit(X_train, y_train)
predictions = []
for i, x in enumerate(X_test):
    prediction = nb.predict([x])
    predictions.append(prediction[0])
acc = round(sum([int(x==y) for x,y in zip(predictions, y_test)])/len(predictions), 2)
print("Naive Bayes: ")
print("Accuracy = " + str(acc))
print("Error rate = " + str(1-acc))

Naive Bayes: 
Accuracy = 0.9
Error rate = 0.09999999999999998


In [8]:
headers = ["Team", "1", "2", "total", "recognition (%)"]
mat = myevaluation.confusion_matrix(y_test, predictions, [1.0, 2.0])
myutils.build_confusion_matrix(mat)
print("Naive Bayes Classifier (Train Test Split)")
print(tabulate(mat, headers))

Naive Bayes Classifier (Train Test Split)
  Team    1    2    total    recognition (%)
------  ---  ---  -------  -----------------
     1  902   96      998              90.38
     2  113  889     1002              88.72


In [24]:
X_train, X_test, y_train, y_test = myevaluation.train_test_split(copy.deepcopy(game_X), copy.deepcopy(game_Y), test_size=2000)

best_trees = []
min_accuracy = .50

for i in range(20):
    F = random.randint(1,10)
    N = random.randint(10,20)
    M = random.randint(1,N)

    rf = MyRandomForestClassifier()
    rf.fit(game_X, game_Y, N=5, M=4, F=1)

    predictions = []
    for i, x in enumerate(X_test):
        prediction = rf.predict([x])
        predictions.append(int(prediction[0] == y_test[i]))
    if sum(predictions)/len(predictions) > min_accuracy:
        print("F:", F, "N:", N, "M:", M, "Accuracy:", sum(predictions)/len(predictions))
        f = open("best_tree.txt", "w")
        f.write(str(rf.trees))
        f.close()
        max_accuracy = sum(predictions)/len(predictions)
        best_trees = rf.trees

F: 2 N: 17 M: 12 Accuracy: 0.899
F: 1 N: 10 M: 8 Accuracy: 0.899
F: 9 N: 13 M: 2 Accuracy: 0.899
F: 6 N: 15 M: 1 Accuracy: 0.8985
F: 6 N: 20 M: 8 Accuracy: 0.898
F: 3 N: 16 M: 3 Accuracy: 0.897
F: 3 N: 18 M: 9 Accuracy: 0.898
F: 2 N: 19 M: 7 Accuracy: 0.899
F: 1 N: 18 M: 16 Accuracy: 0.8975
F: 8 N: 14 M: 3 Accuracy: 0.9015
F: 7 N: 13 M: 11 Accuracy: 0.899
F: 5 N: 14 M: 12 Accuracy: 0.902
F: 2 N: 13 M: 10 Accuracy: 0.8985
F: 3 N: 14 M: 14 Accuracy: 0.897
F: 9 N: 17 M: 12 Accuracy: 0.8985
F: 5 N: 19 M: 10 Accuracy: 0.9015
F: 8 N: 12 M: 7 Accuracy: 0.899
F: 5 N: 18 M: 5 Accuracy: 0.8975
F: 3 N: 12 M: 2 Accuracy: 0.902
F: 4 N: 17 M: 9 Accuracy: 0.9015


In [13]:
import ast

if best_trees == []:
    with open("best_tree.txt", "r") as data:
        best_trees = ast.literal_eval(data.read())

In [25]:
test_size = 2000

X_train, X_test, y_train, y_test = myevaluation.train_test_split(copy.deepcopy(game_X), copy.deepcopy(game_Y), test_size=test_size, shuffle=True)

rf = MyRandomForestClassifier()
rf.trees = copy.deepcopy(best_trees)
predictions = []

f = open("best_tree.txt", "w")
f.write(str(rf.trees))
f.close()
for i, x in enumerate(X_test):
    prediction = rf.predict([x])
    predictions.append(prediction[0])

acc = round(sum([int(x==y) for x,y in zip(predictions, y_test)])/len(predictions), 2)
print("Random Forest")
print("Accuracy = ", acc)
print("Error Rate =", str(1-acc))

Random Forest
Accuracy =  0.9
Error Rate = 0.09999999999999998


In [26]:
headers = ["Team", "1", "2", "total", "recognition (%)"]
mat = myevaluation.confusion_matrix(y_test, predictions, [1.0, 2.0])
myutils.build_confusion_matrix(mat)
print("Random Forest Classifier (Train Test Split)")
print(tabulate(mat, headers))

Random Forest Classifier (Train Test Split)
  Team    1    2    total    recognition (%)
------  ---  ---  -------  -----------------
     1  941  104     1045              90.05
     2  100  855      955              89.53
