In [224]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

# The CSV holds two rows per game, one for each team. Split the rows by side
# (100 -> blue, 200 -> red), then join the two halves on the game id so that
# every game becomes a single row carrying the data of both teams.
df = pd.read_csv("csv/sql/processed.csv", dtype={'teamid': 'str'})

blue_side = (
    df[df["side"] == 100]
    .rename(columns={"teamid": "blue_teamid"})
    .drop(columns=["side"])
)

# "esportsgameid" is dropped from the red half so only the blue half's copy
# survives the merge (un-suffixed).
red_side = (
    df[df["side"] == 200]
    .rename(columns={"teamid": "red_teamid"})
    .drop(columns=["side", "esportsgameid"])
)

# Columns present on both halves receive the "_blue" / "_red" suffixes.
# The target is 100 when the blue row has win == 1, otherwise 200; the raw
# per-side win flags are removed once the target has been derived.
df = (
    blue_side.merge(red_side, on="platformgameid", suffixes=("_blue", "_red"))
    .assign(winningteam=lambda games: np.where(games["win_blue"] == 1, 100, 200))
    .drop(columns=["win_blue", "win_red"])
)
display(df.head())

# Missing values are replaced by 0 after the preview above has been rendered.
df = df.fillna(0)

# Base feature names; the "_blue" / "_red" variants of each enabled entry are
# used as model inputs. Uncomment entries to enable them.
features = [
    # "first_blood_avg",
    # "first_inhibitor_avg",
    # "first_tower_avg",
    # "kills_avg",
    "win_avg",
    # "deaths_avg",
    # "level_avg",
    # "cs_avg",
]

def gaus_test(features, df, random_state=None):
    """Fit a Gaussian Naive Bayes model on one random split and score it.

    Parameters
    ----------
    features : iterable of str
        Base feature names. For every name, the columns ``<name>_blue`` and
        ``<name>_red`` of ``df`` are used as model inputs.
    df : pandas.DataFrame
        Frame containing those columns plus the ``winningteam`` target column.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``train_test_split``. ``None`` (the default) keeps the
        split unseeded, so every call draws a different split; pass an int
        to make a single run reproducible.

    Returns
    -------
    float
        Accuracy on the held-out 20% of the rows, rounded to 3 decimals.
    """
    columns = [name + "_blue" for name in features] + [name + "_red" for name in features]
    X = df[columns]
    y = df["winningteam"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=random_state
    )

    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    predictive_labels = gnb.predict(X_test)

    return round(accuracy_score(y_test, predictive_labels), 3)

def gaus_test_iter(features, df, n):
    """Return the mean of ``n`` independent ``gaus_test`` accuracies.

    Parameters
    ----------
    features : iterable of str
        Base feature names, passed through to ``gaus_test`` unchanged.
    df : pandas.DataFrame
        Data frame passed through to ``gaus_test`` unchanged.
    n : int
        Number of repetitions; the accumulated accuracy is divided by it.

    Returns
    -------
    float
        Sum of the ``n`` accuracies divided by ``n``.
    """
    total_accuracy = 0
    for _ in range(n):
        total_accuracy = total_accuracy + gaus_test(features, df)
    mean_accuracy = total_accuracy / n
    return mean_accuracy

# Cell output: accuracy averaged over 100 separate random 80/20 train/test
# splits, using only the currently enabled entries of `features`.
gaus_test_iter(features, df, 100)

Unnamed: 0,blue_teamid,esportsgameid,platformgameid,first_blood_time_blue,first_blood_blue,first_tower_time_blue,first_inhibitor_time_blue,first_tower_blue,first_inhibitor_blue,kills_blue,...,deaths_avg_red,level_avg_red,cs_avg_red,first_blood_avg_red,first_tower_avg_red,first_inhibitor_avg_red,first_blood_time_avg_red,first_tower_time_avg_red,first_inhibitor_time_avg_red,winningteam
0,100205573495116443,102181142519049888,ESPORTSTMNT01:1130786,22.6971,1,1048277,1972924,0,0,5,...,5.0,17.0,60.0,0.0,1.0,1.0,22.6971,1048277.0,1972924.0,200
1,100205573495116443,102181142519246497,ESPORTSTMNT01:1130806,9.405167,1,781709,1977250,1,0,4,...,4.5,16.5,56.5,0.0,0.5,1.0,16.051133,914993.0,1975087.0,200
2,100205573495116443,102181142519377570,ESPORTSTMNT01:1140917,9.104283,1,1166569,1915734,1,1,16,...,8.333333,16.0,57.333333,0.0,0.333333,0.666667,13.735517,998851.7,1955303.0,100
3,100205573495116443,102181142541397833,ESPORTSTMNT01:1143282,3.75565,1,769258,1844212,0,1,13,...,14.6,15.2,60.2,0.0,0.8,0.4,10.96587,931040.8,1970765.0,100
4,100205573495116443,102181142541463370,ESPORTSTMNT01:1143414,6.889967,1,893223,1445342,1,0,10,...,13.2,14.8,59.0,0.0,0.6,0.4,11.69172,913104.4,1914671.0,200


0.8050499999999999

In [225]:
import itertools

# Search feature subsets until one reaches a mean accuracy above 0.8.
#
# * Combinations, not permutations: the model inputs are the same set of
#   columns regardless of their order, so permutations would re-evaluate each
#   subset up to 5! times.
# * The subset size is capped at the number of enabled features; asking for
#   5 items out of fewer than 5 yields nothing and the loop would silently
#   never run.
subset_size = min(5, len(features))
candidate_subsets = itertools.combinations(features, subset_size) if subset_size else ()

for comb in candidate_subsets:
    print(comb)
    acc = gaus_test_iter(comb, df, 100)
    # Report the score before the stop check so the winning subset's accuracy
    # is shown as well.
    print(acc)
    if acc > 0.8:
        break

In [226]:
# Fit one Gaussian Naive Bayes model on a single random 80/20 split and show
# the predicted class probabilities for the held-out rows (one row per test
# sample; column order follows `gnb.classes_`).
model_columns = [name + "_blue" for name in features] + [name + "_red" for name in features]
X = df[model_columns]
y = df["winningteam"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

gnb = GaussianNB()
model = gnb.fit(X_train, y_train)  # fit() returns the estimator itself
gnb.predict_proba(X_test)

array([[0.91325634, 0.08674366],
       [0.96904167, 0.03095833],
       [0.96851916, 0.03148084],
       ...,
       [0.53840084, 0.46159916],
       [0.91325634, 0.08674366],
       [0.98918551, 0.01081449]])