In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt

In [None]:
# Load the long-format training rows and discard the "Team" column up front.
training_data = (
    pd.read_csv("combineddata/training.csv")
    .drop(columns=["Team"])
)
print(training_data.shape)


In [None]:
print(training_data.columns)

# Split the rows by side, renumber each half from 0, and glue them side by side
# so every output row holds one "Blue" row followed by one "Red" row.
# NOTE(review): the pairing is purely positional -- it assumes the i-th Blue row
# and the i-th Red row describe the same game; confirm against the CSV layout.
blue_half = training_data.loc[training_data["Side"] == "Blue"].reset_index(drop=True)
red_half = training_data.loc[training_data["Side"] == "Red"].reset_index(drop=True)
wide_training = pd.concat([blue_half, red_half], axis=1)

print(wide_training)

In [None]:

# The wide frame currently carries every original column twice (Blue half, then
# Red half); give each half its own prefix so the names are unique.
# Note: this cell reassigns `wide_training` after dropping columns, so running it
# a second time without rebuilding `wide_training` fails on the column-count
# mismatch.
blue_cols = ["B_" + col for col in training_data.columns]
red_cols = ["R_" + col for col in training_data.columns]

wide_training.columns = blue_cols + red_cols
print(training_data.columns)

# B_Winner is kept (it is used as the label later); the Red copy of it, the side
# markers and the tournament/match/season columns are removed.
unused_training_cols = [
    "R_Winner", "R_Side", "B_Side",
    "R_Tournament", "B_Tournament",
    "R_Match_Number", "B_Match_Number",
    "B_Season", "R_Season",
]
wide_training = wide_training.drop(columns=unused_training_cols)
print(wide_training.shape)


In [None]:
test_data = pd.read_csv("combineddata/test.csv")

# Same reshaping as for the training set: one row per game, Blue columns first,
# then Red columns, matched by position after resetting each half's index.
# NOTE(review): assumes Blue and Red rows appear in the same game order -- confirm.
blue_half = test_data.loc[test_data["Side"] == "Blue"].reset_index(drop=True)
red_half = test_data.loc[test_data["Side"] == "Red"].reset_index(drop=True)
wide_test = pd.concat([blue_half, red_half], axis=1)

blue_cols = ["B_" + col for col in test_data.columns]
red_cols = ["R_" + col for col in test_data.columns]

wide_test.columns = blue_cols + red_cols
print(test_data.columns)

# Team, tournament, match and season columns stay here on purpose: later cells
# read them from `wide_test` when building the prediction report.
wide_test = wide_test.drop(columns=["R_Winner", "R_Side", "B_Side"])
print(wide_test.shape)


In [None]:
# setting up for xgboost
# B_Winner is the label for both sets; the remaining columns are the features.
training_label = wide_training["B_Winner"]
training_dat = wide_training.drop(columns="B_Winner")

# The test frame still holds the identifying columns, so they are removed here
# together with the label before building the feature matrix.
test_label = wide_test["B_Winner"]
test_non_features = [
    "B_Winner",
    "B_Team", "R_Team",
    "B_Tournament", "R_Tournament",
    "B_Match_Number", "R_Match_Number",
    "B_Season", "R_Season",
]
test_dat = wide_test.drop(columns=test_non_features)

dtrain = xgb.DMatrix(data=training_dat, label=training_label)
dtest = xgb.DMatrix(data=test_dat, label=test_label)

# Quick sanity check that both matrices have the same number of columns.
print(training_dat.shape)
print(test_dat.shape)

In [None]:
# Booster configuration for a binary (Blue wins / Blue loses) classifier.
param = {
    'max_depth': 3,
    'learning_rate': 0.03,
    'objective': 'binary:logistic',
    'alpha': 0.4,
    'subsample': 0.8,
    'colsample_bynode': 0.5,
    'colsample_bytree': 0.5,
    'num_parallel_tree': 100,
    'gamma': 0.2,
    'eval_metric': 'error',
}

num_round = 145
# Both sets are evaluated after every round; "test" is listed last.
evallist = [(dtrain, "train"), (dtest, "test")]

# The number of boosting rounds is passed exactly once, by keyword. The third
# positional parameter of `xgb.train` is `num_boost_round`, so giving a
# positional round count *and* `num_boost_round=...` supplies the same argument
# twice. `evals` is passed by keyword as well so the call does not depend on
# positional order.
# NOTE(review): if a single-round forest of `num_parallel_tree` trees was the
# intent, set `num_round = 1` instead -- confirm.
bst = xgb.train(param, dtrain, num_boost_round=num_round, evals=evallist)

In [None]:
# Bar chart of per-feature importance for the trained booster.
xgb.plot_importance(booster=bst)

In [None]:
# Per-game report: identifying columns, the true outcome and the model's score.
# `.copy()` makes this an independent frame, so the column assignments below do
# not write into a slice of `wide_test`.
predictions = wide_test[
    ["B_Season", "B_Tournament", "B_Team", "R_Team", "B_Match_Number", "B_Winner"]
].copy()
predictions.columns = ["Season", "Tournament", "Blue", "Red", "Match_Number", "Blue_Win"]
predictions["Prediction"] = bst.predict(dtest)

# A game counts as correct (1) when the predicted score is within 0.5 of the
# actual label, otherwise 0. Computed column-wise instead of row by row.
predictions["Correct"] = (
    (predictions["Blue_Win"] - predictions["Prediction"]).abs() < 0.5
).astype(int)

print(predictions)

In [None]:
# Per-season scoreboard built in a single aggregation:
#   Games    - number of rows with a "Correct" value,
#   Accuracy - mean of "Correct" (share of games predicted correctly),
#   Correct  - number of games predicted correctly.
summary = (
    predictions.groupby("Season")["Correct"]
    .agg(Games="count", Accuracy="mean", Correct="sum")
    .reset_index()
)

summary


In [None]:
# Per-tournament scoreboard built in a single aggregation:
#   Games    - number of rows with a "Correct" value,
#   Accuracy - mean of "Correct" (share of games predicted correctly),
#   Correct  - number of games predicted correctly.
summary = (
    predictions.groupby("Tournament")["Correct"]
    .agg(Games="count", Accuracy="mean", Correct="sum")
    .reset_index()
)

summary

In [None]:

# Roll the per-game results up to one row per (Tournament, Match_Number).
# Only the columns that are actually needed are aggregated, so the string
# columns ("Blue", "Red") are never summed.
per_match = predictions.groupby(["Tournament", "Match_Number"])
match_predictions = pd.concat(
    [
        # "Prediction" here holds the number of games in the match (a count).
        per_match["Prediction"].count(),
        # Games predicted correctly, and games won by Blue, within the match.
        per_match[["Correct", "Blue_Win"]].sum(),
    ],
    axis=1,
)

# A match counts as correct when strictly more than half of its games were
# predicted correctly.
match_predictions["Match_Correct"] = (
    match_predictions["Correct"] > match_predictions["Prediction"] / 2
).astype(int)

match_predictions = match_predictions.reset_index()




In [None]:
# Per-tournament scoreboard at match level, built in a single aggregation.
# `match_predictions` has one row per (Tournament, Match_Number), so:
#   Games    - number of matches (the column name is kept from the game-level tables),
#   Accuracy - mean of "Match_Correct" (share of matches called correctly),
#   Correct  - number of matches called correctly.
summary = (
    match_predictions.groupby("Tournament")["Match_Correct"]
    .agg(Games="count", Accuracy="mean", Correct="sum")
    .reset_index()
)
summary

In [None]:
# 
# Write the trained booster to disk at the path below.
model_path = "models/6_4nominortwoyears.json"
bst.save_model(model_path)