In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import xgboost as xgb

In [79]:
from src.dataset import load_team_data, load_agg_player_data
from src.preprocessing import impute_missing_values, split_data, remove_name_columns, encode_target_variable

# Load the team-level table together with its target, and the aggregated player-level table.
team_statistics, y = load_team_data()
player_statistics = load_agg_player_data()

# Column-wise concatenation; join="inner" keeps only index labels present in both tables.
# NOTE(review): y is not re-indexed here -- confirm it still lines up with x
# row-for-row if the inner join drops any rows.
x = pd.concat(
    objs=[team_statistics, player_statistics],
    join="inner",
    axis="columns",
)

In [80]:
# Clean the feature frame and encode the target, then split into train/val/test.
x, y = remove_name_columns(x), encode_target_variable(y)
(x_train, y_train), (x_val, y_val), (x_test, y_test) = split_data(x, y)

# The training call is made without an imputer and returns one (plus the column
# list); the validation and test calls are given that same imputer and columns
# -- presumably so the imputation statistics come from the training split only.
x_train, imputer, columns = impute_missing_values(x_train)
x_val, _, _ = impute_missing_values(
    x_val, numeric_columns=columns, imputer=imputer
)
x_test, _, _ = impute_missing_values(
    x_test, numeric_columns=columns, imputer=imputer
)

In [81]:
# Hyper-parameters transcribed from a sweep run's command-line flags.
# The sweep's --l1_reg / --l2_reg values are stored under the "alpha" / "lambda"
# keys; --num_boost_round and --early_stopping_rounds are kept as separate
# variables below rather than inside the dict.
sweep_result = dict([
    ("colsample_bylevel", 0.21270862659553913),
    ("colsample_bynode", 0.3425437174862951),
    ("colsample_bytree", 0.5109253108234333),
    ("eta", 0.28997103267508106),
    ("gamma", 0.17127497737359776),
    ("alpha", 0.03016313633404244),    # sweep flag: l1_reg
    ("lambda", 1.9098450598114027),    # sweep flag: l2_reg
    ("max_depth", 14),
    ("max_leaves", 3),
    ("min_child_weight", 0.971287126583725),
    ("subsample", 0.6839398034341768),
])

num_boost_round_sweep = 961
early_stopping_rounds_sweep = 8

In [82]:
# Build the full parameter set as a NEW dict. The previous `xgb_params = sweep_result`
# only aliased the sweep dict, so the fixed settings below were written into
# `sweep_result` itself and leaked into any later use of it.
xgb_params = {
    **sweep_result,
    "booster": "gbtree",
    "device": "cuda",
    "objective": "multi:softmax",  # 3-class problem, see num_class
    "num_class": 3,
    "eval_metric": "merror",
    "verbosity": 0,
}

num_boost_round = num_boost_round_sweep
early_stopping_rounds = early_stopping_rounds_sweep

dtrain = xgb.DMatrix(x_train, label=y_train)
dval = xgb.DMatrix(x_val, label=y_val)
dtest = xgb.DMatrix(x_test, label=y_test)

# Per the XGBoost docs, early stopping monitors the LAST entry of `evals`,
# so "val" must stay last in this list.
evals = [(dtrain, "train"), (dval, "val")]

bst = xgb.train(
    xgb_params,
    dtrain,
    num_boost_round=num_boost_round,
    evals=evals,
    early_stopping_rounds=early_stopping_rounds,
    verbose_eval=True,
)

[0]	train-merror:0.52852	val-merror:0.52006
[1]	train-merror:0.52445	val-merror:0.51397
[2]	train-merror:0.52458	val-merror:0.50990
[3]	train-merror:0.52369	val-merror:0.50482
[4]	train-merror:0.52166	val-merror:0.50787
[5]	train-merror:0.51962	val-merror:0.50533
[6]	train-merror:0.51759	val-merror:0.50127
[7]	train-merror:0.51670	val-merror:0.50076
[8]	train-merror:0.51492	val-merror:0.49975
[9]	train-merror:0.51454	val-merror:0.49619
[10]	train-merror:0.51061	val-merror:0.49467
[11]	train-merror:0.51061	val-merror:0.49314
[12]	train-merror:0.50934	val-merror:0.49467
[13]	train-merror:0.50756	val-merror:0.49365
[14]	train-merror:0.50413	val-merror:0.49416
[15]	train-merror:0.50311	val-merror:0.49670
[16]	train-merror:0.50260	val-merror:0.49365
[17]	train-merror:0.50133	val-merror:0.49467
[18]	train-merror:0.50044	val-merror:0.49416


In [83]:
from src.evaluate import evaluate_model

# Score the trained booster on the validation split first, then on the test split.
acc_val, acc_test = (
    evaluate_model(bst, dmatrix, labels)
    for dmatrix, labels in ((dval, y_val), (dtest, y_test))
)

print(f"Validation accuracy: {acc_val:.4f}")
print(f"Test accuracy: {acc_test:.4f}")

Validation accuracy: 0.5053
Test accuracy: 0.4900


In [85]:
from src.postprocessing import  compute_prediction, save_predictions

# Build the feature matrix for the data loaded with train=False, using the same
# steps as for the training data (concat -> drop name columns -> impute with the
# imputer/columns obtained from x_train).
# NOTE(review): this rebinds team_statistics, player_statistics, x_test and dtest,
# replacing the labelled test split created earlier; re-running the evaluation
# cell after this one will no longer score that split.
team_statistics = load_team_data(train=False)
player_statistics = load_agg_player_data(train=False)

x_test = pd.concat([team_statistics, player_statistics], axis=1, join='inner')
x_test = remove_name_columns(x_test)
x_test, _, _ = impute_missing_values(x_test, imputer=imputer, numeric_columns=columns)

dtest = xgb.DMatrix(x_test)

# `iteration_range` is half-open [begin, end), so the end must be
# best_iteration + 1 for the best round's trees to be included. The previous
# (0, bst.best_iteration) dropped the best round and predicted with the model
# from one round earlier.
y_pred = bst.predict(dtest, iteration_range=(0, bst.best_iteration + 1))
predictions = compute_prediction(y_pred, x_test)

save_predictions(predictions, "xgboost_player.csv")