# Import Module

In [37]:
import numpy as np
import pandas as pd

from xgboost import XGBClassifier

import joblib
import warnings
import os
import sys
# Extend the import search path before the dataset/preprocessing/modeling
# imports below (presumably those packages live under ../code - confirm).
sys.path.append('../code')
# Suppress all warnings for the rest of the session.
warnings.filterwarnings('ignore')
# Print the contents of the dataset directory (relative to the notebook's
# working directory).
print(os.listdir('../dataset'))

from sklearn.metrics import accuracy_score, confusion_matrix

# Project-local helpers; these imports come after the sys.path change above.
from dataset.load import Load_Data
from preprocessing.preprocessing import DeriveFeature, Preprocess
from modeling.Validation import KFoldValidation
from modeling.HyperParameterTuning import RandomForestEvaluation, XGBEvaluation


['Challenger_Ranked_Games_10minute.csv', 'Challenger_Ranked_Games_15minute.csv', 'preprocessing']


# Load Dataset

In [20]:
data_path = '../dataset'

# Step 1 - read the 10-minute dataset and get separate train/test frames
# (split_size=0.25 is forwarded to the project loader).
train, test = Load_Data(
    data_path,
    minute=10,
    return_test=True,
    split_size=0.25,
)

# Step 2 - run the project preprocessing (with scaling enabled) to obtain the
# feature matrices and label vectors used by the cells below.
X_train, X_test, y_train, y_test = Preprocess(train, test, scaling=True)

Train Dataset Shape:  (19806, 50)
Test Dataset Shape:  (6603, 50)
(19693, 58)
(6571, 58)


# Load Model

In [26]:
model_path = '../model'

# Previously saved model, restored from disk.
# NOTE(review): joblib.load unpickles the file, so only load model files from
# a trusted source.
pre_train_xgb = joblib.load(
    os.path.join(model_path, 'XGB_0.734')
)

# Baseline for comparison: an XGBoost classifier with a fixed depth and seed,
# fitted on the X_train/y_train currently in the notebook namespace.
default_xgb = XGBClassifier(max_depth=6, random_state=42)
# Kept as the last expression so the fitted estimator's repr is displayed.
default_xgb.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

# Prediction on Test Data

In [27]:
# Score both models on the held-out test split.
# scikit-learn metrics are documented as metric(y_true, y_pred), so the ground
# truth is passed first. Plain accuracy is symmetric in its two arguments, so
# the printed scores are unchanged.
pre_train_pred = pre_train_xgb.predict(X_test)
print("Pre-train Model Accuracy Score: ", accuracy_score(y_test, pre_train_pred))

default_xgb_pred = default_xgb.predict(X_test)
print("Default Model Accuracy Score: ", accuracy_score(y_test, default_xgb_pred))

Pre-train Model Accuracy Score:  0.7339826510424593
Default Model Accuracy Score:  0.7338304672043829


# Experiment

Take the model trained on the first 10 minutes of game data, feed it the 15-minute data, and check how well it predicts wins and losses.

Note that this is only an experiment to see how the model performs on a completely different dataset; it is not an approach that can actually be used in practice.

In [35]:
# Load and preprocess the 15-minute dataset with the same settings used for
# the 10-minute data.
# NOTE(review): whether Preprocess fits a fresh scaler on this data is not
# visible here - confirm the features are scaled consistently with the data
# the models were trained on.
train, test = Load_Data(
    data_path,
    minute=15,
    return_test=True,
    split_size=0.25,
)
X_train, X_test, y_train, y_test = Preprocess(train, test, scaling=True)

# Nothing is fitted in this cell; both splits are stacked row-wise into one
# set that the following cells use for prediction.
# NOTE(review): this rebinds X_train / y_train, the same names the baseline
# model was fitted on earlier. Re-running that fit cell after this one would
# train on the 15-minute data instead.
X_train = np.concatenate([X_train, X_test], axis=0)
y_train = np.concatenate([y_train, y_test], axis=0)

print("15 minute Dataset Shape: ", X_train.shape)

Train Dataset Shape:  (20125, 50)
Test Dataset Shape:  (6709, 50)
(8193, 58)
(2752, 58)
15 minute Dataset Shape:  (10945, 58)


## Pre-train Model Prediction

In [38]:
# Evaluate the loaded pre-trained model on the X_train / y_train currently in
# the namespace.
exp_pre_train_pred = pre_train_xgb.predict(X_train)
print("Accuracy: ", accuracy_score(exp_pre_train_pred, y_train))
# NOTE(review): scikit-learn documents confusion_matrix(y_true, y_pred) with
# rows = true labels. Predictions are passed first here, so in the displayed
# matrix rows correspond to predicted labels and columns to actual labels.
confusion_matrix(exp_pre_train_pred, y_train)

Accuracy:  0.7889447236180904


array([[4083,  983],
       [1327, 4552]], dtype=int64)

## Default Model Prediction

In [39]:
# Evaluate the baseline model on the X_train / y_train currently in the
# namespace. This rebinds default_xgb_pred, which an earlier cell used for the
# test-split predictions.
default_xgb_pred = default_xgb.predict(X_train)
print("Accuracy: ", accuracy_score(default_xgb_pred, y_train))
# NOTE(review): scikit-learn documents confusion_matrix(y_true, y_pred) with
# rows = true labels. Predictions are passed first here, so in the displayed
# matrix rows correspond to predicted labels and columns to actual labels.
confusion_matrix(default_xgb_pred, y_train)

Accuracy:  0.7860210141617177


array([[3995,  927],
       [1415, 4608]], dtype=int64)