In [22]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Traditional ML
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score, 
    roc_curve, 
    auc
)

# XGBoost
import xgboost as xgb

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# System utilities
from pathlib import Path
import warnings

In [23]:
#Taking the train_test_split data from pstats_featsel.ipynb

X_train = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/X_train.csv')
X_test = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/X_test.csv')
y_train = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/y_train.csv')
y_test = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/y_test.csv')

In [24]:
#Assignment of 'nickname' column as index
X_train = X_train.set_index('nickname')
X_test = X_test.set_index('nickname')
y_train = y_train.set_index('nickname')
y_test = y_test.set_index('nickname')

X_train[:5]

Unnamed: 0_level_0,series_count,game_count,total_kills,avg_kills,max_kills,min_kills,total_deaths,avg_deaths,max_deaths,min_deaths,kills_per_game,deaths_per_game
nickname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
-Nami,28,67,962,34.357143,50,7,1046,37.357143,53,16,14.358209,15.61194
mtsGOD,3,3,30,10.0,11,9,49,16.333333,20,14,10.0,16.333333
★ ⑳ JonY BoY,7,18,286,40.857143,63,25,275,39.285714,61,26,15.888889,15.277778
betinho,1,3,6,6.0,6,6,15,15.0,15,15,2.0,5.0
agoz,2,4,73,36.5,51,22,62,31.0,37,25,18.25,15.5


In [25]:
#Asjusting dataframes to contain specific columns for predictions
X_train = X_train[['series_count', 'game_count', 'kills_per_game', 'deaths_per_game', 'avg_kills', 'avg_deaths']]
X_test = X_test[['series_count', 'game_count', 'kills_per_game', 'deaths_per_game', 'avg_kills', 'avg_deaths']]

In [26]:
#Scaling numerical data
scale = StandardScaler()

X_train_scaled = scale.fit_transform(X_train)
X_test_scaled  = scale.fit_transform(X_test)
display(X_train_scaled[:5])
display(X_test_scaled[:5])

array([[ 0.2595198 ,  0.25740021,  0.05552067,  0.43306803,  0.19284888,
         0.5127854 ],
       [-0.55939891, -0.61476167, -1.42361186,  0.73460304, -2.92621771,
        -2.4742164 ],
       [-0.42837192, -0.41034873,  0.57501813,  0.29339148,  1.02520976,
         0.78679123],
       [-0.62491241, -0.61476167, -4.13873184, -4.0026158 , -3.43843979,
        -2.66365252],
       [-0.59215566, -0.60113414,  1.37635563,  0.38627812,  0.46725357,
        -0.39041899]])

array([[-0.4961775 , -0.55680676,  1.38382099,  1.15771108, -1.389593  ,
        -1.90027426],
       [ 4.04611852,  3.97590289,  0.68321703,  0.21308419,  0.52667462,
         0.19651641],
       [-0.4641895 , -0.46265552, -0.25995742,  0.46258212, -0.04372989,
         0.68695709],
       [ 0.2715345 ,  0.29055439,  0.31273049,  0.85780953,  0.37186383,
         0.91005169],
       [-0.5601535 , -0.57025694,  1.47883709,  2.68935117,  0.5259913 ,
         1.25634777]])

## Gradient Boost + Neural Network

In [27]:
#Creating Gradient Boost Model
xgbr  = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators = 100,
    max_depth = 6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree = 0.8,
    random_state = 60
)

xgbr.fit(X_train_scaled, y_train)

In [28]:
#Retrieving XGB predictions
xgb_train_pred  = xgbr.predict(X_train_scaled)
xgb_test_pred = xgbr.predict(X_test_scaled)
display(xgb_train_pred[:5])
display(xgb_test_pred[:5])


array([0.9128467 , 0.61061186, 1.0310825 , 0.39916033, 1.17431   ],
      dtype=float32)

array([1.0204053, 1.0676484, 0.8287656, 0.8896236, 0.9228232],
      dtype=float32)

In [29]:
#Preparing XGB content to flow into Neural Network
X_train_nn = np.column_stack((X_train_scaled, xgb_train_pred))
X_test_nn = np.column_stack((X_test, xgb_test_pred))
display(X_train_nn[:5])
display(X_test_nn[:5])

array([[ 0.2595198 ,  0.25740021,  0.05552067,  0.43306803,  0.19284888,
         0.5127854 ,  0.91284668],
       [-0.55939891, -0.61476167, -1.42361186,  0.73460304, -2.92621771,
        -2.4742164 ,  0.61061186],
       [-0.42837192, -0.41034873,  0.57501813,  0.29339148,  1.02520976,
         0.78679123,  1.03108251],
       [-0.62491241, -0.61476167, -4.13873184, -4.0026158 , -3.43843979,
        -2.66365252,  0.39916033],
       [-0.59215566, -0.60113414,  1.37635563,  0.38627812,  0.46725357,
        -0.39041899,  1.17430997]])

array([[  3.        ,   3.        ,  18.66666667,  17.66666667,
         18.66666667,  17.66666667,   1.02040529],
       [145.        , 340.        ,  16.20882353,  14.99411765,
         38.00689655,  35.15862069,   1.06764841],
       [  4.        ,  10.        ,  12.9       ,  15.7       ,
         32.25      ,  39.25      ,   0.82876557],
       [ 27.        ,  66.        ,  14.90909091,  16.81818182,
         36.44444444,  41.11111111,   0.88962358],
       [  1.        ,   2.        ,  19.        ,  22.        ,
         38.        ,  44.        ,   0.92282319]])

In [30]:
#Create/Build Neural Network Model
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=X_train_nn.shape[1], activation='relu'))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(1))

nn_model.compile(optimizer='adam', loss='mean_squared_error')

nn_model.fit(X_train_nn, y_train,epochs=50,batch_size=32, validation_data=(X_test_nn, y_test))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 1.1604 - val_loss: 38.0063
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.2981 - val_loss: 364.4749
Epoch 3/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1193 - val_loss: 560.1114
Epoch 4/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0820 - val_loss: 494.5771
Epoch 5/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0493 - val_loss: 389.1700
Epoch 6/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0363 - val_loss: 339.7477
Epoch 7/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0179 - val_loss: 366.6721
Epoch 8/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0137 - val_loss: 326.3754
Epoch 9/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1551b6510>

In [34]:
#Retrieving XGB + Neural Network Prediction/Mean Squared Error Data
y_pred_nn = nn_model.predict(X_test_nn)
display(y_pred_nn[:5])

mse_nn = mean_squared_error(y_test,  y_pred_nn)
display(mse_nn)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


array([[ 3.002139 ],
       [47.931145 ],
       [ 3.6866665],
       [ 8.771574 ],
       [ 4.439583 ]], dtype=float32)

144.69810634111252

## Notes

- Perhaps using a Neural Network with XGBoost wasn't the best decision when attempting to make predictions.
- The mean_squared_error is exponentially high,  meaning the predictions are significantly  far from the data used to make the predictions.

## Thoughts

- Perhaps only using XGBoost will result with a better mean_squared_error.

In [35]:
#Retrieving XGB prediction/mean squared error data
XG_mse = mean_squared_error(y_test, xgb_test_pred)
XG_mse

0.007714637396182838

## Final Note
- XGBoost is the better model to use for predictions based off of the kill_death_ratio i the cleaned player stats data collection.
- The mean squared error is ~0.008 compared to the XGBoost  + Neural Network mean squared error of ~144.7