In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Traditional ML
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score, 
    roc_curve, 
    auc
)

# XGBoost
import xgboost as xgb

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# System utilities
from pathlib import Path
import warnings

2024-11-12 21:31:11.247450: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
#Taking the train_test_split data from pstats_featsel.ipynb

X_train = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/X_train.csv')
X_test = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/X_test.csv')
y_train = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/y_train.csv')
y_test = pd.read_csv('/Users/powellshayne/Desktop/githubrepos/VictorVis2.0/Shayne/y_test.csv')

Unnamed: 0,nickname,series_count,game_count,total_kills,avg_kills,max_kills,min_kills,total_deaths,avg_deaths,max_deaths,min_deaths,kills_per_game,deaths_per_game
0,-Nami,28,67,962,34.357143,50,7,1046,37.357143,53,16,14.358209,15.61194
1,mtsGOD,3,3,30,10.0,11,9,49,16.333333,20,14,10.0,16.333333
2,★ ⑳ JonY BoY,7,18,286,40.857143,63,25,275,39.285714,61,26,15.888889,15.277778
3,betinho,1,3,6,6.0,6,6,15,15.0,15,15,2.0,5.0
4,agoz,2,4,73,36.5,51,22,62,31.0,37,25,18.25,15.5


In [7]:
X_train = X_train.set_index('nickname')
X_test = X_test.set_index('nickname')
y_train = y_train.set_index('nickname')
y_test = y_test.set_index('nickname')

X_train[:5]

Unnamed: 0_level_0,series_count,game_count,total_kills,avg_kills,max_kills,min_kills,total_deaths,avg_deaths,max_deaths,min_deaths,kills_per_game,deaths_per_game
nickname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
-Nami,28,67,962,34.357143,50,7,1046,37.357143,53,16,14.358209,15.61194
mtsGOD,3,3,30,10.0,11,9,49,16.333333,20,14,10.0,16.333333
★ ⑳ JonY BoY,7,18,286,40.857143,63,25,275,39.285714,61,26,15.888889,15.277778
betinho,1,3,6,6.0,6,6,15,15.0,15,15,2.0,5.0
agoz,2,4,73,36.5,51,22,62,31.0,37,25,18.25,15.5


In [10]:
scale = StandardScaler()

X_train_scaled = scale.fit_transform(X_train)
X_test_scaled  = scale.fit_transform(X_test)
display(X_train_scaled[:5])
display(X_test_scaled[:5])

array([[ 0.2595198 ,  0.25740021,  0.22137764,  0.19284888, -0.08041345,
        -1.35357333,  0.2773775 ,  0.5127854 ,  0.2222762 , -0.5204694 ,
         0.05552067,  0.43306803],
       [-0.55939891, -0.61476167, -0.60428727, -2.92621771, -2.50725923,
        -1.10428726, -0.59115075, -2.4742164 , -2.16797125, -0.77996057,
        -1.42361186,  0.73460304],
       [-0.42837192, -0.41034873, -0.37749519,  1.02520976,  0.72853515,
         0.89000125, -0.39427273,  0.78679123,  0.80173013,  0.77698646,
         0.57501813,  0.29339148],
       [-0.62491241, -0.61476167, -0.62554902, -3.43843979, -2.81839331,
        -1.47821636, -0.62076957, -2.66365252, -2.53012995, -0.65021499,
        -4.13873184, -4.0026158 ],
       [-0.59215566, -0.60113414, -0.56619328,  0.46725357, -0.01818663,
         0.51607215, -0.57982591, -0.39041899, -0.93663165,  0.64724088,
         1.37635563,  0.38627812]])

array([[-4.96177502e-01, -5.56806760e-01, -5.14312237e-01,
        -1.38959300e+00, -1.38918339e+00, -4.10605572e-01,
        -5.36988135e-01, -1.90027426e+00, -1.91308123e+00,
        -6.22522951e-01,  1.38382099e+00,  1.15771108e+00],
       [ 4.04611852e+00,  3.97590289e+00,  3.95227204e+00,
         5.26674624e-01,  1.23347807e+00, -7.52561000e-01,
         3.92327852e+00,  1.96516406e-01,  1.76786385e+00,
        -6.22522951e-01,  6.83217035e-01,  2.13084186e-01],
       [-4.64189502e-01, -4.62655521e-01, -4.54539431e-01,
        -4.37298916e-02,  2.43289560e-03,  3.87290429e-01,
        -4.45042103e-01,  6.86957092e-01,  3.78827967e-01,
         6.09792420e-01, -2.59957423e-01,  4.62582119e-01],
       [ 2.71534501e-01,  2.90554392e-01,  2.45539315e-01,
         3.71863828e-01,  3.77098818e-01, -2.96620429e-01,
         3.97501825e-01,  9.10051687e-01,  1.28170129e+00,
        -6.23796004e-02,  3.12730486e-01,  8.57809527e-01],
       [-5.60153502e-01, -5.70256937e-01, -5.2905073

## Gradient Boost + Neural Network

In [11]:
xgbr  = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators = 100,
    max_depth = 6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree = 0.8,
    random_state = 60
)

xgbr.fit(X_train_scaled, y_train)

In [14]:
xgb_train_pred  = xgbr.predict(X_train_scaled)
xgb_test_pred = xgbr.predict(X_test_scaled)
display(xgb_train_pred[:5])
display(xgb_test_pred[:5])


array([0.91450185, 0.61156946, 1.0319568 , 0.3999947 , 1.1783853 ],
      dtype=float32)

array([1.0693251 , 1.0663396 , 0.83534074, 0.90695447, 0.93472546],
      dtype=float32)

In [15]:
X_train_nn = np.column_stack((X_train_scaled, xgb_train_pred))
X_test_nn = np.column_stack((X_test, xgb_test_pred))
display(X_train_nn[:5])
display(X_test_nn[:5])

array([[ 0.2595198 ,  0.25740021,  0.22137764,  0.19284888, -0.08041345,
        -1.35357333,  0.2773775 ,  0.5127854 ,  0.2222762 , -0.5204694 ,
         0.05552067,  0.43306803,  0.91450185],
       [-0.55939891, -0.61476167, -0.60428727, -2.92621771, -2.50725923,
        -1.10428726, -0.59115075, -2.4742164 , -2.16797125, -0.77996057,
        -1.42361186,  0.73460304,  0.61156946],
       [-0.42837192, -0.41034873, -0.37749519,  1.02520976,  0.72853515,
         0.89000125, -0.39427273,  0.78679123,  0.80173013,  0.77698646,
         0.57501813,  0.29339148,  1.03195679],
       [-0.62491241, -0.61476167, -0.62554902, -3.43843979, -2.81839331,
        -1.47821636, -0.62076957, -2.66365252, -2.53012995, -0.65021499,
        -4.13873184, -4.0026158 ,  0.3999947 ],
       [-0.59215566, -0.60113414, -0.56619328,  0.46725357, -0.01818663,
         0.51607215, -0.57982591, -0.39041899, -0.93663165,  0.64724088,
         1.37635563,  0.38627812,  1.17838526]])

array([[3.00000000e+00, 3.00000000e+00, 5.60000000e+01, 1.86666667e+01,
        2.40000000e+01, 1.50000000e+01, 5.30000000e+01, 1.76666667e+01,
        2.10000000e+01, 1.50000000e+01, 1.86666667e+01, 1.76666667e+01,
        1.06932509e+00],
       [1.45000000e+02, 3.40000000e+02, 5.51100000e+03, 3.80068966e+01,
        7.30000000e+01, 1.20000000e+01, 5.09800000e+03, 3.51586207e+01,
        7.40000000e+01, 1.50000000e+01, 1.62088235e+01, 1.49941176e+01,
        1.06633961e+00],
       [4.00000000e+00, 1.00000000e+01, 1.29000000e+02, 3.22500000e+01,
        5.00000000e+01, 2.20000000e+01, 1.57000000e+02, 3.92500000e+01,
        5.40000000e+01, 2.60000000e+01, 1.29000000e+01, 1.57000000e+01,
        8.35340738e-01],
       [2.70000000e+01, 6.60000000e+01, 9.84000000e+02, 3.64444444e+01,
        5.70000000e+01, 1.60000000e+01, 1.11000000e+03, 4.11111111e+01,
        6.70000000e+01, 2.00000000e+01, 1.49090909e+01, 1.68181818e+01,
        9.06954467e-01],
       [1.00000000e+00, 2.00000000e+

In [16]:
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=X_train_nn.shape[1], activation='relu'))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(1))

nn_model.compile(optimizer='adam', loss='mean_squared_error')

nn_model.fit(X_train_nn, y_train,epochs=50,batch_size=32, validation_data=(X_test_nn, y_test))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.5545 - val_loss: 120713.0234
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.2087 - val_loss: 148340.5000
Epoch 3/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1084 - val_loss: 77089.8984
Epoch 4/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0598 - val_loss: 53362.2734
Epoch 5/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0331 - val_loss: 53249.1016
Epoch 6/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0242 - val_loss: 45797.0391
Epoch 7/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0180 - val_loss: 38413.4414
Epoch 8/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0136 - val_loss: 39442.6719
Epoch 9/50
[1m11/11[0m [32m━━

<keras.src.callbacks.history.History at 0x154f03650>

In [17]:
y_pred_nn = nn_model.predict(X_test_nn)
display(y_pred_nn[:5])

mse_nn = mean_squared_error(y_test,  y_pred_nn)
display(mse_nn)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


array([[ 14.021378 ],
       [598.0219   ],
       [ 35.71355  ],
       [143.54182  ],
       [ 12.9273615]], dtype=float32)

24892.17064430419