In [None]:
import warnings

warnings.filterwarnings('ignore')

import os

from tools.modeling.timeserieshyperparamtuner import TimeSeriesHyperParamTuner
from tools.modeling.timeseriesmodeler import TimeSeriesModeler

from nba.common import add_game_log_lag, add_game_log_last_5
import nba.constants as c

from xgboost import XGBRegressor
from hyperopt import hp

import numpy as np
import pandas as pd

import glob
import shap

# Notebook-wide configuration: project root, run constants and input file list.
#
# ROOT_DIR is the current working directory with its last 17 characters removed
# (presumably the notebook's own sub-directory -- TODO confirm). The slice is
# tied to where the kernel is started, so check the value echoed by this cell.
ROOT_DIR = os.getcwd()[:-17] # verify this points to the root directory of the project


SEED = 1
early_stopping = 50
season = c.SEASON_2022_2023
# os.path.join only inserts a separator when ROOT_DIR lacks a trailing one, so
# the directory resolves correctly whether or not the slice above keeps it.
player_game_log_path = os.path.join(ROOT_DIR, 'data', 'game_logs', '2022-23', 'player')
player_game_log_csv_files = glob.glob(os.path.join(player_game_log_path, '*.csv'))

# Empty placeholder; the data-loading cell fills it with the per-player logs.
log = pd.DataFrame()

# Echo the resolved root so it can be checked by eye.
ROOT_DIR

In [None]:
# Get Data
# Read each per-player CSV once and concatenate in a single call. Growing a
# DataFrame with pd.concat inside a loop re-copies all accumulated rows on
# every iteration, and re-running the cell would append the same files again.
player_frames = [pd.read_csv(f) for f in player_game_log_csv_files]
# pd.concat raises on an empty list, so keep an empty frame when no file matched.
log = pd.concat(player_frames, ignore_index=True) if player_frames else pd.DataFrame()

# Persist the combined log one level above the per-player directory.
log.to_csv(
    os.path.join(ROOT_DIR, 'data', 'game_logs', '2022-23', 'player_game_log_2022-2023.csv'),
    index=False,
)
log

In [None]:
# Parse the game date column into datetimes, then let the project helpers
# add their derived columns to the log.
log[c.GAME_DATE] = pd.to_datetime(log[c.GAME_DATE])
log = add_game_log_last_5(log)
log = add_game_log_lag(log)

# sort_values returns a new frame, so the result must be assigned back for the
# chronological order to take effect. mergesort is stable: rows sharing a date
# keep their current relative order, which keeps the ordering reproducible.
log = log.sort_values(by=[c.GAME_DATE], kind='mergesort')
log

In [None]:
# Preprocess Data
# Columns kept for modeling: the target followed by the two feature masks.
PTS_PREDICTION_MASK = [c.PTS] + c.GAME_LOG_LAST_5_MASK + c.GAME_LOG_LAG_MASK
# dropna returns a new frame, so chain and assign it; a bare `log.dropna()`
# discards its result and leaves the NaN rows in place. This also makes `log`
# an independent frame rather than a slice of the previous one.
log = log[PTS_PREDICTION_MASK].dropna()

# Convert every non-numeric column to the pandas 'category' dtype in one call.
cats = log.select_dtypes(exclude=np.number).columns.tolist()
log = log.astype({col: 'category' for col in cats})

In [None]:
# RMSE per model variant; both entries start at 0 and are overwritten once the
# corresponding model has been trained.
rmse = dict.fromkeys(('base_model', 'tuned_model'), 0)

# Fixed (untuned) settings passed to the baseline training run.
params = dict(
    objective='reg:squarederror',
    eval_metric='rmse',
    n_estimators=1000,
)

In [None]:
# Baseline run: wrap a default XGBRegressor in the project modeler, train it
# with the fixed `params` against the c.PTS column, and record its RMSE.
base_estimator = XGBRegressor()
ts_modeler = TimeSeriesModeler(base_estimator)
ts_modeler.ts_train(params, log, c.PTS)
rmse.update(base_model=ts_modeler.ts_rmse())

In [None]:
# Bar plot of whatever ts_modeler.shap_values() returns after the baseline
# training run, with at most 100 bars displayed.
base_shap_values = ts_modeler.shap_values()
shap.plots.bar(base_shap_values, max_display=100)

In [None]:
# Hyperopt search space: two fixed settings plus one sampled distribution per
# tunable hyper-parameter (each hyperopt label matches its key).
# NOTE(review): hp.quniform samples floats, including for max_depth and
# n_estimators -- presumably the tuner casts them before training; confirm.
space = dict(
    objective='reg:squarederror',
    eval_metric='rmse',
    max_depth=hp.quniform('max_depth', 3, 18, 1),
    gamma=hp.uniform('gamma', 1, 9),
    reg_alpha=hp.quniform('reg_alpha', 40, 180, 1),
    reg_lambda=hp.uniform('reg_lambda', 0, 1),
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
    min_child_weight=hp.quniform('min_child_weight', 0, 10, 1),
    eta=hp.quniform('eta', 0.025, 0.5, 0.025),
    subsample=hp.quniform('subsample', 0.5, 1, 0.05),
    n_estimators=hp.quniform('n_estimators', 100, 1000, 1),
)

# Run the project tuner over the space; the first argument (250) is presumably
# the evaluation budget -- verify against TimeSeriesHyperParamTuner.
tuner = TimeSeriesHyperParamTuner(ts_modeler, space)
best = tuner.ts_optimize(250, log, c.PTS)
best

In [None]:
# Tuned run: retrain the same modeler with the settings returned by the tuner,
# record its RMSE, then display both scores.
ts_modeler.ts_train(best, log, c.PTS)
rmse.update(tuned_model=ts_modeler.ts_rmse())
rmse

In [None]:
# Bar plot of whatever ts_modeler.shap_values() returns after the tuned
# training run, with at most 100 bars displayed.
tuned_shap_values = ts_modeler.shap_values()
shap.plots.bar(tuned_shap_values, max_display=100)