# AutoML - Regression

This notebook uses PyCaret's regression module. PyCaret references:
* Main Site - https://pycaret.org/
* Docs - https://pycaret.readthedocs.io/en/latest/

## Table of Contents

* [Setup and Preprocessing](#setup)  
* [Compare Models](#compare)  
* [Create Model](#create)  
* [Tune Model](#tune)  
* [Evaluate Model](#evaluate)  
* [Finalize and Store Model](#finalize_and_store)


## Imports and Global Settings

In [1]:
import datetime
import json
import os
import sys
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from pycaret.regression import RegressionExperiment
from sqlalchemy import create_engine

# Make the parent of the working directory importable so the `src` package
# import below resolves. The path is normalised and only appended when it is
# not already present, so re-running this cell does not keep growing sys.path.
here = os.getcwd()
project_root = os.path.abspath(os.path.join(here, ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.utils.modeling_utils import (
    ModelSetup,
    evaluate_reg_model,
    calculate_roi,
    save_model_report,
)

# Populate the process environment (python-dotenv looks for a .env file),
# then read the database host and password used by the "Load Data" section.
# Either value is None when the corresponding variable is not set.
load_dotenv()
RDS_ENDPOINT = os.environ.get("RDS_ENDPOINT")
RDS_PASSWORD = os.environ.get("RDS_PASSWORD")

# Pandas display settings: allow wide/long frames to render without
# truncation and show floats with five digits of precision.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_info_columns", 200)
pd.set_option("display.precision", 5)

## Load Data

In [2]:
# Connection settings for the Postgres database. The host and password come
# from the RDS_ENDPOINT / RDS_PASSWORD environment variables read earlier.
username = "postgres"
password = RDS_PASSWORD
endpoint = RDS_ENDPOINT
database = "nba_betting"
port = "5432"

# Fail here with a clear message rather than building a URL that contains the
# literal text "None" and failing later at connect time.
_missing_env = [
    name
    for name, value in (("RDS_ENDPOINT", endpoint), ("RDS_PASSWORD", password))
    if value is None
]
if _missing_env:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_env)}"
    )

# Create the connection string. The password is percent-encoded because
# characters such as '@', '/', ':' or '%' would otherwise be parsed as URL
# structure; safe="" makes quote() encode every reserved character.
connection_string = (
    f"postgresql+psycopg2://{username}:{quote(password, safe='')}"
    f"@{endpoint}:{port}/{database}"
)

### Games

In [3]:
# Only games dated on or after start_date are loaded.
start_date = "2010-09-01"
# Convert to a YYYYMMDD integer by parsing the date instead of stripping
# dashes: a malformed value raises ValueError, and a non-zero-padded value
# such as "2010-9-1" still yields 20100901 rather than 201091.
start_date_int = int(
    datetime.datetime.strptime(start_date, "%Y-%m-%d").strftime("%Y%m%d")
)

# Columns selected from the games table.
features = [
    "game_id",
    "game_datetime",
    "home_team",
    "away_team",
    "open_line",
    "home_score",
    "away_score",
    "game_completed",
    "odds_last_update",
    "scores_last_update",
]

# Extracting the YYYYMMDD portion of the game_id and comparing it with start_date_int
games_query = f"SELECT {', '.join(features)} FROM games WHERE CAST(LEFT(game_id, 8) AS INTEGER) >= {start_date_int};"

# Keep a reference to the engine so its connection pool can be released once
# the query has run; an engine created inline is never disposed.
engine = create_engine(connection_string)
try:
    with engine.connect() as connection:
        games = pd.read_sql_query(games_query, connection)
finally:
    engine.dispose()

### Features

In [4]:
# Only feature rows for games dated on or after start_date are loaded.
start_date = "2010-09-01"
# Convert to a YYYYMMDD integer by parsing the date instead of stripping
# dashes: a malformed value raises ValueError, and a non-zero-padded value
# such as "2010-9-1" still yields 20100901 rather than 201091.
start_date_int = int(
    datetime.datetime.strptime(start_date, "%Y-%m-%d").strftime("%Y%m%d")
)

# Columns selected from the all_features_json table.
features = ["game_id", "data"]

# Extracting the YYYYMMDD portion of the game_id and comparing it with start_date_int
features_query = f"SELECT {', '.join(features)} FROM all_features_json WHERE CAST(LEFT(game_id, 8) AS INTEGER) >= {start_date_int};"

# Keep a reference to the engine so its connection pool can be released once
# the query has run; an engine created inline is never disposed.
engine = create_engine(connection_string)
try:
    with engine.connect() as connection:
        all_features = pd.read_sql_query(features_query, connection)
finally:
    engine.dispose()

# Expand each JSON document in the 'data' column into one column per key.
# NOTE(review): assumes the driver returns 'data' as already-decoded dicts
# rather than raw JSON text - confirm against the column type.
expanded_data = pd.json_normalize(all_features["data"])

# Drop the original 'data' column and concatenate the expanded data
# column-wise; the two frames are aligned on their row index.
all_features = pd.concat([all_features.drop(columns=["data"]), expanded_data], axis=1)

In [5]:
# Left-join the features onto the games so every game row is kept even when
# it has no feature row. validate="one_to_one" makes pandas raise if game_id
# is duplicated on either side. Columns present in both frames keep the
# `games` version; the `all_features` copy receives the "_drop" suffix.
games_features = pd.merge(
    games,
    all_features,
    on="game_id",
    how="left",
    validate="one_to_one",
    suffixes=("", "_drop"),
)
# Remove the duplicated right-hand columns. The pattern is anchored with "$"
# so only names that END in "_drop" are removed; an unanchored "_drop" would
# also discard any feature whose name merely contains that text.
games_features = games_features.drop(
    columns=games_features.filter(regex=r"_drop$").columns
)

<a id='basic_data_overview'></a>

## Basic Data Overview

In [6]:
# Work on a copy so the merged games_features frame is left untouched.
df = games_features.copy()

In [7]:
# Full per-column summary: verbose=True lists every column and
# show_counts=True includes the non-null count for each one.
df.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16669 entries, 0 to 16668
Data columns (total 778 columns):
 #    Column                                        Non-Null Count  Dtype         
---   ------                                        --------------  -----         
 0    game_id                                       16669 non-null  object        
 1    game_datetime                                 16669 non-null  datetime64[ns]
 2    home_team                                     16669 non-null  object        
 3    away_team                                     16669 non-null  object        
 4    open_line                                     16389 non-null  float64       
 5    home_score                                    16568 non-null  float64       
 6    away_score                                    16568 non-null  float64       
 7    game_completed                                16669 non-null  bool          
 8    odds_last_update                              93 non-n

In [8]:
# Preview the first 10 rows of the merged frame.
df.head(10)

Unnamed: 0,game_id,game_datetime,home_team,away_team,open_line,home_score,away_score,game_completed,odds_last_update,scores_last_update,season,away_ATL,away_BKN,away_BOS,away_CHA,away_CHI,away_CLE,away_DAL,away_DEN,away_DET,away_GSW,away_HOU,away_IND,away_LAC,away_LAL,away_MEM,away_MIA,away_MIL,away_MIN,away_NOP,away_NYK,away_OKC,away_ORL,away_PHI,away_PHX,away_POR,away_SAC,away_SAS,away_TOR,away_UTA,away_WAS,elo1_pre,elo2_pre,home_ATL,home_BKN,home_BOS,home_CHA,home_CHI,home_CLE,home_DAL,home_DEN,home_DET,home_GSW,home_HOU,home_IND,home_LAC,home_LAL,home_MEM,home_MIA,home_MIL,home_MIN,home_NOP,home_NYK,home_OKC,home_ORL,home_PHI,home_PHX,home_POR,home_SAC,home_SAS,home_TOR,home_UTA,home_WAS,538_prob1,elo_prob1,elo_prob2,last_5_hv,streak_hv,win_pct_hv,raptor1_pre,raptor2_pre,season_type,raptor_prob1,raptor_prob2,rest_diff_hv,carm_elo1_pre,carm_elo2_pre,day_of_season,point_diff_hv,carm_elo_prob1,carm_elo_prob2,away_team_last_5,away_team_streak,home_team_last_5,home_team_streak,away_team_win_pct,home_team_win_pct,point_diff_last_5_hv,pie_away_all_advanced,pie_away_l2w_advanced,pie_home_all_advanced,pie_home_l2w_advanced,l_away_all_traditional,l_away_l2w_traditional,l_home_all_traditional,l_home_l2w_traditional,pace_away_all_advanced,pace_away_l2w_advanced,pace_home_all_advanced,pace_home_l2w_advanced,poss_away_all_advanced,poss_away_l2w_advanced,poss_home_all_advanced,poss_home_l2w_advanced,w_away_all_traditional,w_away_l2w_traditional,w_home_all_traditional,w_home_l2w_traditional,gp_away_all_traditional,gp_away_l2w_traditional,gp_home_all_traditional,gp_home_l2w_traditional,ast_to_away_all_advanced,ast_to_away_l2w_advanced,ast_to_home_all_advanced,ast_to_home_l2w_advanced,away_team_avg_point_diff,e_pace_away_all_advanced,e_pace_away_l2w_advanced,e_pace_home_all_advanced,e_pace_home_l2w_advanced,home_team_avg_point_diff,min_away_all_traditional,min_away_l2w_traditional,min_home_all_traditional,min_home_l2w_traditional,opp_pf_away_all_opponent,opp_pf_away_l2w_opponent,
opp_pf_home_all_opponent,opp_pf_home_l2w_opponent,pts_away_all_traditional,pts_away_l2w_traditional,pts_home_all_traditional,pts_home_l2w_traditional,ts_pct_away_all_advanced,ts_pct_away_l2w_advanced,ts_pct_home_all_advanced,ts_pct_home_l2w_advanced,ast_pct_away_all_advanced,ast_pct_away_l2w_advanced,ast_pct_home_all_advanced,ast_pct_home_l2w_advanced,days_since_last_game_away,days_since_last_game_home,efg_pct_away_all_advanced,efg_pct_away_l2w_advanced,efg_pct_home_all_advanced,efg_pct_home_l2w_advanced,opp_ast_away_all_opponent,opp_ast_away_l2w_opponent,opp_ast_home_all_opponent,opp_ast_home_l2w_opponent,opp_blk_away_all_opponent,opp_blk_away_l2w_opponent,opp_blk_home_all_opponent,opp_blk_home_l2w_opponent,opp_fga_away_all_opponent,opp_fga_away_l2w_opponent,opp_fga_home_all_opponent,opp_fga_home_l2w_opponent,opp_fgm_away_all_opponent,opp_fgm_away_l2w_opponent,opp_fgm_home_all_opponent,opp_fgm_home_l2w_opponent,opp_fta_away_all_opponent,opp_fta_away_l2w_opponent,opp_fta_home_all_opponent,opp_fta_home_l2w_opponent,opp_ftm_away_all_opponent,opp_ftm_away_l2w_opponent,opp_ftm_home_all_opponent,opp_ftm_home_l2w_opponent,opp_pfd_away_all_opponent,opp_pfd_away_l2w_opponent,opp_pfd_home_all_opponent,opp_pfd_home_l2w_opponent,opp_pts_away_all_opponent,opp_pts_away_l2w_opponent,opp_pts_home_all_opponent,opp_pts_home_l2w_opponent,opp_reb_away_all_opponent,opp_reb_away_l2w_opponent,opp_reb_home_all_opponent,opp_reb_home_l2w_opponent,opp_stl_away_all_opponent,opp_stl_away_l2w_opponent,opp_stl_home_all_opponent,opp_stl_home_l2w_opponent,opp_tov_away_all_opponent,opp_tov_away_l2w_opponent,opp_tov_home_all_opponent,opp_tov_home_l2w_opponent,reb_pct_away_all_advanced,reb_pct_away_l2w_advanced,reb_pct_home_all_advanced,reb_pct_home_l2w_advanced,dreb_pct_away_all_advanced,dreb_pct_away_l2w_advanced,dreb_pct_home_all_advanced,dreb_pct_home_l2w_advanced,opp_blka_away_all_opponent,opp_blka_away_l2w_opponent,opp_blka_home_all_opponent,opp_blka_home_l2w_opponent,opp_dreb_away_all_opponent
,opp_dreb_away_l2w_opponent,opp_dreb_home_all_opponent,opp_dreb_home_l2w_opponent,opp_fg3a_away_all_opponent,opp_fg3a_away_l2w_opponent,opp_fg3a_home_all_opponent,opp_fg3a_home_l2w_opponent,opp_fg3m_away_all_opponent,opp_fg3m_away_l2w_opponent,opp_fg3m_home_all_opponent,opp_fg3m_home_l2w_opponent,opp_oreb_away_all_opponent,opp_oreb_away_l2w_opponent,opp_oreb_home_all_opponent,opp_oreb_home_l2w_opponent,oreb_pct_away_all_advanced,oreb_pct_away_l2w_advanced,oreb_pct_home_all_advanced,oreb_pct_home_l2w_advanced,w_pct_away_all_traditional,w_pct_away_l2w_traditional,w_pct_home_all_traditional,w_pct_home_l2w_traditional,ast_ratio_away_all_advanced,ast_ratio_away_l2w_advanced,ast_ratio_home_all_advanced,ast_ratio_home_l2w_advanced,def_rating_away_all_advanced,def_rating_away_l2w_advanced,def_rating_home_all_advanced,def_rating_home_l2w_advanced,efg_pct_away_all_fourfactors,efg_pct_away_l2w_fourfactors,efg_pct_home_all_fourfactors,efg_pct_home_l2w_fourfactors,net_rating_away_all_advanced,net_rating_away_l2w_advanced,net_rating_home_all_advanced,net_rating_home_l2w_advanced,off_rating_away_all_advanced,off_rating_away_l2w_advanced,off_rating_home_all_advanced,off_rating_home_l2w_advanced,opp_fg_pct_away_all_opponent,opp_fg_pct_away_l2w_opponent,opp_fg_pct_home_all_opponent,opp_fg_pct_home_l2w_opponent,opp_ft_pct_away_all_opponent,opp_ft_pct_away_l2w_opponent,opp_ft_pct_home_all_opponent,opp_ft_pct_home_l2w_opponent,pace_per40_away_all_advanced,pace_per40_away_l2w_advanced,pace_per40_home_all_advanced,pace_per40_home_l2w_advanced,pie_zscore_away_all_advanced,pie_zscore_away_l2w_advanced,pie_zscore_home_all_advanced,pie_zscore_home_l2w_advanced,plus_minus_away_all_opponent,plus_minus_away_l2w_opponent,plus_minus_home_all_opponent,plus_minus_home_l2w_opponent,tm_tov_pct_away_all_advanced,tm_tov_pct_away_l2w_advanced,tm_tov_pct_home_all_advanced,tm_tov_pct_home_l2w_advanced,away_team_last_5_games_result,fta_rate_away_all_fourfactors,fta_rate_away_l2w_fourfactors,fta_rate_home_al
l_fourfactors,fta_rate_home_l2w_fourfactors,home_team_last_5_games_result,l_zscore_away_all_traditional,l_zscore_away_l2w_traditional,l_zscore_home_all_traditional,l_zscore_home_l2w_traditional,opp_fg3_pct_away_all_opponent,opp_fg3_pct_away_l2w_opponent,opp_fg3_pct_home_all_opponent,opp_fg3_pct_home_l2w_opponent,oreb_pct_away_all_fourfactors,oreb_pct_away_l2w_fourfactors,oreb_pct_home_all_fourfactors,oreb_pct_home_l2w_fourfactors,pace_zscore_away_all_advanced,pace_zscore_away_l2w_advanced,pace_zscore_home_all_advanced,pace_zscore_home_l2w_advanced,poss_zscore_away_all_advanced,poss_zscore_away_l2w_advanced,poss_zscore_home_all_advanced,poss_zscore_home_l2w_advanced,w_zscore_away_all_traditional,w_zscore_away_l2w_traditional,w_zscore_home_all_traditional,w_zscore_home_l2w_traditional,e_def_rating_away_all_advanced,e_def_rating_away_l2w_advanced,e_def_rating_home_all_advanced,e_def_rating_home_l2w_advanced,e_net_rating_away_all_advanced,e_net_rating_away_l2w_advanced,e_net_rating_home_all_advanced,e_net_rating_home_l2w_advanced,e_off_rating_away_all_advanced,e_off_rating_away_l2w_advanced,e_off_rating_home_all_advanced,e_off_rating_home_l2w_advanced,gp_zscore_away_all_traditional,gp_zscore_away_l2w_traditional,gp_zscore_home_all_traditional,gp_zscore_home_l2w_traditional,ast_to_zscore_away_all_advanced,ast_to_zscore_away_l2w_advanced,ast_to_zscore_home_all_advanced,ast_to_zscore_home_l2w_advanced,away_team_avg_point_diff_last_5,e_pace_zscore_away_all_advanced,e_pace_zscore_away_l2w_advanced,e_pace_zscore_home_all_advanced,e_pace_zscore_home_l2w_advanced,home_team_avg_point_diff_last_5,min_zscore_away_all_traditional,min_zscore_away_l2w_traditional,min_zscore_home_all_traditional,min_zscore_home_l2w_traditional,opp_pf_zscore_away_all_opponent,opp_pf_zscore_away_l2w_opponent,opp_pf_zscore_home_all_opponent,opp_pf_zscore_home_l2w_opponent,plus_minus_away_all_traditional,plus_minus_away_l2w_traditional,plus_minus_home_all_traditional,plus_minus_home_l2w_traditional,pts_zs
core_away_all_traditional,pts_zscore_away_l2w_traditional,pts_zscore_home_all_traditional,pts_zscore_home_l2w_traditional,tm_tov_pct_away_all_fourfactors,tm_tov_pct_away_l2w_fourfactors,tm_tov_pct_home_all_fourfactors,tm_tov_pct_home_l2w_fourfactors,ts_pct_zscore_away_all_advanced,ts_pct_zscore_away_l2w_advanced,ts_pct_zscore_home_all_advanced,ts_pct_zscore_home_l2w_advanced,ast_pct_zscore_away_all_advanced,ast_pct_zscore_away_l2w_advanced,ast_pct_zscore_home_all_advanced,ast_pct_zscore_home_l2w_advanced,efg_pct_zscore_away_all_advanced,efg_pct_zscore_away_l2w_advanced,efg_pct_zscore_home_all_advanced,efg_pct_zscore_home_l2w_advanced,opp_ast_zscore_away_all_opponent,opp_ast_zscore_away_l2w_opponent,opp_ast_zscore_home_all_opponent,opp_ast_zscore_home_l2w_opponent,opp_blk_zscore_away_all_opponent,opp_blk_zscore_away_l2w_opponent,opp_blk_zscore_home_all_opponent,opp_blk_zscore_home_l2w_opponent,opp_efg_pct_away_all_fourfactors,opp_efg_pct_away_l2w_fourfactors,opp_efg_pct_home_all_fourfactors,opp_efg_pct_home_l2w_fourfactors,opp_fga_zscore_away_all_opponent,opp_fga_zscore_away_l2w_opponent,opp_fga_zscore_home_all_opponent,opp_fga_zscore_home_l2w_opponent,opp_fgm_zscore_away_all_opponent,opp_fgm_zscore_away_l2w_opponent,opp_fgm_zscore_home_all_opponent,opp_fgm_zscore_home_l2w_opponent,opp_fta_zscore_away_all_opponent,opp_fta_zscore_away_l2w_opponent,opp_fta_zscore_home_all_opponent,opp_fta_zscore_home_l2w_opponent,opp_ftm_zscore_away_all_opponent,opp_ftm_zscore_away_l2w_opponent,opp_ftm_zscore_home_all_opponent,opp_ftm_zscore_home_l2w_opponent,opp_pfd_zscore_away_all_opponent,opp_pfd_zscore_away_l2w_opponent,opp_pfd_zscore_home_all_opponent,opp_pfd_zscore_home_l2w_opponent,opp_pts_zscore_away_all_opponent,opp_pts_zscore_away_l2w_opponent,opp_pts_zscore_home_all_opponent,opp_pts_zscore_home_l2w_opponent,opp_reb_zscore_away_all_opponent,opp_reb_zscore_away_l2w_opponent,opp_reb_zscore_home_all_opponent,opp_reb_zscore_home_l2w_opponent,opp_stl_zscore_away_all_opponent,opp_s
tl_zscore_away_l2w_opponent,opp_stl_zscore_home_all_opponent,opp_stl_zscore_home_l2w_opponent,opp_tov_pct_away_all_fourfactors,opp_tov_pct_away_l2w_fourfactors,opp_tov_pct_home_all_fourfactors,opp_tov_pct_home_l2w_fourfactors,opp_tov_zscore_away_all_opponent,opp_tov_zscore_away_l2w_opponent,opp_tov_zscore_home_all_opponent,opp_tov_zscore_home_l2w_opponent,pie_percentile_away_all_advanced,pie_percentile_away_l2w_advanced,pie_percentile_home_all_advanced,pie_percentile_home_l2w_advanced,reb_pct_zscore_away_all_advanced,reb_pct_zscore_away_l2w_advanced,reb_pct_zscore_home_all_advanced,reb_pct_zscore_home_l2w_advanced,dreb_pct_zscore_away_all_advanced,dreb_pct_zscore_away_l2w_advanced,dreb_pct_zscore_home_all_advanced,dreb_pct_zscore_home_l2w_advanced,l_percentile_away_all_traditional,l_percentile_away_l2w_traditional,l_percentile_home_all_traditional,l_percentile_home_l2w_traditional,opp_blka_zscore_away_all_opponent,opp_blka_zscore_away_l2w_opponent,opp_blka_zscore_home_all_opponent,opp_blka_zscore_home_l2w_opponent,opp_dreb_zscore_away_all_opponent,opp_dreb_zscore_away_l2w_opponent,opp_dreb_zscore_home_all_opponent,opp_dreb_zscore_home_l2w_opponent,opp_fg3a_zscore_away_all_opponent,opp_fg3a_zscore_away_l2w_opponent,opp_fg3a_zscore_home_all_opponent,opp_fg3a_zscore_home_l2w_opponent,opp_fg3m_zscore_away_all_opponent,opp_fg3m_zscore_away_l2w_opponent,opp_fg3m_zscore_home_all_opponent,opp_fg3m_zscore_home_l2w_opponent,opp_fta_rate_away_all_fourfactors,opp_fta_rate_away_l2w_fourfactors,opp_fta_rate_home_all_fourfactors,opp_fta_rate_home_l2w_fourfactors,opp_oreb_pct_away_all_fourfactors,opp_oreb_pct_away_l2w_fourfactors,opp_oreb_pct_home_all_fourfactors,opp_oreb_pct_home_l2w_fourfactors,opp_oreb_zscore_away_all_opponent,opp_oreb_zscore_away_l2w_opponent,opp_oreb_zscore_home_all_opponent,opp_oreb_zscore_home_l2w_opponent,oreb_pct_zscore_away_all_advanced,oreb_pct_zscore_away_l2w_advanced,oreb_pct_zscore_home_all_advanced,oreb_pct_zscore_home_l2w_advanced,pace_percentile_aw
ay_all_advanced,pace_percentile_away_l2w_advanced,pace_percentile_home_all_advanced,pace_percentile_home_l2w_advanced,poss_percentile_away_all_advanced,poss_percentile_away_l2w_advanced,poss_percentile_home_all_advanced,poss_percentile_home_l2w_advanced,w_pct_zscore_away_all_traditional,w_pct_zscore_away_l2w_traditional,w_pct_zscore_home_all_traditional,w_pct_zscore_home_l2w_traditional,w_percentile_away_all_traditional,w_percentile_away_l2w_traditional,w_percentile_home_all_traditional,w_percentile_home_l2w_traditional,ast_ratio_zscore_away_all_advanced,ast_ratio_zscore_away_l2w_advanced,ast_ratio_zscore_home_all_advanced,ast_ratio_zscore_home_l2w_advanced,gp_percentile_away_all_traditional,gp_percentile_away_l2w_traditional,gp_percentile_home_all_traditional,gp_percentile_home_l2w_traditional,ast_to_percentile_away_all_advanced,ast_to_percentile_away_l2w_advanced,ast_to_percentile_home_all_advanced,ast_to_percentile_home_l2w_advanced,def_rating_zscore_away_all_advanced,def_rating_zscore_away_l2w_advanced,def_rating_zscore_home_all_advanced,def_rating_zscore_home_l2w_advanced,e_pace_percentile_away_all_advanced,e_pace_percentile_away_l2w_advanced,e_pace_percentile_home_all_advanced,e_pace_percentile_home_l2w_advanced,efg_pct_zscore_away_all_fourfactors,efg_pct_zscore_away_l2w_fourfactors,efg_pct_zscore_home_all_fourfactors,efg_pct_zscore_home_l2w_fourfactors,min_percentile_away_all_traditional,min_percentile_away_l2w_traditional,min_percentile_home_all_traditional,min_percentile_home_l2w_traditional,net_rating_zscore_away_all_advanced,net_rating_zscore_away_l2w_advanced,net_rating_zscore_home_all_advanced,net_rating_zscore_home_l2w_advanced,off_rating_zscore_away_all_advanced,off_rating_zscore_away_l2w_advanced,off_rating_zscore_home_all_advanced,off_rating_zscore_home_l2w_advanced,opp_fg_pct_zscore_away_all_opponent,opp_fg_pct_zscore_away_l2w_opponent,opp_fg_pct_zscore_home_all_opponent,opp_fg_pct_zscore_home_l2w_opponent,opp_ft_pct_zscore_away_all_opponent,opp_ft
_pct_zscore_away_l2w_opponent,opp_ft_pct_zscore_home_all_opponent,opp_ft_pct_zscore_home_l2w_opponent,opp_pf_percentile_away_all_opponent,opp_pf_percentile_away_l2w_opponent,opp_pf_percentile_home_all_opponent,opp_pf_percentile_home_l2w_opponent,pace_per40_zscore_away_all_advanced,pace_per40_zscore_away_l2w_advanced,pace_per40_zscore_home_all_advanced,pace_per40_zscore_home_l2w_advanced,plus_minus_zscore_away_all_opponent,plus_minus_zscore_away_l2w_opponent,plus_minus_zscore_home_all_opponent,plus_minus_zscore_home_l2w_opponent,pts_percentile_away_all_traditional,pts_percentile_away_l2w_traditional,pts_percentile_home_all_traditional,pts_percentile_home_l2w_traditional,tm_tov_pct_zscore_away_all_advanced,tm_tov_pct_zscore_away_l2w_advanced,tm_tov_pct_zscore_home_all_advanced,tm_tov_pct_zscore_home_l2w_advanced,ts_pct_percentile_away_all_advanced,ts_pct_percentile_away_l2w_advanced,ts_pct_percentile_home_all_advanced,ts_pct_percentile_home_l2w_advanced,ast_pct_percentile_away_all_advanced,ast_pct_percentile_away_l2w_advanced,ast_pct_percentile_home_all_advanced,ast_pct_percentile_home_l2w_advanced,efg_pct_percentile_away_all_advanced,efg_pct_percentile_away_l2w_advanced,efg_pct_percentile_home_all_advanced,efg_pct_percentile_home_l2w_advanced,fta_rate_zscore_away_all_fourfactors,fta_rate_zscore_away_l2w_fourfactors,fta_rate_zscore_home_all_fourfactors,fta_rate_zscore_home_l2w_fourfactors,opp_ast_percentile_away_all_opponent,opp_ast_percentile_away_l2w_opponent,opp_ast_percentile_home_all_opponent,opp_ast_percentile_home_l2w_opponent,opp_blk_percentile_away_all_opponent,opp_blk_percentile_away_l2w_opponent,opp_blk_percentile_home_all_opponent,opp_blk_percentile_home_l2w_opponent,opp_fg3_pct_zscore_away_all_opponent,opp_fg3_pct_zscore_away_l2w_opponent,opp_fg3_pct_zscore_home_all_opponent,opp_fg3_pct_zscore_home_l2w_opponent,opp_fga_percentile_away_all_opponent,opp_fga_percentile_away_l2w_opponent,opp_fga_percentile_home_all_opponent,opp_fga_percentile_home_l2w_opponen
t,opp_fgm_percentile_away_all_opponent,opp_fgm_percentile_away_l2w_opponent,opp_fgm_percentile_home_all_opponent,opp_fgm_percentile_home_l2w_opponent,opp_fta_percentile_away_all_opponent,opp_fta_percentile_away_l2w_opponent,opp_fta_percentile_home_all_opponent,opp_fta_percentile_home_l2w_opponent,opp_ftm_percentile_away_all_opponent,opp_ftm_percentile_away_l2w_opponent,opp_ftm_percentile_home_all_opponent,opp_ftm_percentile_home_l2w_opponent,opp_pfd_percentile_away_all_opponent,opp_pfd_percentile_away_l2w_opponent,opp_pfd_percentile_home_all_opponent,opp_pfd_percentile_home_l2w_opponent,opp_pts_percentile_away_all_opponent,opp_pts_percentile_away_l2w_opponent,opp_pts_percentile_home_all_opponent,opp_pts_percentile_home_l2w_opponent,opp_reb_percentile_away_all_opponent,opp_reb_percentile_away_l2w_opponent,opp_reb_percentile_home_all_opponent,opp_reb_percentile_home_l2w_opponent,opp_stl_percentile_away_all_opponent,opp_stl_percentile_away_l2w_opponent,opp_stl_percentile_home_all_opponent,opp_stl_percentile_home_l2w_opponent,opp_tov_percentile_away_all_opponent,opp_tov_percentile_away_l2w_opponent,opp_tov_percentile_home_all_opponent,opp_tov_percentile_home_l2w_opponent,oreb_pct_zscore_away_all_fourfactors,oreb_pct_zscore_away_l2w_fourfactors,oreb_pct_zscore_home_all_fourfactors,oreb_pct_zscore_home_l2w_fourfactors,reb_pct_percentile_away_all_advanced,reb_pct_percentile_away_l2w_advanced,reb_pct_percentile_home_all_advanced,reb_pct_percentile_home_l2w_advanced,dreb_pct_percentile_away_all_advanced,dreb_pct_percentile_away_l2w_advanced,dreb_pct_percentile_home_all_advanced,dreb_pct_percentile_home_l2w_advanced,e_def_rating_zscore_away_all_advanced,e_def_rating_zscore_away_l2w_advanced,e_def_rating_zscore_home_all_advanced,e_def_rating_zscore_home_l2w_advanced,e_net_rating_zscore_away_all_advanced,e_net_rating_zscore_away_l2w_advanced,e_net_rating_zscore_home_all_advanced,e_net_rating_zscore_home_l2w_advanced,e_off_rating_zscore_away_all_advanced,e_off_rating_zscore_away
_l2w_advanced,e_off_rating_zscore_home_all_advanced,e_off_rating_zscore_home_l2w_advanced,opp_blka_percentile_away_all_opponent,opp_blka_percentile_away_l2w_opponent,opp_blka_percentile_home_all_opponent,opp_blka_percentile_home_l2w_opponent,opp_dreb_percentile_away_all_opponent,opp_dreb_percentile_away_l2w_opponent,opp_dreb_percentile_home_all_opponent,opp_dreb_percentile_home_l2w_opponent,opp_fg3a_percentile_away_all_opponent,opp_fg3a_percentile_away_l2w_opponent,opp_fg3a_percentile_home_all_opponent,opp_fg3a_percentile_home_l2w_opponent,opp_fg3m_percentile_away_all_opponent,opp_fg3m_percentile_away_l2w_opponent,opp_fg3m_percentile_home_all_opponent,opp_fg3m_percentile_home_l2w_opponent,opp_oreb_percentile_away_all_opponent,opp_oreb_percentile_away_l2w_opponent,opp_oreb_percentile_home_all_opponent,opp_oreb_percentile_home_l2w_opponent,oreb_pct_percentile_away_all_advanced,oreb_pct_percentile_away_l2w_advanced,oreb_pct_percentile_home_all_advanced,oreb_pct_percentile_home_l2w_advanced,w_pct_percentile_away_all_traditional,w_pct_percentile_away_l2w_traditional,w_pct_percentile_home_all_traditional,w_pct_percentile_home_l2w_traditional,ast_ratio_percentile_away_all_advanced,ast_ratio_percentile_away_l2w_advanced,ast_ratio_percentile_home_all_advanced,ast_ratio_percentile_home_l2w_advanced,plus_minus_zscore_away_all_traditional,plus_minus_zscore_away_l2w_traditional,plus_minus_zscore_home_all_traditional,plus_minus_zscore_home_l2w_traditional,tm_tov_pct_zscore_away_all_fourfactors,tm_tov_pct_zscore_away_l2w_fourfactors,tm_tov_pct_zscore_home_all_fourfactors,tm_tov_pct_zscore_home_l2w_fourfactors,def_rating_percentile_away_all_advanced,def_rating_percentile_away_l2w_advanced,def_rating_percentile_home_all_advanced,def_rating_percentile_home_l2w_advanced,efg_pct_percentile_away_all_fourfactors,efg_pct_percentile_away_l2w_fourfactors,efg_pct_percentile_home_all_fourfactors,efg_pct_percentile_home_l2w_fourfactors,net_rating_percentile_away_all_advanced,net_rating_percent
ile_away_l2w_advanced,net_rating_percentile_home_all_advanced,net_rating_percentile_home_l2w_advanced,off_rating_percentile_away_all_advanced,off_rating_percentile_away_l2w_advanced,off_rating_percentile_home_all_advanced,off_rating_percentile_home_l2w_advanced,opp_efg_pct_zscore_away_all_fourfactors,opp_efg_pct_zscore_away_l2w_fourfactors,opp_efg_pct_zscore_home_all_fourfactors,opp_efg_pct_zscore_home_l2w_fourfactors,opp_fg_pct_percentile_away_all_opponent,opp_fg_pct_percentile_away_l2w_opponent,opp_fg_pct_percentile_home_all_opponent,opp_fg_pct_percentile_home_l2w_opponent,opp_ft_pct_percentile_away_all_opponent,opp_ft_pct_percentile_away_l2w_opponent,opp_ft_pct_percentile_home_all_opponent,opp_ft_pct_percentile_home_l2w_opponent,opp_tov_pct_zscore_away_all_fourfactors,opp_tov_pct_zscore_away_l2w_fourfactors,opp_tov_pct_zscore_home_all_fourfactors,opp_tov_pct_zscore_home_l2w_fourfactors,pace_per40_percentile_away_all_advanced,pace_per40_percentile_away_l2w_advanced,pace_per40_percentile_home_all_advanced,pace_per40_percentile_home_l2w_advanced,plus_minus_percentile_away_all_opponent,plus_minus_percentile_away_l2w_opponent,plus_minus_percentile_home_all_opponent,plus_minus_percentile_home_l2w_opponent,tm_tov_pct_percentile_away_all_advanced,tm_tov_pct_percentile_away_l2w_advanced,tm_tov_pct_percentile_home_all_advanced,tm_tov_pct_percentile_home_l2w_advanced,fta_rate_percentile_away_all_fourfactors,fta_rate_percentile_away_l2w_fourfactors,fta_rate_percentile_home_all_fourfactors,fta_rate_percentile_home_l2w_fourfactors,opp_fg3_pct_percentile_away_all_opponent,opp_fg3_pct_percentile_away_l2w_opponent,opp_fg3_pct_percentile_home_all_opponent,opp_fg3_pct_percentile_home_l2w_opponent,opp_fta_rate_zscore_away_all_fourfactors,opp_fta_rate_zscore_away_l2w_fourfactors,opp_fta_rate_zscore_home_all_fourfactors,opp_fta_rate_zscore_home_l2w_fourfactors,opp_oreb_pct_zscore_away_all_fourfactors,opp_oreb_pct_zscore_away_l2w_fourfactors,opp_oreb_pct_zscore_home_all_fourfactors,opp
_oreb_pct_zscore_home_l2w_fourfactors,oreb_pct_percentile_away_all_fourfactors,oreb_pct_percentile_away_l2w_fourfactors,oreb_pct_percentile_home_all_fourfactors,oreb_pct_percentile_home_l2w_fourfactors,e_def_rating_percentile_away_all_advanced,e_def_rating_percentile_away_l2w_advanced,e_def_rating_percentile_home_all_advanced,e_def_rating_percentile_home_l2w_advanced,e_net_rating_percentile_away_all_advanced,e_net_rating_percentile_away_l2w_advanced,e_net_rating_percentile_home_all_advanced,e_net_rating_percentile_home_l2w_advanced,e_off_rating_percentile_away_all_advanced,e_off_rating_percentile_away_l2w_advanced,e_off_rating_percentile_home_all_advanced,e_off_rating_percentile_home_l2w_advanced,plus_minus_percentile_away_all_traditional,plus_minus_percentile_away_l2w_traditional,plus_minus_percentile_home_all_traditional,plus_minus_percentile_home_l2w_traditional,tm_tov_pct_percentile_away_all_fourfactors,tm_tov_pct_percentile_away_l2w_fourfactors,tm_tov_pct_percentile_home_all_fourfactors,tm_tov_pct_percentile_home_l2w_fourfactors,opp_efg_pct_percentile_away_all_fourfactors,opp_efg_pct_percentile_away_l2w_fourfactors,opp_efg_pct_percentile_home_all_fourfactors,opp_efg_pct_percentile_home_l2w_fourfactors,opp_tov_pct_percentile_away_all_fourfactors,opp_tov_pct_percentile_away_l2w_fourfactors,opp_tov_pct_percentile_home_all_fourfactors,opp_tov_pct_percentile_home_l2w_fourfactors,opp_fta_rate_percentile_away_all_fourfactors,opp_fta_rate_percentile_away_l2w_fourfactors,opp_fta_rate_percentile_home_all_fourfactors,opp_fta_rate_percentile_home_l2w_fourfactors,opp_oreb_pct_percentile_away_all_fourfactors,opp_oreb_pct_percentile_away_l2w_fourfactors,opp_oreb_pct_percentile_home_all_fourfactors,opp_oreb_pct_percentile_home_l2w_fourfactors
0,20231028SACGSW,2023-10-28 02:00:00,SAC,GSW,,,,False,2023-08-24 17:44:35,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,20231103CHIBKN,2023-11-03 23:00:00,CHI,BKN,,,,False,2023-08-24 17:44:35,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,20231103INDCLE,2023-11-03 23:00:00,IND,CLE,,,,False,2023-08-24 17:44:35,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,20231103MILNYK,2023-11-03 23:30:00,MIL,NYK,,,,False,2023-08-24 17:44:35,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,20231226PHXDAL,2023-12-26 03:00:00,PHX,DAL,,,,False,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,20231226DENGSW,2023-12-26 03:00:00,DEN,GSW,,,,False,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,20231226MIAPHI,2023-12-26 03:00:00,MIA,PHI,,,,False,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,20231226NYKMIL,2023-12-26 03:00:00,NYK,MIL,,,,False,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,20231128MIAMIL,2023-11-28 23:30:00,MIA,MIL,,,,False,2023-08-24 17:44:35,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,20231027MILPHI,2023-10-27 00:30:00,MIL,PHI,,,,False,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Data Preparation

### Drop Non-Completed Games and Games with No Line

In [9]:
# Keep only completed games that also have an opening line.
# `.eq(True)` is the explicit comparison (rather than using the column as a mask)
# so that missing values in `game_completed` are treated as "not completed".
df = df.loc[df["game_completed"].eq(True)].dropna(subset=["open_line"])

### Create Targets

In [10]:
# Attach the modelling target column(s) using the project helper.
# NOTE(review): presumably this is where "REG_TARGET" (the target passed to PyCaret
# below) is created — confirm in src/utils/modeling_utils.py.
df = ModelSetup.add_targets(df)

### Select Dates and Features

In [11]:
# Train on season labels 2010..2021 (range end is exclusive); hold out 2022 for testing.
# NOTE(review): "Reg" presumably selects regular-season date windows — confirm in ModelSetup.
training_seasons = list(range(2010, 2022))
training_dates, testing_dates = ModelSetup.choose_dates(training_seasons, [2022], "Reg")
print("Training Dates:", training_dates, "Testing Dates:", testing_dates, sep="\n")

Training Dates:
('2010-10-26', '2022-04-10')
Testing Dates:
('2022-10-18', '2023-04-09')


In [12]:
# Print every available column name, one per line, as a reference when choosing
# the entries of `features_to_use` in the next cell.
for col in df.columns:
    print(col)

game_id
game_datetime
home_team
away_team
open_line
home_score
away_score
game_completed
odds_last_update
scores_last_update
season
away_ATL
away_BKN
away_BOS
away_CHA
away_CHI
away_CLE
away_DAL
away_DEN
away_DET
away_GSW
away_HOU
away_IND
away_LAC
away_LAL
away_MEM
away_MIA
away_MIL
away_MIN
away_NOP
away_NYK
away_OKC
away_ORL
away_PHI
away_PHX
away_POR
away_SAC
away_SAS
away_TOR
away_UTA
away_WAS
elo1_pre
elo2_pre
home_ATL
home_BKN
home_BOS
home_CHA
home_CHI
home_CLE
home_DAL
home_DEN
home_DET
home_GSW
home_HOU
home_IND
home_LAC
home_LAL
home_MEM
home_MIA
home_MIL
home_MIN
home_NOP
home_NYK
home_OKC
home_ORL
home_PHI
home_PHX
home_POR
home_SAC
home_SAS
home_TOR
home_UTA
home_WAS
538_prob1
elo_prob1
elo_prob2
last_5_hv
streak_hv
win_pct_hv
raptor1_pre
raptor2_pre
season_type
raptor_prob1
raptor_prob2
rest_diff_hv
carm_elo1_pre
carm_elo2_pre
day_of_season
point_diff_hv
carm_elo_prob1
carm_elo_prob2
away_team_last_5
away_team_streak
home_team_last_5
home_team_streak
away_team_win_pct
ho

In [13]:
# Model inputs, listed in the exact order handed to ModelSetup.create_datasets.
# The list is assembled from two groups purely for readability; the resulting
# order is unchanged.
features_to_use = [
    # Opening line, schedule context, ratings-based probabilities and `_hv` form features.
    "open_line",
    "rest_diff_hv",
    "day_of_season",
    "last_5_hv",
    "538_prob1",
    "elo_prob1",
    "streak_hv",
    "point_diff_last_5_hv",
    "point_diff_hv",
    "win_pct_hv",
] + [
    # `l2w` team statistics from the traditional / advanced / opponent tables
    # (`l2w` presumably means "last two weeks" — TODO confirm).
    "plus_minus_home_l2w_traditional",
    "net_rating_home_l2w_advanced",
    "plus_minus_home_l2w_opponent",
    "plus_minus_zscore_home_l2w_traditional",
    "net_rating_zscore_home_l2w_advanced",
    "plus_minus_zscore_home_l2w_opponent",
    "e_net_rating_home_l2w_advanced",
    "e_net_rating_zscore_home_l2w_advanced",
    "plus_minus_percentile_home_l2w_opponent",
    "plus_minus_percentile_home_l2w_traditional",
    "net_rating_percentile_home_l2w_advanced",
    "plus_minus_away_l2w_traditional",
    "plus_minus_away_l2w_opponent",
    "w_pct_zscore_home_l2w_traditional",
    "e_net_rating_percentile_home_l2w_advanced",
    "e_net_rating_away_l2w_advanced",
    "pie_percentile_home_l2w_advanced",
    "e_net_rating_zscore_away_l2w_advanced",
    "net_rating_zscore_away_l2w_advanced",
    "pie_home_l2w_advanced",
]

In [14]:
# Drop rows that are missing any selected feature.
# Rebinding the result (instead of `inplace=True`) avoids mutating a frame that was
# derived from earlier filters, which can trigger SettingWithCopyWarning and makes
# the notebook state harder to reason about.
df = df.dropna(subset=features_to_use)

In [15]:
# Build the train/test frames for the "reg" problem from the selected features and
# date windows. With create_report=True the helper also returns `model_report`,
# the dict that later cells fill with metrics and finally persist.
training_df, testing_df, model_report = ModelSetup.create_datasets(
    df, "reg", features_to_use, training_dates, testing_dates, create_report=True
)

In [16]:
# Sanity-check the (rows, columns) size of each split.
print(f"Training Shape:  {training_df.shape}")
print(f"Testing Shape:  {testing_df.shape}")

Training Shape:  (14226, 33)
Testing Shape:  (1199, 33)


### Baselines

In [17]:
# Reference error levels stored in the report by create_datasets: the "ind" pair is
# the Vegas-line baseline and the "dep" pair is the mean-of-target baseline.
training_baseline_via_vegas, testing_baseline_via_vegas = (
    model_report["ind_baseline_train"],
    model_report["ind_baseline_test"],
)
training_baseline_via_mean, testing_baseline_via_mean = (
    model_report["dep_baseline_train"],
    model_report["dep_baseline_test"],
)

print(
    f"Training Baseline via Vegas: {training_baseline_via_vegas:.2f}",
    f"Testing Baseline via Vegas: {testing_baseline_via_vegas:.2f}",
    f"Training Baseline via Mean: {training_baseline_via_mean:.2f}",
    f"Testing Baseline via Mean: {testing_baseline_via_mean:.2f}",
    sep="\n",
)

Training Baseline via Vegas: 9.60
Testing Baseline via Vegas: 9.73
Training Baseline via Mean: 11.05
Testing Baseline via Mean: 10.85


<a id=Regression></a>

## Regression

In [18]:
# Experiment object (PyCaret OOP API) that owns all setup/training/evaluation state
# used in the remaining cells.
py_reg = RegressionExperiment()

<a id=setup></a>

### Setup and Preprocessing

PyCaret's `setup` accepts many options; see the API reference for details:   
https://pycaret.readthedocs.io/en/latest/api/regression.html#module-pycaret.regression

In [19]:
# Timestamp suffix gives every run its own experiment name in the tracking backend.
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# Keyword arguments for RegressionExperiment.setup().
setup_params_reg = {
    # Pin the random seed so Restart & Run All reproduces the results; 3415 is the
    # session id of the recorded run whose outputs are shown in this notebook.
    "session_id": 3415,
    "log_experiment": True,
    "log_profile": False,
    "log_plots": False,
    "experiment_name": f"REG_1_{timestamp}",
    # Explicit train/test frames: the split is by date, not a random hold-out.
    "data": training_df,
    "test_data": testing_df,
    "target": "REG_TARGET",
    # All preprocessing is switched off; the option flags below are kept so they
    # can be toggled individually when experimenting.
    "preprocess": False,
    "normalize": False,  # zscore
    "transformation": False,  # yeo-johnson power transform to make data more Gaussian
    "remove_outliers": False,  # using SVD
    "remove_multicollinearity": False,
    "feature_selection": False,
    "pca": False,
    "pca_components": 10,
    "numeric_features": [],
    # Columns carried in the frames but hidden from the model; "vegas_open_hv" is
    # still needed later by evaluate_reg_model.
    "ignore_features": ["game_id", "vegas_open_hv"],
}

In [20]:
# Initialise the experiment with the parameters defined above; the summary grid
# printed below echoes the resolved configuration.
py_reg.setup(**setup_params_reg)

Unnamed: 0,Description,Value
0,Session id,3415
1,Target,REG_TARGET
2,Target type,Regression
3,Original data shape,"(15425, 33)"
4,Transformed data shape,"(15425, 31)"
5,Transformed train set shape,"(14226, 31)"
6,Transformed test set shape,"(1199, 31)"
7,Ignore features,2
8,Numeric features,30


2023/09/06 00:34:59 INFO mlflow.tracking.fluent: Experiment with name 'REG_1_20230906003456' does not exist. Creating a new experiment.


<pycaret.regression.oop.RegressionExperiment at 0x7fd7605c95a0>

<a id=compare></a>

### Compare Models

In [21]:
# Train the candidate model library and rank it by MAE (lower is better),
# skipping CatBoost. The top-ranked estimator is returned.
best_model_reg = py_reg.compare_models(turbo=True, sort="MAE", exclude=["catboost"])

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lightgbm,Light Gradient Boosting Machine,6.5205,69.7228,8.3405,0.6349,0.7933,0.9894,0.552
rf,Random Forest Regressor,6.5979,71.9,8.4681,0.6238,0.7997,0.9914,11.787
et,Extra Trees Regressor,6.6417,72.6604,8.5112,0.62,0.8028,0.9951,4.338
br,Bayesian Ridge,6.6652,72.2267,8.4926,0.6209,0.8238,0.9767,0.178
ridge,Ridge Regression,6.667,72.2706,8.4952,0.6207,0.8229,0.978,0.151
lr,Linear Regression,6.6803,72.5575,8.5121,0.619,0.8231,0.9817,0.489
xgboost,Extreme Gradient Boosting,6.8277,76.3336,8.7239,0.6004,0.8077,1.0607,2.921
gbr,Gradient Boosting Regressor,6.8575,77.0346,8.7648,0.5969,0.8381,0.9657,4.664
huber,Huber Regressor,6.9646,85.4127,9.228,0.5511,0.8349,0.9949,0.438
en,Elastic Net,6.9884,81.8533,9.037,0.5708,0.8564,0.9306,0.239


Processing:   0%|          | 0/81 [00:00<?, ?it/s]

In [22]:
# Show which estimator ranked first in the comparison above.
print(best_model_reg)

LGBMRegressor(random_state=3415)


<a id=create></a>

### Create Selected Model

In [23]:
# Linear regression ("lr") is selected by hand here rather than taking
# `best_model_reg` from the comparison. If this id changes, keep `model_type`
# in the finalization section in sync.
model_reg = py_reg.create_model("lr")

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,6.3109,64.7711,8.048,0.6032,0.7915,0.9603
1,6.619,72.8266,8.5338,0.5695,0.8279,1.0318
2,6.4598,67.5063,8.2162,0.6133,0.8278,1.0435
3,6.5258,70.4075,8.3909,0.6145,0.8229,0.9211
4,6.5465,68.2214,8.2596,0.6299,0.8346,0.9117
5,6.8083,74.4468,8.6283,0.615,0.8148,0.9885
6,6.8215,74.913,8.6552,0.616,0.8241,1.0754
7,6.4756,69.3032,8.3249,0.6553,0.8215,0.9126
8,7.2014,83.6786,9.1476,0.6321,0.8344,1.0374
9,7.0342,79.5005,8.9163,0.6412,0.8312,0.935


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

<a id=tune></a>

### Tune Selected Model

In [24]:
# Hyperparameter search with PyCaret defaults. As the log below notes, PyCaret
# returns the original model when tuning does not improve on it.
tuned_model_reg = py_reg.tune_model(model_reg)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,6.3109,64.7711,8.048,0.6032,0.7915,0.9603
1,6.619,72.8266,8.5338,0.5695,0.8279,1.0318
2,6.4598,67.5063,8.2162,0.6133,0.8278,1.0435
3,6.5258,70.4075,8.3909,0.6145,0.8229,0.9211
4,6.5465,68.2214,8.2596,0.6299,0.8346,0.9117
5,6.8083,74.4468,8.6283,0.615,0.8148,0.9885
6,6.8215,74.913,8.6552,0.616,0.8241,1.0754
7,6.4756,69.3032,8.3249,0.6553,0.8215,0.9126
8,7.2014,83.6786,9.1476,0.6321,0.8344,1.0374
9,7.0342,79.5005,8.9163,0.6412,0.8312,0.935


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [25]:
# Record the chosen estimator's hyperparameters in the report.
model_report["details"] = tuned_model_reg.get_params()

<a id=evaluate></a>

### Evaluate Model

https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.evaluate_model

In [26]:
# Interactive widget for browsing diagnostic plots of the selected model
# (requires a live kernel; it does not render in static views).
py_reg.evaluate_model(tuned_model_reg)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.interpret_model

In [27]:
# NOTE(review): interpretation is disabled — presumably because interpret_model
# targets tree-based estimators while the selected model is linear. TODO confirm,
# then re-enable or remove this cell.
# py_reg.interpret_model(tuned_model_reg)

In [28]:
# Score the training frame, then grab the metrics grid that predict_model just
# displayed. pull() returns the most recent grid, so it must directly follow the
# predict_model call it belongs to.
train_predictions_reg = py_reg.predict_model(tuned_model_reg, data=training_df)
train_prediction_metrics = py_reg.pull()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Linear Regression,6.6452,71.7823,8.4724,0.6256,0.821,0.9744


In [29]:
# Store the training MAE and R2 from the single-row metrics grid.
# `.iloc[0]` selects the first row by position, matching how the ROI cell reads
# its values and not depending on the grid's index labels.
model_report["train_mae"] = train_prediction_metrics["MAE"].iloc[0]
model_report["train_r2"] = train_prediction_metrics["R2"].iloc[0]

In [30]:
# Score the held-out testing frame, then grab the metrics grid that predict_model
# just displayed. pull() returns the most recent grid, so it must directly follow
# the predict_model call it belongs to.
test_predictions_reg = py_reg.predict_model(tuned_model_reg, data=testing_df)
test_prediction_metrics = py_reg.pull()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Linear Regression,6.7787,73.241,8.5581,0.6023,0.8111,1.1426


In [31]:
# Store the testing MAE and R2 from the single-row metrics grid.
# `.iloc[0]` selects the first row by position, matching how the ROI cell reads
# its values and not depending on the grid's index labels.
model_report["test_mae"] = test_prediction_metrics["MAE"].iloc[0]
model_report["test_r2"] = test_prediction_metrics["R2"].iloc[0]

In [32]:
# Compare model predictions ("prediction_label") with the Vegas line
# ("vegas_open_hv") against the actual outcome ("REG_TARGET"), first on the
# training predictions and then on the testing predictions. Each call prints its
# summary and returns (accuracy, closer-to-target rate, per-game prediction frame).
# NOTE(review): exact metric definitions live in evaluate_reg_model — confirm there.
train_acc_reg, train_closer_to_target_reg, train_prediction_df_reg = evaluate_reg_model(
    train_predictions_reg, "vegas_open_hv", "REG_TARGET", "prediction_label"
)
test_acc_reg, test_closer_to_target_reg, test_prediction_df_reg = evaluate_reg_model(
    test_predictions_reg, "vegas_open_hv", "REG_TARGET", "prediction_label"
)

Prediction is closer to target in 65.44% of cases
Accuracy: 0.7625
Prediction is closer to target in 62.64% of cases
Accuracy: 0.7598


In [33]:
# Add the accuracy and closer-to-target ("ctt") results to the report in one step.
model_report.update(
    {
        "train_acc_reg": train_acc_reg,
        "test_acc_reg": test_acc_reg,
        "train_ctt": train_closer_to_target_reg,
        "test_ctt": test_closer_to_target_reg,
    }
)

In [34]:
# Betting ROI on the held-out test predictions, comparing the actual side with the
# predicted side. The trailing expression displays the summary table.
# NOTE(review): units of the ROI columns are defined in calculate_roi — confirm there.
roi_results_reg = calculate_roi(test_prediction_df_reg, "actual_side", "pred_side")
roi_results_reg

Unnamed: 0,Label,Total ROI,Average ROI per Bet
0,"All Bets, Even Amount",62300,51.96
1,"All Bets, Typical Odds",54101,45.12


In [35]:
# Copy the average ROI per bet for each betting scenario into the report.
# Each lookup takes the first row whose "Label" matches the scenario name.
model_report["roi_all_bets_even_amount_avg"] = roi_results_reg.loc[
    roi_results_reg["Label"].eq("All Bets, Even Amount"), "Average ROI per Bet"
].iloc[0]
model_report["roi_all_bets_typical_odds_avg"] = roi_results_reg.loc[
    roi_results_reg["Label"].eq("All Bets, Typical Odds"), "Average ROI per Bet"
].iloc[0]

<a id=finalize_and_store></a>

### Model Finalization and Storage

In [36]:
# Refit the selected model for deployment. Per the PyCaret docs, finalize_model
# trains on the entire dataset including the hold-out set, so the test metrics
# above no longer apply to this refitted object.
final_model_reg = py_reg.finalize_model(tuned_model_reg)

In [37]:
# Compose the model identifier: <platform>_<problem>_<model>_<report timestamp>.
# `model_type` must describe the estimator chosen in the create_model cell.
platform, problem_type, model_type = "pycaret", "reg", "linreg"
datetime_str = model_report["datetime"].strftime("%Y_%m_%d_%H_%M_%S")

model_id = "_".join((platform, problem_type, model_type, datetime_str))
model_id

'pycaret_reg_linreg_2023_09_06_00_34_34'

In [38]:
# Persist the pipeline and finalized model; PyCaret appends ".pkl" to the given path.
# The path is relative to the notebook's working directory.
py_reg.save_model(final_model_reg, f"../models/AutoML/{model_id}")

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=/tmp/joblib),
          steps=[('placeholder', None),
                 ('actual_estimator', LinearRegression(n_jobs=-1))]),
 '../models/AutoML/pycaret_reg_linreg_2023_09_06_00_34_34.pkl')

In [39]:
# Tag the report with the identifiers of the model that was just saved.
model_report.update(
    {
        "platform": platform,
        "model_type": model_type,
        "model_id": model_id,
    }
)

In [40]:
# Display the completed report for a final review before it is stored.
model_report

{'datetime': Timestamp('2023-09-06 00:34:34.185988'),
 'problem_type': 'reg',
 'target': 'REG_TARGET',
 'features': ['open_line',
  'rest_diff_hv',
  'day_of_season',
  'last_5_hv',
  '538_prob1',
  'elo_prob1',
  'streak_hv',
  'point_diff_last_5_hv',
  'point_diff_hv',
  'win_pct_hv',
  'plus_minus_home_l2w_traditional',
  'net_rating_home_l2w_advanced',
  'plus_minus_home_l2w_opponent',
  'plus_minus_zscore_home_l2w_traditional',
  'net_rating_zscore_home_l2w_advanced',
  'plus_minus_zscore_home_l2w_opponent',
  'e_net_rating_home_l2w_advanced',
  'e_net_rating_zscore_home_l2w_advanced',
  'plus_minus_percentile_home_l2w_opponent',
  'plus_minus_percentile_home_l2w_traditional',
  'net_rating_percentile_home_l2w_advanced',
  'plus_minus_away_l2w_traditional',
  'plus_minus_away_l2w_opponent',
  'w_pct_zscore_home_l2w_traditional',
  'e_net_rating_percentile_home_l2w_advanced',
  'e_net_rating_away_l2w_advanced',
  'pie_percentile_home_l2w_advanced',
  'e_net_rating_zscore_away_l2w_a

In [41]:
# Persist the report via the project helper.
# NOTE(review): the storage destination is defined in save_model_report — confirm there.
save_model_report(model_report)