# PyCaret AutoML

PyCaret
* Main Site - https://pycaret.org/
* Docs - https://pycaret.readthedocs.io/en/latest/

## Table of Contents

* [Regression](#Regression)
    * [Setup and Preprocessing](#setup)  
    * [Compare Models](#compare)  
    * [Create Model](#create)  
    * [Tune Model](#tune)  
    * [Evaluate Model](#evaluate)  
    * [Finalize and Store Model](#finalize_and_store)
* [Classification](#Classification)
    * [Setup and Preprocessing](#setup_cls)  
    * [Compare Models](#compare_cls)  
    * [Create Model](#create_cls)  
    * [Tune Model](#tune_cls)  
    * [Evaluate Model](#evaluate_cls)  
    * [Finalize and Store Model](#finalize_and_store_cls)

## Imports and Global Settings

In [1]:
import sys
import datetime
import pandas as pd
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from pycaret import regression as py_reg
from pycaret import classification as py_cls

sys.path.append('../')
from passkeys import RDS_ENDPOINT, RDS_PASSWORD

# Pandas Settings
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.options.display.max_info_columns = 200
pd.options.display.precision = 5

## Load Model Training Data

In [2]:
username = 'postgres'
password = RDS_PASSWORD
endpoint = RDS_ENDPOINT
database = 'nba_betting'
port = '5432'

connection = create_engine(f'postgresql+psycopg2://{username}:{password}@{endpoint}/{database}').connect()

In [3]:
df = pd.read_sql_table('model_training_data', connection)

### Restrict to previous years games only

In [4]:
df = df[df['league_year_end'] != 23]

In [5]:
df.sort_values('game_id', ascending=False).head()

Unnamed: 0,game_id,CLS_TARGET_home_margin_GT_home_spread,REG_TARGET_actual_home_margin,home_team_num,away_team_num,league_year_end,fd_line_home,dk_line_home,covers_consensus_home,home_spread,gp,win,loss,w_pct,mins,pts,fgm,fga,fg_pct,fg3m,fg3a,fg3_pct,ftm,fta,ft_pct,oreb,dreb,reb,ast,tov,stl,blk,blka,pf,pfd,p_m,gp_rank,gp_zscore,win_rank,win_zscore,loss_rank,loss_zscore,w_pct_rank,w_pct_zscore,mins_rank,mins_zscore,pts_rank,pts_zscore,fgm_rank,fgm_zscore,fga_rank,fga_zscore,fg_pct_rank,fg_pct_zscore,fg3m_rank,fg3m_zscore,fg3a_rank,fg3a_zscore,fg3_pct_rank,fg3_pct_zscore,ftm_rank,ftm_zscore,fta_rank,fta_zscore,ft_pct_rank,ft_pct_zscore,oreb_rank,oreb_zscore,dreb_rank,dreb_zscore,reb_rank,reb_zscore,ast_rank,ast_zscore,tov_rank,tov_zscore,stl_rank,stl_zscore,blk_rank,blk_zscore,blka_rank,blka_zscore,pf_rank,pf_zscore,pfd_rank,pfd_zscore,p_m_rank,p_m_zscore,gp_opp,win_opp,loss_opp,w_pct_opp,mins_opp,pts_opp,fgm_opp,fga_opp,fg_pct_opp,fg3m_opp,fg3a_opp,fg3_pct_opp,ftm_opp,fta_opp,ft_pct_opp,oreb_opp,dreb_opp,reb_opp,ast_opp,tov_opp,stl_opp,blk_opp,blka_opp,pf_opp,pfd_opp,p_m_opp,gp_rank_opp,gp_zscore_opp,win_rank_opp,win_zscore_opp,loss_rank_opp,loss_zscore_opp,w_pct_rank_opp,w_pct_zscore_opp,mins_rank_opp,mins_zscore_opp,pts_rank_opp,pts_zscore_opp,fgm_rank_opp,fgm_zscore_opp,fga_rank_opp,fga_zscore_opp,fg_pct_rank_opp,fg_pct_zscore_opp,fg3m_rank_opp,fg3m_zscore_opp,fg3a_rank_opp,fg3a_zscore_opp,fg3_pct_rank_opp,fg3_pct_zscore_opp,ftm_rank_opp,ftm_zscore_opp,fta_rank_opp,fta_zscore_opp,ft_pct_rank_opp,ft_pct_zscore_opp,oreb_rank_opp,oreb_zscore_opp,dreb_rank_opp,dreb_zscore_opp,reb_rank_opp,reb_zscore_opp,ast_rank_opp,ast_zscore_opp,tov_rank_opp,tov_zscore_opp,stl_rank_opp,stl_zscore_opp,blk_rank_opp,blk_zscore_opp,blka_rank_opp,blka_zscore_opp,pf_rank_opp,pf_zscore_opp,pfd_rank_opp,pfd_zscore_opp,p_m_rank_opp,p_m_zscore_opp,offrtg,defrtg,netrtg,ast_pct,ast_v_tov,ast_ratio,oreb_pct,dreb_pct,reb_pct,tov_pct,efg_pct,ts_pct,pace,pie,poss,offrtg_rank,offrtg_zscore,defrtg_rank,defrtg_zscore,netrtg_rank,netrtg_zscore,ast_pct_rank,ast_pct_zscore,ast_v_tov_rank,ast_v_tov_zscore,ast_ratio_rank,ast_ratio_zscore,oreb_pct_rank,oreb_pct_zscore,dreb_pct_rank,dreb_pct_zscore,reb_pct_rank,reb_pct_zscore,tov_pct_rank,tov_pct_zscore,efg_pct_rank,efg_pct_zscore,ts_pct_rank,ts_pct_zscore,pace_rank,pace_zscore,pie_rank,pie_zscore,poss_rank,poss_zscore,offrtg_opp,defrtg_opp,netrtg_opp,ast_pct_opp,ast_v_tov_opp,ast_ratio_opp,oreb_pct_opp,dreb_pct_opp,reb_pct_opp,tov_pct_opp,efg_pct_opp,ts_pct_opp,pace_opp,pie_opp,poss_opp,offrtg_rank_opp,offrtg_zscore_opp,defrtg_rank_opp,defrtg_zscore_opp,netrtg_rank_opp,netrtg_zscore_opp,ast_pct_rank_opp,ast_pct_zscore_opp,ast_v_tov_rank_opp,ast_v_tov_zscore_opp,ast_ratio_rank_opp,ast_ratio_zscore_opp,oreb_pct_rank_opp,oreb_pct_zscore_opp,dreb_pct_rank_opp,dreb_pct_zscore_opp,reb_pct_rank_opp,reb_pct_zscore_opp,tov_pct_rank_opp,tov_pct_zscore_opp,efg_pct_rank_opp,efg_pct_zscore_opp,ts_pct_rank_opp,ts_pct_zscore_opp,...,fga_c3_opp,fg_pct_c3_opp,fgm_atb3_opp,fga_atb3_opp,fg_pct_atb3_opp,fgm_ra_rank_opp,fgm_ra_zscore_opp,fga_ra_rank_opp,fga_ra_zscore_opp,fg_pct_ra_rank_opp,fg_pct_ra_zscore_opp,fgm_paint_rank_opp,fgm_paint_zscore_opp,fga_paint_rank_opp,fga_paint_zscore_opp,fg_pct_paint_rank_opp,fg_pct_paint_zscore_opp,fgm_mr_rank_opp,fgm_mr_zscore_opp,fga_mr_rank_opp,fga_mr_zscore_opp,fg_pct_mr_rank_opp,fg_pct_mr_zscore_opp,fgm_lc3_rank_opp,fgm_lc3_zscore_opp,fga_lc3_rank_opp,fga_lc3_zscore_opp,fg_pct_lc3_rank_opp,fg_pct_lc3_zscore_opp,fgm_rc3_rank_opp,fgm_rc3_zscore_opp,fga_rc3_rank_opp,fga_rc3_zscore_opp,fg_pct_rc3_rank_opp,fg_pct_rc3_zscore_opp,fgm_c3_rank_opp,fgm_c3_zscore_opp,fga_c3_rank_opp,fga_c3_zscore_opp,fg_pct_c3_rank_opp,fg_pct_c3_zscore_opp,fgm_atb3_rank_opp,fgm_atb3_zscore_opp,fga_atb3_rank_opp,fga_atb3_zscore_opp,fg_pct_atb3_rank_opp,fg_pct_atb3_zscore_opp,opp_fgm_ra,opp_fga_ra,opp_fg_pct_ra,opp_fgm_paint,opp_fga_paint,opp_fg_pct_paint,opp_fgm_mr,opp_fga_mr,opp_fg_pct_mr,opp_fgm_lc3,opp_fga_lc3,opp_fg_pct_lc3,opp_fgm_rc3,opp_fga_rc3,opp_fg_pct_rc3,opp_fgm_c3,opp_fga_c3,opp_fg_pct_c3,opp_fgm_atb3,opp_fga_atb3,opp_fg_pct_atb3,opp_fgm_ra_rank,opp_fgm_ra_zscore,opp_fga_ra_rank,opp_fga_ra_zscore,opp_fg_pct_ra_rank,opp_fg_pct_ra_zscore,opp_fgm_paint_rank,opp_fgm_paint_zscore,opp_fga_paint_rank,opp_fga_paint_zscore,opp_fg_pct_paint_rank,opp_fg_pct_paint_zscore,opp_fgm_mr_rank,opp_fgm_mr_zscore,opp_fga_mr_rank,opp_fga_mr_zscore,opp_fg_pct_mr_rank,opp_fg_pct_mr_zscore,opp_fgm_lc3_rank,opp_fgm_lc3_zscore,opp_fga_lc3_rank,opp_fga_lc3_zscore,opp_fg_pct_lc3_rank,opp_fg_pct_lc3_zscore,opp_fgm_rc3_rank,opp_fgm_rc3_zscore,opp_fga_rc3_rank,opp_fga_rc3_zscore,opp_fg_pct_rc3_rank,opp_fg_pct_rc3_zscore,opp_fgm_c3_rank,opp_fgm_c3_zscore,opp_fga_c3_rank,opp_fga_c3_zscore,opp_fg_pct_c3_rank,opp_fg_pct_c3_zscore,opp_fgm_atb3_rank,opp_fgm_atb3_zscore,opp_fga_atb3_rank,opp_fga_atb3_zscore,opp_fg_pct_atb3_rank,opp_fg_pct_atb3_zscore,opp_fgm_ra_opp,opp_fga_ra_opp,opp_fg_pct_ra_opp,opp_fgm_paint_opp,opp_fga_paint_opp,opp_fg_pct_paint_opp,opp_fgm_mr_opp,opp_fga_mr_opp,opp_fg_pct_mr_opp,opp_fgm_lc3_opp,opp_fga_lc3_opp,opp_fg_pct_lc3_opp,opp_fgm_rc3_opp,opp_fga_rc3_opp,opp_fg_pct_rc3_opp,opp_fgm_c3_opp,opp_fga_c3_opp,opp_fg_pct_c3_opp,opp_fgm_atb3_opp,opp_fga_atb3_opp,opp_fg_pct_atb3_opp,opp_fgm_ra_rank_opp,opp_fgm_ra_zscore_opp,opp_fga_ra_rank_opp,opp_fga_ra_zscore_opp,opp_fg_pct_ra_rank_opp,opp_fg_pct_ra_zscore_opp,opp_fgm_paint_rank_opp,opp_fgm_paint_zscore_opp,opp_fga_paint_rank_opp,opp_fga_paint_zscore_opp,opp_fg_pct_paint_rank_opp,opp_fg_pct_paint_zscore_opp,opp_fgm_mr_rank_opp,opp_fgm_mr_zscore_opp,opp_fga_mr_rank_opp,opp_fga_mr_zscore_opp,opp_fg_pct_mr_rank_opp,opp_fg_pct_mr_zscore_opp,opp_fgm_lc3_rank_opp,opp_fgm_lc3_zscore_opp,opp_fga_lc3_rank_opp,opp_fga_lc3_zscore_opp,opp_fg_pct_lc3_rank_opp,opp_fg_pct_lc3_zscore_opp,opp_fgm_rc3_rank_opp,opp_fgm_rc3_zscore_opp,opp_fga_rc3_rank_opp,opp_fga_rc3_zscore_opp,opp_fg_pct_rc3_rank_opp,opp_fg_pct_rc3_zscore_opp,opp_fgm_c3_rank_opp,opp_fgm_c3_zscore_opp,opp_fga_c3_rank_opp,opp_fga_c3_zscore_opp,opp_fg_pct_c3_rank_opp,opp_fg_pct_c3_zscore_opp,opp_fgm_atb3_rank_opp,opp_fgm_atb3_zscore_opp,opp_fga_atb3_rank_opp,opp_fga_atb3_zscore_opp,opp_fg_pct_atb3_rank_opp,opp_fg_pct_atb3_zscore_opp,screen_ast,screen_ast_pts,deflections,off_loose_ball_rec,def_loose_ball_rec,loose_ball_rec,pct_loose_ball_rec_off,pct_loose_ball_rec_def,charges_drawn,contested_2pt,contested_3pt,contested_shots,screen_ast_rank,screen_ast_zscore,screen_ast_pts_rank,screen_ast_pts_zscore,deflections_rank,deflections_zscore,off_loose_ball_rec_rank,off_loose_ball_rec_zscore,def_loose_ball_rec_rank,def_loose_ball_rec_zscore,loose_ball_rec_rank,loose_ball_rec_zscore,pct_loose_ball_rec_off_rank,pct_loose_ball_rec_off_zscore,pct_loose_ball_rec_def_rank,pct_loose_ball_rec_def_zscore,charges_drawn_rank,charges_drawn_zscore,contested_2pt_rank,contested_2pt_zscore,contested_3pt_rank,contested_3pt_zscore,contested_shots_rank,contested_shots_zscore,screen_ast_opp,screen_ast_pts_opp,deflections_opp,off_loose_ball_rec_opp,def_loose_ball_rec_opp,loose_ball_rec_opp,pct_loose_ball_rec_off_opp,pct_loose_ball_rec_def_opp,charges_drawn_opp,contested_2pt_opp,contested_3pt_opp,contested_shots_opp,screen_ast_rank_opp,screen_ast_zscore_opp,screen_ast_pts_rank_opp,screen_ast_pts_zscore_opp,deflections_rank_opp,deflections_zscore_opp,off_loose_ball_rec_rank_opp,off_loose_ball_rec_zscore_opp,def_loose_ball_rec_rank_opp,def_loose_ball_rec_zscore_opp,loose_ball_rec_rank_opp,loose_ball_rec_zscore_opp,pct_loose_ball_rec_off_rank_opp,pct_loose_ball_rec_off_zscore_opp,pct_loose_ball_rec_def_rank_opp,pct_loose_ball_rec_def_zscore_opp,charges_drawn_rank_opp,charges_drawn_zscore_opp,contested_2pt_rank_opp,contested_2pt_zscore_opp,contested_3pt_rank_opp,contested_3pt_zscore_opp,contested_shots_rank_opp,contested_shots_zscore_opp,elo1_pre,elo2_pre,elo_prob1,elo_prob2,day_of_season
420,20220616BOSGSW,False,-13.0,3,10,22,,,,-4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1727.2643,1693.749,0.68321,0.31679,240
419,20220613GSWBOS,True,10.0,10,3,22,,,,-4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1685.1737,1735.8397,0.57052,0.42948,237
418,20220610BOSGSW,False,-10.0,3,10,22,,,,-4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1754.0458,1666.9677,0.74591,0.25409,234
417,20220608BOSGSW,True,16.0,3,10,22,,,,-3.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1747.4094,1673.6039,0.73116,0.26884,232
415,20220605GSWBOS,True,19.0,10,3,22,,,,-4.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1657.4373,1763.5762,0.49117,0.50883,229


<a id='basic_data_overview'></a>

## Basic Data Overview

In [6]:
df.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10195 entries, 0 to 10208
Data columns (total 921 columns):
 #    Column                                 Non-Null Count  Dtype  
---   ------                                 --------------  -----  
 0    game_id                                10195 non-null  object 
 1    CLS_TARGET_home_margin_GT_home_spread  10195 non-null  bool   
 2    REG_TARGET_actual_home_margin          10195 non-null  float64
 3    home_team_num                          10195 non-null  int64  
 4    away_team_num                          10195 non-null  int64  
 5    league_year_end                        10195 non-null  int64  
 6    fd_line_home                           0 non-null      object 
 7    dk_line_home                           0 non-null      object 
 8    covers_consensus_home                  0 non-null      object 
 9    home_spread                            10195 non-null  float64
 10   gp                                     9396 non-null   o

In [7]:
df.head()

Unnamed: 0,game_id,CLS_TARGET_home_margin_GT_home_spread,REG_TARGET_actual_home_margin,home_team_num,away_team_num,league_year_end,fd_line_home,dk_line_home,covers_consensus_home,home_spread,gp,win,loss,w_pct,mins,pts,fgm,fga,fg_pct,fg3m,fg3a,fg3_pct,ftm,fta,ft_pct,oreb,dreb,reb,ast,tov,stl,blk,blka,pf,pfd,p_m,gp_rank,gp_zscore,win_rank,win_zscore,loss_rank,loss_zscore,w_pct_rank,w_pct_zscore,mins_rank,mins_zscore,pts_rank,pts_zscore,fgm_rank,fgm_zscore,fga_rank,fga_zscore,fg_pct_rank,fg_pct_zscore,fg3m_rank,fg3m_zscore,fg3a_rank,fg3a_zscore,fg3_pct_rank,fg3_pct_zscore,ftm_rank,ftm_zscore,fta_rank,fta_zscore,ft_pct_rank,ft_pct_zscore,oreb_rank,oreb_zscore,dreb_rank,dreb_zscore,reb_rank,reb_zscore,ast_rank,ast_zscore,tov_rank,tov_zscore,stl_rank,stl_zscore,blk_rank,blk_zscore,blka_rank,blka_zscore,pf_rank,pf_zscore,pfd_rank,pfd_zscore,p_m_rank,p_m_zscore,gp_opp,win_opp,loss_opp,w_pct_opp,mins_opp,pts_opp,fgm_opp,fga_opp,fg_pct_opp,fg3m_opp,fg3a_opp,fg3_pct_opp,ftm_opp,fta_opp,ft_pct_opp,oreb_opp,dreb_opp,reb_opp,ast_opp,tov_opp,stl_opp,blk_opp,blka_opp,pf_opp,pfd_opp,p_m_opp,gp_rank_opp,gp_zscore_opp,win_rank_opp,win_zscore_opp,loss_rank_opp,loss_zscore_opp,w_pct_rank_opp,w_pct_zscore_opp,mins_rank_opp,mins_zscore_opp,pts_rank_opp,pts_zscore_opp,fgm_rank_opp,fgm_zscore_opp,fga_rank_opp,fga_zscore_opp,fg_pct_rank_opp,fg_pct_zscore_opp,fg3m_rank_opp,fg3m_zscore_opp,fg3a_rank_opp,fg3a_zscore_opp,fg3_pct_rank_opp,fg3_pct_zscore_opp,ftm_rank_opp,ftm_zscore_opp,fta_rank_opp,fta_zscore_opp,ft_pct_rank_opp,ft_pct_zscore_opp,oreb_rank_opp,oreb_zscore_opp,dreb_rank_opp,dreb_zscore_opp,reb_rank_opp,reb_zscore_opp,ast_rank_opp,ast_zscore_opp,tov_rank_opp,tov_zscore_opp,stl_rank_opp,stl_zscore_opp,blk_rank_opp,blk_zscore_opp,blka_rank_opp,blka_zscore_opp,pf_rank_opp,pf_zscore_opp,pfd_rank_opp,pfd_zscore_opp,p_m_rank_opp,p_m_zscore_opp,offrtg,defrtg,netrtg,ast_pct,ast_v_tov,ast_ratio,oreb_pct,dreb_pct,reb_pct,tov_pct,efg_pct,ts_pct,pace,pie,poss,offrtg_rank,offrtg_zscore,defrtg_rank,defrtg_zscore,netrtg_rank,netrtg_zscore,ast_pct_rank,ast_pct_zscore,ast_v_tov_rank,ast_v_tov_zscore,ast_ratio_rank,ast_ratio_zscore,oreb_pct_rank,oreb_pct_zscore,dreb_pct_rank,dreb_pct_zscore,reb_pct_rank,reb_pct_zscore,tov_pct_rank,tov_pct_zscore,efg_pct_rank,efg_pct_zscore,ts_pct_rank,ts_pct_zscore,pace_rank,pace_zscore,pie_rank,pie_zscore,poss_rank,poss_zscore,offrtg_opp,defrtg_opp,netrtg_opp,ast_pct_opp,ast_v_tov_opp,ast_ratio_opp,oreb_pct_opp,dreb_pct_opp,reb_pct_opp,tov_pct_opp,efg_pct_opp,ts_pct_opp,pace_opp,pie_opp,poss_opp,offrtg_rank_opp,offrtg_zscore_opp,defrtg_rank_opp,defrtg_zscore_opp,netrtg_rank_opp,netrtg_zscore_opp,ast_pct_rank_opp,ast_pct_zscore_opp,ast_v_tov_rank_opp,ast_v_tov_zscore_opp,ast_ratio_rank_opp,ast_ratio_zscore_opp,oreb_pct_rank_opp,oreb_pct_zscore_opp,dreb_pct_rank_opp,dreb_pct_zscore_opp,reb_pct_rank_opp,reb_pct_zscore_opp,tov_pct_rank_opp,tov_pct_zscore_opp,efg_pct_rank_opp,efg_pct_zscore_opp,ts_pct_rank_opp,ts_pct_zscore_opp,...,fga_c3_opp,fg_pct_c3_opp,fgm_atb3_opp,fga_atb3_opp,fg_pct_atb3_opp,fgm_ra_rank_opp,fgm_ra_zscore_opp,fga_ra_rank_opp,fga_ra_zscore_opp,fg_pct_ra_rank_opp,fg_pct_ra_zscore_opp,fgm_paint_rank_opp,fgm_paint_zscore_opp,fga_paint_rank_opp,fga_paint_zscore_opp,fg_pct_paint_rank_opp,fg_pct_paint_zscore_opp,fgm_mr_rank_opp,fgm_mr_zscore_opp,fga_mr_rank_opp,fga_mr_zscore_opp,fg_pct_mr_rank_opp,fg_pct_mr_zscore_opp,fgm_lc3_rank_opp,fgm_lc3_zscore_opp,fga_lc3_rank_opp,fga_lc3_zscore_opp,fg_pct_lc3_rank_opp,fg_pct_lc3_zscore_opp,fgm_rc3_rank_opp,fgm_rc3_zscore_opp,fga_rc3_rank_opp,fga_rc3_zscore_opp,fg_pct_rc3_rank_opp,fg_pct_rc3_zscore_opp,fgm_c3_rank_opp,fgm_c3_zscore_opp,fga_c3_rank_opp,fga_c3_zscore_opp,fg_pct_c3_rank_opp,fg_pct_c3_zscore_opp,fgm_atb3_rank_opp,fgm_atb3_zscore_opp,fga_atb3_rank_opp,fga_atb3_zscore_opp,fg_pct_atb3_rank_opp,fg_pct_atb3_zscore_opp,opp_fgm_ra,opp_fga_ra,opp_fg_pct_ra,opp_fgm_paint,opp_fga_paint,opp_fg_pct_paint,opp_fgm_mr,opp_fga_mr,opp_fg_pct_mr,opp_fgm_lc3,opp_fga_lc3,opp_fg_pct_lc3,opp_fgm_rc3,opp_fga_rc3,opp_fg_pct_rc3,opp_fgm_c3,opp_fga_c3,opp_fg_pct_c3,opp_fgm_atb3,opp_fga_atb3,opp_fg_pct_atb3,opp_fgm_ra_rank,opp_fgm_ra_zscore,opp_fga_ra_rank,opp_fga_ra_zscore,opp_fg_pct_ra_rank,opp_fg_pct_ra_zscore,opp_fgm_paint_rank,opp_fgm_paint_zscore,opp_fga_paint_rank,opp_fga_paint_zscore,opp_fg_pct_paint_rank,opp_fg_pct_paint_zscore,opp_fgm_mr_rank,opp_fgm_mr_zscore,opp_fga_mr_rank,opp_fga_mr_zscore,opp_fg_pct_mr_rank,opp_fg_pct_mr_zscore,opp_fgm_lc3_rank,opp_fgm_lc3_zscore,opp_fga_lc3_rank,opp_fga_lc3_zscore,opp_fg_pct_lc3_rank,opp_fg_pct_lc3_zscore,opp_fgm_rc3_rank,opp_fgm_rc3_zscore,opp_fga_rc3_rank,opp_fga_rc3_zscore,opp_fg_pct_rc3_rank,opp_fg_pct_rc3_zscore,opp_fgm_c3_rank,opp_fgm_c3_zscore,opp_fga_c3_rank,opp_fga_c3_zscore,opp_fg_pct_c3_rank,opp_fg_pct_c3_zscore,opp_fgm_atb3_rank,opp_fgm_atb3_zscore,opp_fga_atb3_rank,opp_fga_atb3_zscore,opp_fg_pct_atb3_rank,opp_fg_pct_atb3_zscore,opp_fgm_ra_opp,opp_fga_ra_opp,opp_fg_pct_ra_opp,opp_fgm_paint_opp,opp_fga_paint_opp,opp_fg_pct_paint_opp,opp_fgm_mr_opp,opp_fga_mr_opp,opp_fg_pct_mr_opp,opp_fgm_lc3_opp,opp_fga_lc3_opp,opp_fg_pct_lc3_opp,opp_fgm_rc3_opp,opp_fga_rc3_opp,opp_fg_pct_rc3_opp,opp_fgm_c3_opp,opp_fga_c3_opp,opp_fg_pct_c3_opp,opp_fgm_atb3_opp,opp_fga_atb3_opp,opp_fg_pct_atb3_opp,opp_fgm_ra_rank_opp,opp_fgm_ra_zscore_opp,opp_fga_ra_rank_opp,opp_fga_ra_zscore_opp,opp_fg_pct_ra_rank_opp,opp_fg_pct_ra_zscore_opp,opp_fgm_paint_rank_opp,opp_fgm_paint_zscore_opp,opp_fga_paint_rank_opp,opp_fga_paint_zscore_opp,opp_fg_pct_paint_rank_opp,opp_fg_pct_paint_zscore_opp,opp_fgm_mr_rank_opp,opp_fgm_mr_zscore_opp,opp_fga_mr_rank_opp,opp_fga_mr_zscore_opp,opp_fg_pct_mr_rank_opp,opp_fg_pct_mr_zscore_opp,opp_fgm_lc3_rank_opp,opp_fgm_lc3_zscore_opp,opp_fga_lc3_rank_opp,opp_fga_lc3_zscore_opp,opp_fg_pct_lc3_rank_opp,opp_fg_pct_lc3_zscore_opp,opp_fgm_rc3_rank_opp,opp_fgm_rc3_zscore_opp,opp_fga_rc3_rank_opp,opp_fga_rc3_zscore_opp,opp_fg_pct_rc3_rank_opp,opp_fg_pct_rc3_zscore_opp,opp_fgm_c3_rank_opp,opp_fgm_c3_zscore_opp,opp_fga_c3_rank_opp,opp_fga_c3_zscore_opp,opp_fg_pct_c3_rank_opp,opp_fg_pct_c3_zscore_opp,opp_fgm_atb3_rank_opp,opp_fgm_atb3_zscore_opp,opp_fga_atb3_rank_opp,opp_fga_atb3_zscore_opp,opp_fg_pct_atb3_rank_opp,opp_fg_pct_atb3_zscore_opp,screen_ast,screen_ast_pts,deflections,off_loose_ball_rec,def_loose_ball_rec,loose_ball_rec,pct_loose_ball_rec_off,pct_loose_ball_rec_def,charges_drawn,contested_2pt,contested_3pt,contested_shots,screen_ast_rank,screen_ast_zscore,screen_ast_pts_rank,screen_ast_pts_zscore,deflections_rank,deflections_zscore,off_loose_ball_rec_rank,off_loose_ball_rec_zscore,def_loose_ball_rec_rank,def_loose_ball_rec_zscore,loose_ball_rec_rank,loose_ball_rec_zscore,pct_loose_ball_rec_off_rank,pct_loose_ball_rec_off_zscore,pct_loose_ball_rec_def_rank,pct_loose_ball_rec_def_zscore,charges_drawn_rank,charges_drawn_zscore,contested_2pt_rank,contested_2pt_zscore,contested_3pt_rank,contested_3pt_zscore,contested_shots_rank,contested_shots_zscore,screen_ast_opp,screen_ast_pts_opp,deflections_opp,off_loose_ball_rec_opp,def_loose_ball_rec_opp,loose_ball_rec_opp,pct_loose_ball_rec_off_opp,pct_loose_ball_rec_def_opp,charges_drawn_opp,contested_2pt_opp,contested_3pt_opp,contested_shots_opp,screen_ast_rank_opp,screen_ast_zscore_opp,screen_ast_pts_rank_opp,screen_ast_pts_zscore_opp,deflections_rank_opp,deflections_zscore_opp,off_loose_ball_rec_rank_opp,off_loose_ball_rec_zscore_opp,def_loose_ball_rec_rank_opp,def_loose_ball_rec_zscore_opp,loose_ball_rec_rank_opp,loose_ball_rec_zscore_opp,pct_loose_ball_rec_off_rank_opp,pct_loose_ball_rec_off_zscore_opp,pct_loose_ball_rec_def_rank_opp,pct_loose_ball_rec_def_zscore_opp,charges_drawn_rank_opp,charges_drawn_zscore_opp,contested_2pt_rank_opp,contested_2pt_zscore_opp,contested_3pt_rank_opp,contested_3pt_zscore_opp,contested_shots_rank_opp,contested_shots_zscore_opp,elo1_pre,elo2_pre,elo_prob1,elo_prob2,day_of_season
0,20141028LALHOU,False,-18.0,14,11,15,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1421.9985,1596.4641,0.39445,0.60555,0
1,20141028NOPORL,True,17.0,19,22,15,,,,-9.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1457.2185,1359.4265,0.75742,0.24258,0
2,20141028SASDAL,False,1.0,26,7,15,,,,-3.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1699.5024,1592.0101,0.76753,0.23247,0
3,20141029SACGSW,False,-18.0,27,10,15,,,,4.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1431.1567,1590.7976,0.415,0.585,1
4,20141029NYKCHI,False,-24.0,20,5,15,,,,4.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1534.4419,1547.2185,0.62295,0.37705,1


## Data Prep Functions

In [8]:
REGULAR_SEASON_DATES_ADJUSTED = {
    "2013-2014": (datetime.date(2013, 11, 12), datetime.date(2014, 4, 2)),
    "2014-2015": (datetime.date(2014, 11, 11), datetime.date(2015, 4, 1)),
    "2015-2016": (datetime.date(2015, 11, 10), datetime.date(2016, 3, 30)),
    "2016-2017": (datetime.date(2016, 11, 8), datetime.date(2017, 3, 29)),
    "2017-2018": (datetime.date(2017, 10, 31), datetime.date(2018, 3, 28)),
    "2018-2019": (datetime.date(2018, 10, 30), datetime.date(2019, 3, 27)),
    "2019-2020": (datetime.date(2019, 11, 5), datetime.date(2020, 2, 26)),
    "2020-2021": (datetime.date(2021, 1, 5), datetime.date(2021, 5, 2)),
    "2021-2022": (datetime.date(2021, 11, 2), datetime.date(2022, 3, 27)),
    "2022-2023": (datetime.date(2022, 11, 1), datetime.date(2023, 3, 26)),
}

In [9]:
def prepare_data_pycaret(
    df,
    model_type,
    feature_types=["main"],
    random_state=17,
    test_size=0.2,
    time_based_split=False,
    print_shape=False,
):
    """
    Preprocesses the given data frame for use in a machine learning model.

    Args:
        df (pandas DataFrame): The data to preprocess.
        model_type (str): The type of model to train, either "CLS" for classification or "REG" for regression.
        feature_types (list of str, optional): The types of features to include in the training data. Can be any combination of "main", "rank", "zscore", "other", or "all". Defaults to ["main"].
        random_state (int, optional): The random state to use for splitting the data into train and test sets. Defaults to 17.
        test_size (float, optional): The proportion of the data to use for the test set when splitting. Defaults to 0.2.
        time_based_split (bool, optional): Whether to split the data into train and test sets based on a time-based split (i.e. training data comes before a certain date and test data comes after that date). Defaults to False.
        print_shape (bool, optional): Whether to print the shapes of the resulting train and test sets. Defaults to False.

    Returns:
        tuple of numpy ndarrays: The resulting train and test sets
    """

    # Restrict Dates to Regular Season minus First and Last 2 Weeks
    df["game_date"] = pd.to_datetime(df["game_id"].str[:8], format="%Y%m%d")
    mask = pd.Series(False, index=df.index)
    for season, (start_date, end_date) in REGULAR_SEASON_DATES_ADJUSTED.items():
        season_mask = (df["game_date"] >= pd.to_datetime(start_date)) & (
            df["game_date"] <= pd.to_datetime(end_date)
        )
        mask |= season_mask
    df = df[mask]

    # Determine the target column
    target = None
    if model_type == "CLS":
        target = "CLS_TARGET_home_margin_GT_home_spread"
    elif model_type == "REG":
        target = "REG_TARGET_actual_home_margin"
    else:
        raise ValueError("model_type must be either 'CLS' or 'REG'")

    # Determine Features to Use
    drop_columns = [
        "fd_line_home",
        "dk_line_home",
        "cover_consensus_home",
        "game_id",
        "game_date",
        "REG_TARGET_actual_home_margin",
        "CLS_TARGET_home_margin_GT_home_spread",
    ]

    # FEATURE OPTIONS (main, rank, zscore, other, all)
    main_features = [
        "home_team_num",
        "away_team_num",
        "home_spread",
        "league_year_end",
        "day_of_season",
        "elo1_pre",
        "elo2_pre",
        "elo_prob1",
        "elo_prob2",
    ]
    rank_features = [feature for feature in list(df) if "rank" in feature]
    zscore_features = [feature for feature in list(df) if "zscore" in feature]
    other_features = [
        feature
        for feature in list(df)
        if feature not in [target] + main_features + drop_columns
    ]
    all_features = main_features + other_features

    features_to_use = []
    if "all" in feature_types:
        features_to_use = all_features
    elif "main" in feature_types:
        features_to_use = main_features
        if "other" in feature_types and (
            "rank" in feature_types or "zscore" in feature_types
        ):
            raise ValueError("other_features can only be added to main features.")
        elif "other" in feature_types:
            features_to_use += other_features
        elif "other" not in feature_types:
            if "rank" in feature_types:
                features_to_use += rank_features
            if "zscore" in feature_types:
                features_to_use += zscore_features
    else:
        raise ValueError(
            "Feature_types must be one of 'all', 'main', 'rank', 'zscore', 'other'."
        )
    df = df[[target] + features_to_use]

    # Convert Data Types
    float_cols = [
        col
        for col in df.columns
        if col
        not in [
            "game_id",
            "CLS_TARGET_home_margin_GT_home_spread",
            "REG_TARGET_actual_home_margin",
        ]
    ]
    df[float_cols] = df[float_cols].astype("float32")

    # Drop NA Values
    df = df.dropna()

    # Split Data
    train_data, test_data = None, None
    if time_based_split:
        test_data = df[df["league_year_end"] == 22]
        train_data = df[df["league_year_end"] < 22]
    else:
        train_data, test_data = train_test_split(
            df,
            test_size=test_size,
            random_state=random_state,
        )

    if print_shape:
        print(f"Train data shape: {train_data.shape}")
        print(f"Test data shape: {test_data.shape}")
    return train_data, test_data

<a id=Regression></a>

## Regression

<a id=setup></a>

### Setup and Preprocessing

In [10]:
train_data_reg, test_data_reg = prepare_data_pycaret(
        df,
        "REG",
        feature_types=["main"],
        random_state=17,
        test_size=0.2,
        time_based_split=True,
        print_shape=True,
    )

Train data shape: (6742, 10)
Test data shape: (1016, 10)


The setup process involves a lot of options. Reference the docs below:   
https://pycaret.readthedocs.io/en/latest/api/regression.html#module-pycaret.regression

In [11]:
setup_params = {'log_experiment': True,
                'log_profile': False,
                'log_plots': False,
                'experiment_name': 'NBA_Betting_REG_Main_Features',
                'data': train_data_reg,
                'test_data': test_data_reg,
                'target': 'REG_TARGET_actual_home_margin',
                'preprocess': False,
                'normalize': False,        # zscore
                'transformation': False,   # yeo-johnson power transform to make data more Gaussian
                'remove_outliers': False,  # using SVD
                'remove_multicollinearity': False,
                'feature_selection': False,
                'pca': False,
                'pca_components': 10,
                'numeric_features': [],
                'ignore_features': []
               }

In [12]:
nba_betting_regression = py_reg.setup(**setup_params)

Unnamed: 0,Description,Value
0,Session id,8600
1,Target,REG_TARGET_actual_home_margin
2,Target type,Regression
3,Original data shape,"(7758, 10)"
4,Transformed data shape,"(7758, 10)"
5,Transformed train set shape,"(6742, 10)"
6,Transformed test set shape,"(1016, 10)"
7,Numeric features,9


<a id=compare></a>

### Compare Models

In [13]:
best_3_models = py_reg.compare_models(n_select=3)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
ridge,Ridge Regression,9.6916,154.1301,12.3988,0.2061,0.9914,1.148,0.013
lr,Linear Regression,9.6932,154.1438,12.3994,0.206,0.993,1.1474,0.257
lasso,Lasso Regression,9.684,154.1763,12.4001,0.206,1.0026,1.1409,0.013
en,Elastic Net,9.6841,154.166,12.3997,0.206,1.002,1.1422,0.012
llar,Lasso Least Angle Regression,9.684,154.1769,12.4002,0.206,1.0027,1.1409,0.013
br,Bayesian Ridge,9.6829,154.1798,12.4004,0.2059,0.9852,1.1514,0.013
lar,Least Angle Regression,9.6948,154.1813,12.4009,0.2058,0.9913,1.1492,0.014
huber,Huber Regressor,9.712,154.9394,12.4287,0.2025,0.9893,1.1605,0.019
gbr,Gradient Boosting Regressor,9.799,156.5971,12.4992,0.1931,1.0721,1.1526,0.672
ada,AdaBoost Regressor,9.8202,157.511,12.5346,0.1885,1.0985,1.1374,0.167


Processing:   0%|          | 0/83 [00:00<?, ?it/s]

<a id=create></a>

### Create Selected Model

In [14]:
model = py_reg.create_model('lr')

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,9.3694,144.5383,12.0224,0.2139,0.9219,1.1889
1,9.0979,132.1788,11.4969,0.2414,0.9717,1.0727
2,9.223,137.5587,11.7285,0.2415,0.9601,1.1042
3,9.5327,148.7009,12.1943,0.2056,1.0027,1.1561
4,9.819,158.1365,12.5752,0.1681,0.9847,1.1605
5,9.2775,140.6305,11.8588,0.2066,0.9834,1.1486
6,9.9615,167.7876,12.9533,0.2067,1.0226,1.1559
7,9.668,158.3711,12.5846,0.2097,1.0064,1.2214
8,10.2252,166.7299,12.9124,0.1692,1.0027,1.1386
9,10.7579,186.8056,13.6677,0.1977,1.0735,1.1275


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

<a id=tune></a>

### Tune Selected Model

In [15]:
tuned_model = py_reg.tune_model(model)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,9.3694,144.5383,12.0224,0.2139,0.9219,1.1889
1,9.0979,132.1788,11.4969,0.2414,0.9717,1.0727
2,9.223,137.5587,11.7285,0.2415,0.9601,1.1042
3,9.5327,148.7009,12.1943,0.2056,1.0027,1.1561
4,9.819,158.1365,12.5752,0.1681,0.9847,1.1605
5,9.2775,140.6305,11.8588,0.2066,0.9834,1.1486
6,9.9615,167.7876,12.9533,0.2067,1.0226,1.1559
7,9.668,158.3711,12.5846,0.2097,1.0064,1.2214
8,10.2252,166.7299,12.9124,0.1692,1.0027,1.1386
9,10.7579,186.8056,13.6677,0.1977,1.0735,1.1275


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 2 candidates, totalling 20 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


<a id=evaluate></a>

### Evaluate Model

https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.evaluate_model

In [16]:
py_reg.evaluate_model(tuned_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.interpret_model

In [17]:
# py_reg.interpret_model(tuned_model)

<a id=finalize_and_store></a>

### Model Finalization and Storage

In [18]:
final_model = py_reg.finalize_model(tuned_model)

In [19]:
# py_reg.save_model(final_model, '../models/AutoML/vlastd_Rank_Lasso_Reg_PyCaret')

<a id=Classification></a>

## Classification

<a id=setup_cls></a>

### Setup and Preprocessing

In [20]:
train_data_cls, test_data_cls = prepare_data_pycaret(
        df,
        "CLS",
        feature_types=["main"],
        random_state=17,
        test_size=0.2,
        time_based_split=True,
        print_shape=True,
    )

Train data shape: (6742, 10)
Test data shape: (1016, 10)


The setup process involves a lot of options. Reference the docs below:   
https://pycaret.readthedocs.io/en/latest/api/regression.html#module-pycaret.regression

In [21]:
setup_params = {'log_experiment': True,
                'log_profile': False,
                'log_plots': False,
                'experiment_name': 'NBA_Betting_CLS_Main_Features',
                'data': train_data_cls,
                'test_data': test_data_cls,
                'target': 'CLS_TARGET_home_margin_GT_home_spread',
                'preprocess': False,
                'normalize': False,        # zscore
                'transformation': False,   # yeo-johnson power transform to make data more Gaussian
                'remove_outliers': False,  # using SVD
                'remove_multicollinearity': False,
                'polynomial_features': False,
                'feature_selection': False,
                'pca': False,
                'pca_components': 10,
                'numeric_features': [],
                'ignore_features': []
               }

In [22]:
nba_betting_classification = py_cls.setup(**setup_params)

Unnamed: 0,Description,Value
0,Session id,8742
1,Target,CLS_TARGET_home_margin_GT_home_spread
2,Target type,Binary
3,Original data shape,"(7758, 10)"
4,Transformed data shape,"(7758, 10)"
5,Transformed train set shape,"(6742, 10)"
6,Transformed test set shape,"(1016, 10)"
7,Numeric features,9


<a id=compare_cls></a>

### Compare Models

In [23]:
best_3_models = py_cls.compare_models(n_select=3)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.5113,0.5,0.0,0.0,0.0,0.0,0.0,0.012
et,Extra Trees Classifier,0.5099,0.5042,0.4352,0.5106,0.4414,0.0165,0.0207,0.537
ridge,Ridge Classifier,0.5085,0.0,0.2883,0.4926,0.356,0.0072,0.0072,0.016
lda,Linear Discriminant Analysis,0.5083,0.5005,0.2877,0.4919,0.3554,0.0069,0.0068,0.02
nb,Naive Bayes,0.505,0.5024,0.2795,0.4905,0.3531,0.0002,0.0008,0.015
lr,Logistic Regression,0.5007,0.482,0.144,0.4642,0.1836,-0.0145,-0.0191,0.027
ada,Ada Boost Classifier,0.499,0.4995,0.3815,0.4787,0.4171,-0.0073,-0.0094,0.216
rf,Random Forest Classifier,0.4973,0.4957,0.4352,0.4825,0.4529,-0.0082,-0.0088,0.849
lightgbm,Light Gradient Boosting Machine,0.4972,0.4894,0.3802,0.5006,0.396,-0.0109,-0.0088,0.145
svm,SVM - Linear Kernel,0.496,0.0,0.5351,0.2902,0.372,-0.0066,-0.0064,0.065


Processing:   0%|          | 0/67 [00:00<?, ?it/s]

<a id=create_cls></a>

### Create Selected Model

In [None]:
model = py_cls.create_model('lightgbm')

<a id=tune_cls></a>

### Tune Selected Model

In [None]:
tuned_model = py_cls.tune_model(model)

<a id=evaluate_cls></a>

### Evaluate Model

https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.evaluate_model

In [None]:
py_cls.evaluate_model(tuned_model)

https://pycaret.readthedocs.io/en/latest/api/regression.html#pycaret.regression.interpret_model

In [None]:
# py_cls.interpret_model(tuned_model)

<a id=finalize_and_store_cls></a>

### Model Finalization and Storage

In [None]:
final_model = py_cls.finalize_model(tuned_model)

In [None]:
# py_cls.save_model(final_model, '../models/AutoML/vlastd_rank_LR_CLS_PyCaret')