This notebook takes the hybrid approach as per **Hybrid Basketball Game Outcome Prediction Model by
Integrating Data Mining Methods for the National
Basketball Association**, using a dataframe with the last 11 seasons with team's averages, no rival averages present or league-wide ranks, the dataframe used when training a Random Forest, a Decision Tree and XGBoost on both regression and classification (points and win/loss) has, for each statistic, an individual lag of 1-4 and an overall lag of the last 4 games. The features have no normalization.

In [1]:
import pandas as pd
pd.options.display.max_columns = None

games = pd.read_csv('/kaggle/input/trulylastelevenseasons/games_from_last_eleven_seasons_with_averages (1).csv')
games.head()

Unnamed: 0.1,Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Season,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS
0,0,1610612737,21401217,2015-04-15,ATL @ CHI,L,60,22,0.732,240,36,92,0.391,8,29,0.276,5,7,0.714,10,32,42,15,10,10,15,19,85,2014-15,240.617284,38.08642,81.567901,0.468123,10.0,26.209877,0.381432,16.592593,21.333333,0.777284,8.703704,31.839506,40.54321,25.876543,9.061728,4.567901,13.432099,17.753086,102.765432
1,1,1610612737,21401205,2015-04-13,ATL vs. NYK,L,60,21,0.741,240,39,88,0.443,11,32,0.344,19,23,0.826,12,25,37,27,10,3,8,17,108,2014-15,240.625,38.075,81.4875,0.468438,9.9875,26.1375,0.3819,16.5625,21.3125,0.776675,8.6625,31.925,40.5875,25.8625,9.05,4.5875,13.5,17.7625,102.7
2,2,1610612737,21401198,2015-04-12,ATL @ WAS,L,60,20,0.75,240,36,87,0.414,9,29,0.31,18,24,0.75,7,31,38,24,10,2,11,22,99,2014-15,240.632911,38.101266,81.417722,0.469127,10.0,26.101266,0.38281,16.544304,21.278481,0.777013,8.683544,31.936709,40.620253,25.886076,9.037975,4.620253,13.531646,17.708861,102.746835
3,3,1610612737,21401176,2015-04-10,ATL vs. CHA,W,60,19,0.759,240,41,82,0.5,10,28,0.357,12,16,0.75,9,35,44,31,10,4,13,18,104,2014-15,240.641026,38.064103,81.410256,0.468731,10.0,26.076923,0.383141,16.602564,21.346154,0.777359,8.679487,31.897436,40.576923,25.820513,9.025641,4.628205,13.538462,17.705128,102.730769
4,4,1610612737,21401163,2015-04-08,ATL @ BKN,W,59,19,0.756,240,43,86,0.5,11,28,0.393,17,25,0.68,10,30,40,35,11,4,11,18,114,2014-15,240.649351,38.0,81.350649,0.468325,9.987013,26.051948,0.383013,16.597403,21.298701,0.778623,8.662338,31.922078,40.584416,25.701299,9.0,4.636364,13.571429,17.701299,102.584416


In [2]:
# Let's calculate 2P%
def calc_2PCT(df):
    df['2PM'] = df['FGM'] - df['FG3M']
    df['2PA'] = df['FGA'] - df['FG3A']
    df['2P_PCT'] = df['2PM'] / df['2PA']

def matchup(matchup: str):
    if '@' in matchup:
        return matchup[6:]
    else:
        return matchup[7:]

calc_2PCT(games)
games['Team'] = games['MATCHUP'].astype(str).str[:3]
games['Rival'] = games['MATCHUP'].apply(matchup)
games['Home'] = games['MATCHUP'].apply(lambda x: True if 'vs' in x else False)
games.drop(columns=['MATCHUP', 'Unnamed: 0'], inplace=True)
games.head()

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,WL,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Season,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS,2PM,2PA,2P_PCT,Team,Rival,Home
0,1610612737,21401217,2015-04-15,L,60,22,0.732,240,36,92,0.391,8,29,0.276,5,7,0.714,10,32,42,15,10,10,15,19,85,2014-15,240.617284,38.08642,81.567901,0.468123,10.0,26.209877,0.381432,16.592593,21.333333,0.777284,8.703704,31.839506,40.54321,25.876543,9.061728,4.567901,13.432099,17.753086,102.765432,28,63,0.444444,ATL,CHI,False
1,1610612737,21401205,2015-04-13,L,60,21,0.741,240,39,88,0.443,11,32,0.344,19,23,0.826,12,25,37,27,10,3,8,17,108,2014-15,240.625,38.075,81.4875,0.468438,9.9875,26.1375,0.3819,16.5625,21.3125,0.776675,8.6625,31.925,40.5875,25.8625,9.05,4.5875,13.5,17.7625,102.7,28,56,0.5,ATL,NYK,True
2,1610612737,21401198,2015-04-12,L,60,20,0.75,240,36,87,0.414,9,29,0.31,18,24,0.75,7,31,38,24,10,2,11,22,99,2014-15,240.632911,38.101266,81.417722,0.469127,10.0,26.101266,0.38281,16.544304,21.278481,0.777013,8.683544,31.936709,40.620253,25.886076,9.037975,4.620253,13.531646,17.708861,102.746835,27,58,0.465517,ATL,WAS,False
3,1610612737,21401176,2015-04-10,W,60,19,0.759,240,41,82,0.5,10,28,0.357,12,16,0.75,9,35,44,31,10,4,13,18,104,2014-15,240.641026,38.064103,81.410256,0.468731,10.0,26.076923,0.383141,16.602564,21.346154,0.777359,8.679487,31.897436,40.576923,25.820513,9.025641,4.628205,13.538462,17.705128,102.730769,31,54,0.574074,ATL,CHA,True
4,1610612737,21401163,2015-04-08,W,59,19,0.756,240,43,86,0.5,11,28,0.393,17,25,0.68,10,30,40,35,11,4,11,18,114,2014-15,240.649351,38.0,81.350649,0.468325,9.987013,26.051948,0.383013,16.597403,21.298701,0.778623,8.662338,31.922078,40.584416,25.701299,9.0,4.636364,13.571429,17.701299,102.584416,32,58,0.551724,ATL,BKN,False


In [3]:
def lagged(df, stats):
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
    df = df.sort_values(by=['Team_ID', 'GAME_DATE'])
    lags = 4
    for stat in stats:
        for lag in range(1, lags + 1):
            df[f"{stat}_lag{lag}"] = df.groupby('Team_ID')[stat].shift(lag)
    df = df.dropna()
    for stat in stats:
        df[f"lagged_{stat}"] = (df[f"{stat}_lag1"] + df[f"{stat}_lag2"] + df[f"{stat}_lag3"] + df[f"{stat}_lag4"]) / 4
    return df
games = lagged(games, ['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FT_PCT', 'FTM', 'FTA', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', '2P_PCT'])
games.head()

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,WL,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Season,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS,2PM,2PA,2P_PCT,Team,Rival,Home,FGM_lag1,FGM_lag2,FGM_lag3,FGM_lag4,FGA_lag1,FGA_lag2,FGA_lag3,FGA_lag4,FG_PCT_lag1,FG_PCT_lag2,FG_PCT_lag3,FG_PCT_lag4,FG3M_lag1,FG3M_lag2,FG3M_lag3,FG3M_lag4,FG3A_lag1,FG3A_lag2,FG3A_lag3,FG3A_lag4,FG3_PCT_lag1,FG3_PCT_lag2,FG3_PCT_lag3,FG3_PCT_lag4,FT_PCT_lag1,FT_PCT_lag2,FT_PCT_lag3,FT_PCT_lag4,FTM_lag1,FTM_lag2,FTM_lag3,FTM_lag4,FTA_lag1,FTA_lag2,FTA_lag3,FTA_lag4,OREB_lag1,OREB_lag2,OREB_lag3,OREB_lag4,DREB_lag1,DREB_lag2,DREB_lag3,DREB_lag4,REB_lag1,REB_lag2,REB_lag3,REB_lag4,AST_lag1,AST_lag2,AST_lag3,AST_lag4,STL_lag1,STL_lag2,STL_lag3,STL_lag4,BLK_lag1,BLK_lag2,BLK_lag3,BLK_lag4,TOV_lag1,TOV_lag2,TOV_lag3,TOV_lag4,PF_lag1,PF_lag2,PF_lag3,PF_lag4,2P_PCT_lag1,2P_PCT_lag2,2P_PCT_lag3,2P_PCT_lag4,lagged_FGM,lagged_FGA,lagged_FG_PCT,lagged_FG3M,lagged_FG3A,lagged_FG3_PCT,lagged_FT_PCT,lagged_FTM,lagged_FTA,lagged_OREB,lagged_DREB,lagged_REB,lagged_AST,lagged_STL,lagged_BLK,lagged_TOV,lagged_PF,lagged_2P_PCT
77,1610612737,21400084,2014-11-08,W,2,3,0.4,240,33,81,0.407,9,22,0.409,28,36,0.778,12,29,41,18,10,5,8,17,103,2014-15,252.5,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,103.75,24,59,0.40678,ATL,NYK,True,43.0,38.0,35.0,40.0,93.0,92.0,69.0,80.0,0.462,0.413,0.507,0.5,13.0,8.0,7.0,13.0,33.0,25.0,20.0,22.0,0.394,0.32,0.35,0.591,0.769,0.727,0.758,0.529,20.0,8.0,25.0,9.0,26.0,11.0,33.0,17.0,7.0,10.0,3.0,10.0,31.0,27.0,34.0,32.0,38.0,37.0,37.0,42.0,28.0,26.0,26.0,26.0,8.0,14.0,10.0,6.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,24.0,0.5,0.447761,0.571429,0.465517,39.0,83.5,0.4705,10.25,25.0,0.41375,0.69575,15.5,21.75,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,0.496177
76,1610612737,21400100,2014-11-10,W,3,3,0.5,240,27,71,0.38,10,27,0.37,27,28,0.964,9,29,38,20,7,3,15,16,91,2014-15,250.0,37.8,83.0,0.4578,10.0,24.4,0.4128,18.0,24.6,0.7122,8.4,30.6,39.0,24.8,9.6,5.4,13.8,23.8,103.6,17,44,0.386364,ATL,NYK,False,33.0,43.0,38.0,35.0,81.0,93.0,92.0,69.0,0.407,0.462,0.413,0.507,9.0,13.0,8.0,7.0,22.0,33.0,25.0,20.0,0.409,0.394,0.32,0.35,0.778,0.769,0.727,0.758,28.0,20.0,8.0,25.0,36.0,26.0,11.0,33.0,12.0,7.0,10.0,3.0,29.0,31.0,27.0,34.0,41.0,38.0,37.0,37.0,18.0,28.0,26.0,26.0,10.0,8.0,14.0,10.0,5.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,0.40678,0.5,0.447761,0.571429,37.25,83.75,0.44725,9.25,25.0,0.36825,0.758,20.25,26.5,8.0,30.25,38.25,24.5,10.5,4.75,13.0,23.75,0.481492
75,1610612737,21400110,2014-11-12,W,4,3,0.571,240,39,76,0.513,9,20,0.45,13,18,0.722,13,33,46,23,8,4,18,12,100,2014-15,248.333333,36.0,81.0,0.444833,10.0,24.833333,0.405667,19.5,25.166667,0.754167,8.5,30.333333,38.833333,24.0,9.166667,5.0,14.0,22.5,101.5,30,56,0.535714,ATL,UTA,True,27.0,33.0,43.0,38.0,71.0,81.0,93.0,92.0,0.38,0.407,0.462,0.413,10.0,9.0,13.0,8.0,27.0,22.0,33.0,25.0,0.37,0.409,0.394,0.32,0.964,0.778,0.769,0.727,27.0,28.0,20.0,8.0,28.0,36.0,26.0,11.0,9.0,12.0,7.0,10.0,29.0,29.0,31.0,27.0,38.0,41.0,38.0,37.0,20.0,18.0,28.0,26.0,7.0,10.0,8.0,14.0,3.0,5.0,3.0,5.0,15.0,8.0,19.0,13.0,16.0,17.0,33.0,25.0,0.386364,0.40678,0.5,0.447761,35.25,84.25,0.4155,10.0,26.75,0.37325,0.8095,20.75,25.25,9.5,29.0,38.5,23.0,9.75,4.0,13.75,22.75,0.435226
74,1610612737,21400124,2014-11-14,W,5,3,0.625,240,42,75,0.56,11,28,0.393,19,23,0.826,3,33,36,33,10,5,13,20,114,2014-15,247.142857,36.428571,80.285714,0.454571,9.857143,24.142857,0.412,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.0,4.857143,14.571429,21.0,101.285714,31,47,0.659574,ATL,MIA,True,39.0,27.0,33.0,43.0,76.0,71.0,81.0,93.0,0.513,0.38,0.407,0.462,9.0,10.0,9.0,13.0,20.0,27.0,22.0,33.0,0.45,0.37,0.409,0.394,0.722,0.964,0.778,0.769,13.0,27.0,28.0,20.0,18.0,28.0,36.0,26.0,13.0,9.0,12.0,7.0,33.0,29.0,29.0,31.0,46.0,38.0,41.0,38.0,23.0,20.0,18.0,28.0,8.0,7.0,10.0,8.0,4.0,3.0,5.0,3.0,18.0,15.0,8.0,19.0,12.0,16.0,17.0,33.0,0.535714,0.386364,0.40678,0.5,35.5,80.25,0.4405,10.25,25.5,0.40575,0.80825,22.0,27.0,10.25,30.5,40.75,22.25,8.25,3.75,15.0,19.5,0.457214
73,1610612737,21400133,2014-11-15,L,5,4,0.556,240,40,90,0.444,3,22,0.136,11,13,0.846,11,26,37,26,6,8,18,12,94,2014-15,246.25,37.125,79.625,0.46775,10.0,24.625,0.409625,18.625,24.0,0.759125,8.375,31.0,39.375,25.0,9.125,4.875,14.375,20.875,102.875,37,68,0.544118,ATL,CLE,False,42.0,39.0,27.0,33.0,75.0,76.0,71.0,81.0,0.56,0.513,0.38,0.407,11.0,9.0,10.0,9.0,28.0,20.0,27.0,22.0,0.393,0.45,0.37,0.409,0.826,0.722,0.964,0.778,19.0,13.0,27.0,28.0,23.0,18.0,28.0,36.0,3.0,13.0,9.0,12.0,33.0,33.0,29.0,29.0,36.0,46.0,38.0,41.0,33.0,23.0,20.0,18.0,10.0,8.0,7.0,10.0,5.0,4.0,3.0,5.0,13.0,18.0,15.0,8.0,20.0,12.0,16.0,17.0,0.659574,0.535714,0.386364,0.40678,35.25,75.75,0.465,9.75,24.25,0.4055,0.8225,21.75,26.25,9.25,31.0,40.25,23.5,8.75,4.25,13.5,16.25,0.497108


In [4]:
games['Year'] = games['GAME_DATE'].dt.year
games['Month'] = games['GAME_DATE'].dt.month
games['Day'] = games['GAME_DATE'].dt.day
games.head()

  games['Year'] = games['GAME_DATE'].dt.year
  games['Month'] = games['GAME_DATE'].dt.month
  games['Day'] = games['GAME_DATE'].dt.day


Unnamed: 0,Team_ID,Game_ID,GAME_DATE,WL,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Season,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS,2PM,2PA,2P_PCT,Team,Rival,Home,FGM_lag1,FGM_lag2,FGM_lag3,FGM_lag4,FGA_lag1,FGA_lag2,FGA_lag3,FGA_lag4,FG_PCT_lag1,FG_PCT_lag2,FG_PCT_lag3,FG_PCT_lag4,FG3M_lag1,FG3M_lag2,FG3M_lag3,FG3M_lag4,FG3A_lag1,FG3A_lag2,FG3A_lag3,FG3A_lag4,FG3_PCT_lag1,FG3_PCT_lag2,FG3_PCT_lag3,FG3_PCT_lag4,FT_PCT_lag1,FT_PCT_lag2,FT_PCT_lag3,FT_PCT_lag4,FTM_lag1,FTM_lag2,FTM_lag3,FTM_lag4,FTA_lag1,FTA_lag2,FTA_lag3,FTA_lag4,OREB_lag1,OREB_lag2,OREB_lag3,OREB_lag4,DREB_lag1,DREB_lag2,DREB_lag3,DREB_lag4,REB_lag1,REB_lag2,REB_lag3,REB_lag4,AST_lag1,AST_lag2,AST_lag3,AST_lag4,STL_lag1,STL_lag2,STL_lag3,STL_lag4,BLK_lag1,BLK_lag2,BLK_lag3,BLK_lag4,TOV_lag1,TOV_lag2,TOV_lag3,TOV_lag4,PF_lag1,PF_lag2,PF_lag3,PF_lag4,2P_PCT_lag1,2P_PCT_lag2,2P_PCT_lag3,2P_PCT_lag4,lagged_FGM,lagged_FGA,lagged_FG_PCT,lagged_FG3M,lagged_FG3A,lagged_FG3_PCT,lagged_FT_PCT,lagged_FTM,lagged_FTA,lagged_OREB,lagged_DREB,lagged_REB,lagged_AST,lagged_STL,lagged_BLK,lagged_TOV,lagged_PF,lagged_2P_PCT,Year,Month,Day
77,1610612737,21400084,2014-11-08,W,2,3,0.4,240,33,81,0.407,9,22,0.409,28,36,0.778,12,29,41,18,10,5,8,17,103,2014-15,252.5,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,103.75,24,59,0.40678,ATL,NYK,True,43.0,38.0,35.0,40.0,93.0,92.0,69.0,80.0,0.462,0.413,0.507,0.5,13.0,8.0,7.0,13.0,33.0,25.0,20.0,22.0,0.394,0.32,0.35,0.591,0.769,0.727,0.758,0.529,20.0,8.0,25.0,9.0,26.0,11.0,33.0,17.0,7.0,10.0,3.0,10.0,31.0,27.0,34.0,32.0,38.0,37.0,37.0,42.0,28.0,26.0,26.0,26.0,8.0,14.0,10.0,6.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,24.0,0.5,0.447761,0.571429,0.465517,39.0,83.5,0.4705,10.25,25.0,0.41375,0.69575,15.5,21.75,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,0.496177,2014,11,8
76,1610612737,21400100,2014-11-10,W,3,3,0.5,240,27,71,0.38,10,27,0.37,27,28,0.964,9,29,38,20,7,3,15,16,91,2014-15,250.0,37.8,83.0,0.4578,10.0,24.4,0.4128,18.0,24.6,0.7122,8.4,30.6,39.0,24.8,9.6,5.4,13.8,23.8,103.6,17,44,0.386364,ATL,NYK,False,33.0,43.0,38.0,35.0,81.0,93.0,92.0,69.0,0.407,0.462,0.413,0.507,9.0,13.0,8.0,7.0,22.0,33.0,25.0,20.0,0.409,0.394,0.32,0.35,0.778,0.769,0.727,0.758,28.0,20.0,8.0,25.0,36.0,26.0,11.0,33.0,12.0,7.0,10.0,3.0,29.0,31.0,27.0,34.0,41.0,38.0,37.0,37.0,18.0,28.0,26.0,26.0,10.0,8.0,14.0,10.0,5.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,0.40678,0.5,0.447761,0.571429,37.25,83.75,0.44725,9.25,25.0,0.36825,0.758,20.25,26.5,8.0,30.25,38.25,24.5,10.5,4.75,13.0,23.75,0.481492,2014,11,10
75,1610612737,21400110,2014-11-12,W,4,3,0.571,240,39,76,0.513,9,20,0.45,13,18,0.722,13,33,46,23,8,4,18,12,100,2014-15,248.333333,36.0,81.0,0.444833,10.0,24.833333,0.405667,19.5,25.166667,0.754167,8.5,30.333333,38.833333,24.0,9.166667,5.0,14.0,22.5,101.5,30,56,0.535714,ATL,UTA,True,27.0,33.0,43.0,38.0,71.0,81.0,93.0,92.0,0.38,0.407,0.462,0.413,10.0,9.0,13.0,8.0,27.0,22.0,33.0,25.0,0.37,0.409,0.394,0.32,0.964,0.778,0.769,0.727,27.0,28.0,20.0,8.0,28.0,36.0,26.0,11.0,9.0,12.0,7.0,10.0,29.0,29.0,31.0,27.0,38.0,41.0,38.0,37.0,20.0,18.0,28.0,26.0,7.0,10.0,8.0,14.0,3.0,5.0,3.0,5.0,15.0,8.0,19.0,13.0,16.0,17.0,33.0,25.0,0.386364,0.40678,0.5,0.447761,35.25,84.25,0.4155,10.0,26.75,0.37325,0.8095,20.75,25.25,9.5,29.0,38.5,23.0,9.75,4.0,13.75,22.75,0.435226,2014,11,12
74,1610612737,21400124,2014-11-14,W,5,3,0.625,240,42,75,0.56,11,28,0.393,19,23,0.826,3,33,36,33,10,5,13,20,114,2014-15,247.142857,36.428571,80.285714,0.454571,9.857143,24.142857,0.412,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.0,4.857143,14.571429,21.0,101.285714,31,47,0.659574,ATL,MIA,True,39.0,27.0,33.0,43.0,76.0,71.0,81.0,93.0,0.513,0.38,0.407,0.462,9.0,10.0,9.0,13.0,20.0,27.0,22.0,33.0,0.45,0.37,0.409,0.394,0.722,0.964,0.778,0.769,13.0,27.0,28.0,20.0,18.0,28.0,36.0,26.0,13.0,9.0,12.0,7.0,33.0,29.0,29.0,31.0,46.0,38.0,41.0,38.0,23.0,20.0,18.0,28.0,8.0,7.0,10.0,8.0,4.0,3.0,5.0,3.0,18.0,15.0,8.0,19.0,12.0,16.0,17.0,33.0,0.535714,0.386364,0.40678,0.5,35.5,80.25,0.4405,10.25,25.5,0.40575,0.80825,22.0,27.0,10.25,30.5,40.75,22.25,8.25,3.75,15.0,19.5,0.457214,2014,11,14
73,1610612737,21400133,2014-11-15,L,5,4,0.556,240,40,90,0.444,3,22,0.136,11,13,0.846,11,26,37,26,6,8,18,12,94,2014-15,246.25,37.125,79.625,0.46775,10.0,24.625,0.409625,18.625,24.0,0.759125,8.375,31.0,39.375,25.0,9.125,4.875,14.375,20.875,102.875,37,68,0.544118,ATL,CLE,False,42.0,39.0,27.0,33.0,75.0,76.0,71.0,81.0,0.56,0.513,0.38,0.407,11.0,9.0,10.0,9.0,28.0,20.0,27.0,22.0,0.393,0.45,0.37,0.409,0.826,0.722,0.964,0.778,19.0,13.0,27.0,28.0,23.0,18.0,28.0,36.0,3.0,13.0,9.0,12.0,33.0,33.0,29.0,29.0,36.0,46.0,38.0,41.0,33.0,23.0,20.0,18.0,10.0,8.0,7.0,10.0,5.0,4.0,3.0,5.0,13.0,18.0,15.0,8.0,20.0,12.0,16.0,17.0,0.659574,0.535714,0.386364,0.40678,35.25,75.75,0.465,9.75,24.25,0.4055,0.8225,21.75,26.25,9.25,31.0,40.25,23.5,8.75,4.25,13.5,16.25,0.497108,2014,11,15


**MIN-MAX NORMALIZATION**

In [5]:
# games.drop(['Team_ID', 'GAME_DATE'], inplace=True, axis=1)
# vec = games.columns
# vec = vec.to_list()
# vec.remove('PTS')
# for item in vec:
#     big, small = games[item].max(), games[item].min()
#     games[item] = (games[item] - small) / (big - small)

In [6]:
df = games.drop(columns=[
    'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 
    'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 
    'STL', 'BLK', 'TOV', 'PF', 'Season', 'GAME_DATE', 'Team_ID', 'Game_ID',
])
df.head()

Unnamed: 0,WL,PTS,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS,2PM,2PA,2P_PCT,Team,Rival,Home,FGM_lag1,FGM_lag2,FGM_lag3,FGM_lag4,FGA_lag1,FGA_lag2,FGA_lag3,FGA_lag4,FG_PCT_lag1,FG_PCT_lag2,FG_PCT_lag3,FG_PCT_lag4,FG3M_lag1,FG3M_lag2,FG3M_lag3,FG3M_lag4,FG3A_lag1,FG3A_lag2,FG3A_lag3,FG3A_lag4,FG3_PCT_lag1,FG3_PCT_lag2,FG3_PCT_lag3,FG3_PCT_lag4,FT_PCT_lag1,FT_PCT_lag2,FT_PCT_lag3,FT_PCT_lag4,FTM_lag1,FTM_lag2,FTM_lag3,FTM_lag4,FTA_lag1,FTA_lag2,FTA_lag3,FTA_lag4,OREB_lag1,OREB_lag2,OREB_lag3,OREB_lag4,DREB_lag1,DREB_lag2,DREB_lag3,DREB_lag4,REB_lag1,REB_lag2,REB_lag3,REB_lag4,AST_lag1,AST_lag2,AST_lag3,AST_lag4,STL_lag1,STL_lag2,STL_lag3,STL_lag4,BLK_lag1,BLK_lag2,BLK_lag3,BLK_lag4,TOV_lag1,TOV_lag2,TOV_lag3,TOV_lag4,PF_lag1,PF_lag2,PF_lag3,PF_lag4,2P_PCT_lag1,2P_PCT_lag2,2P_PCT_lag3,2P_PCT_lag4,lagged_FGM,lagged_FGA,lagged_FG_PCT,lagged_FG3M,lagged_FG3A,lagged_FG3_PCT,lagged_FT_PCT,lagged_FTM,lagged_FTA,lagged_OREB,lagged_DREB,lagged_REB,lagged_AST,lagged_STL,lagged_BLK,lagged_TOV,lagged_PF,lagged_2P_PCT,Year,Month,Day
77,W,103,252.5,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,103.75,24,59,0.40678,ATL,NYK,True,43.0,38.0,35.0,40.0,93.0,92.0,69.0,80.0,0.462,0.413,0.507,0.5,13.0,8.0,7.0,13.0,33.0,25.0,20.0,22.0,0.394,0.32,0.35,0.591,0.769,0.727,0.758,0.529,20.0,8.0,25.0,9.0,26.0,11.0,33.0,17.0,7.0,10.0,3.0,10.0,31.0,27.0,34.0,32.0,38.0,37.0,37.0,42.0,28.0,26.0,26.0,26.0,8.0,14.0,10.0,6.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,24.0,0.5,0.447761,0.571429,0.465517,39.0,83.5,0.4705,10.25,25.0,0.41375,0.69575,15.5,21.75,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,0.496177,2014,11,8
76,W,91,250.0,37.8,83.0,0.4578,10.0,24.4,0.4128,18.0,24.6,0.7122,8.4,30.6,39.0,24.8,9.6,5.4,13.8,23.8,103.6,17,44,0.386364,ATL,NYK,False,33.0,43.0,38.0,35.0,81.0,93.0,92.0,69.0,0.407,0.462,0.413,0.507,9.0,13.0,8.0,7.0,22.0,33.0,25.0,20.0,0.409,0.394,0.32,0.35,0.778,0.769,0.727,0.758,28.0,20.0,8.0,25.0,36.0,26.0,11.0,33.0,12.0,7.0,10.0,3.0,29.0,31.0,27.0,34.0,41.0,38.0,37.0,37.0,18.0,28.0,26.0,26.0,10.0,8.0,14.0,10.0,5.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,0.40678,0.5,0.447761,0.571429,37.25,83.75,0.44725,9.25,25.0,0.36825,0.758,20.25,26.5,8.0,30.25,38.25,24.5,10.5,4.75,13.0,23.75,0.481492,2014,11,10
75,W,100,248.333333,36.0,81.0,0.444833,10.0,24.833333,0.405667,19.5,25.166667,0.754167,8.5,30.333333,38.833333,24.0,9.166667,5.0,14.0,22.5,101.5,30,56,0.535714,ATL,UTA,True,27.0,33.0,43.0,38.0,71.0,81.0,93.0,92.0,0.38,0.407,0.462,0.413,10.0,9.0,13.0,8.0,27.0,22.0,33.0,25.0,0.37,0.409,0.394,0.32,0.964,0.778,0.769,0.727,27.0,28.0,20.0,8.0,28.0,36.0,26.0,11.0,9.0,12.0,7.0,10.0,29.0,29.0,31.0,27.0,38.0,41.0,38.0,37.0,20.0,18.0,28.0,26.0,7.0,10.0,8.0,14.0,3.0,5.0,3.0,5.0,15.0,8.0,19.0,13.0,16.0,17.0,33.0,25.0,0.386364,0.40678,0.5,0.447761,35.25,84.25,0.4155,10.0,26.75,0.37325,0.8095,20.75,25.25,9.5,29.0,38.5,23.0,9.75,4.0,13.75,22.75,0.435226,2014,11,12
74,W,114,247.142857,36.428571,80.285714,0.454571,9.857143,24.142857,0.412,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.0,4.857143,14.571429,21.0,101.285714,31,47,0.659574,ATL,MIA,True,39.0,27.0,33.0,43.0,76.0,71.0,81.0,93.0,0.513,0.38,0.407,0.462,9.0,10.0,9.0,13.0,20.0,27.0,22.0,33.0,0.45,0.37,0.409,0.394,0.722,0.964,0.778,0.769,13.0,27.0,28.0,20.0,18.0,28.0,36.0,26.0,13.0,9.0,12.0,7.0,33.0,29.0,29.0,31.0,46.0,38.0,41.0,38.0,23.0,20.0,18.0,28.0,8.0,7.0,10.0,8.0,4.0,3.0,5.0,3.0,18.0,15.0,8.0,19.0,12.0,16.0,17.0,33.0,0.535714,0.386364,0.40678,0.5,35.5,80.25,0.4405,10.25,25.5,0.40575,0.80825,22.0,27.0,10.25,30.5,40.75,22.25,8.25,3.75,15.0,19.5,0.457214,2014,11,14
73,L,94,246.25,37.125,79.625,0.46775,10.0,24.625,0.409625,18.625,24.0,0.759125,8.375,31.0,39.375,25.0,9.125,4.875,14.375,20.875,102.875,37,68,0.544118,ATL,CLE,False,42.0,39.0,27.0,33.0,75.0,76.0,71.0,81.0,0.56,0.513,0.38,0.407,11.0,9.0,10.0,9.0,28.0,20.0,27.0,22.0,0.393,0.45,0.37,0.409,0.826,0.722,0.964,0.778,19.0,13.0,27.0,28.0,23.0,18.0,28.0,36.0,3.0,13.0,9.0,12.0,33.0,33.0,29.0,29.0,36.0,46.0,38.0,41.0,33.0,23.0,20.0,18.0,10.0,8.0,7.0,10.0,5.0,4.0,3.0,5.0,13.0,18.0,15.0,8.0,20.0,12.0,16.0,17.0,0.659574,0.535714,0.386364,0.40678,35.25,75.75,0.465,9.75,24.25,0.4055,0.8225,21.75,26.25,9.25,31.0,40.25,23.5,8.75,4.25,13.5,16.25,0.497108,2014,11,15


In [7]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

LE = LabelEncoder()
df['Month_Sin'] = np.sin(2 * np.pi * games['Month'] / 12)
df['Month_Cos'] = np.cos(2 * np.pi * games['Month'] / 12)
df['Day_Sin'] = np.sin(2 * np.pi * games['Day'] / 31)
df['Day_Cos'] = np.cos(2 * np.pi * games['Day'] / 31)
df['Team'] = LE.fit_transform(games['Team'])
df['Rival'] = LE.fit_transform(games['Rival'])
df['WL'] = LE.fit_transform(games['WL'])
df.drop(['Month', 'Day'], axis = 1, inplace=True)
df.head()

  df['Month_Sin'] = np.sin(2 * np.pi * games['Month'] / 12)
  df['Month_Cos'] = np.cos(2 * np.pi * games['Month'] / 12)
  df['Day_Sin'] = np.sin(2 * np.pi * games['Day'] / 31)
  df['Day_Cos'] = np.cos(2 * np.pi * games['Day'] / 31)


Unnamed: 0,WL,PTS,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS,2PM,2PA,2P_PCT,Team,Rival,Home,FGM_lag1,FGM_lag2,FGM_lag3,FGM_lag4,FGA_lag1,FGA_lag2,FGA_lag3,FGA_lag4,FG_PCT_lag1,FG_PCT_lag2,FG_PCT_lag3,FG_PCT_lag4,FG3M_lag1,FG3M_lag2,FG3M_lag3,FG3M_lag4,FG3A_lag1,FG3A_lag2,FG3A_lag3,FG3A_lag4,FG3_PCT_lag1,FG3_PCT_lag2,FG3_PCT_lag3,FG3_PCT_lag4,FT_PCT_lag1,FT_PCT_lag2,FT_PCT_lag3,FT_PCT_lag4,FTM_lag1,FTM_lag2,FTM_lag3,FTM_lag4,FTA_lag1,FTA_lag2,FTA_lag3,FTA_lag4,OREB_lag1,OREB_lag2,OREB_lag3,OREB_lag4,DREB_lag1,DREB_lag2,DREB_lag3,DREB_lag4,REB_lag1,REB_lag2,REB_lag3,REB_lag4,AST_lag1,AST_lag2,AST_lag3,AST_lag4,STL_lag1,STL_lag2,STL_lag3,STL_lag4,BLK_lag1,BLK_lag2,BLK_lag3,BLK_lag4,TOV_lag1,TOV_lag2,TOV_lag3,TOV_lag4,PF_lag1,PF_lag2,PF_lag3,PF_lag4,2P_PCT_lag1,2P_PCT_lag2,2P_PCT_lag3,2P_PCT_lag4,lagged_FGM,lagged_FGA,lagged_FG_PCT,lagged_FG3M,lagged_FG3A,lagged_FG3_PCT,lagged_FT_PCT,lagged_FTM,lagged_FTA,lagged_OREB,lagged_DREB,lagged_REB,lagged_AST,lagged_STL,lagged_BLK,lagged_TOV,lagged_PF,lagged_2P_PCT,Year,Month_Sin,Month_Cos,Day_Sin,Day_Cos
77,1,103,252.5,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,103.75,24,59,0.40678,0,19,True,43.0,38.0,35.0,40.0,93.0,92.0,69.0,80.0,0.462,0.413,0.507,0.5,13.0,8.0,7.0,13.0,33.0,25.0,20.0,22.0,0.394,0.32,0.35,0.591,0.769,0.727,0.758,0.529,20.0,8.0,25.0,9.0,26.0,11.0,33.0,17.0,7.0,10.0,3.0,10.0,31.0,27.0,34.0,32.0,38.0,37.0,37.0,42.0,28.0,26.0,26.0,26.0,8.0,14.0,10.0,6.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,24.0,0.5,0.447761,0.571429,0.465517,39.0,83.5,0.4705,10.25,25.0,0.41375,0.69575,15.5,21.75,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,0.496177,2014,-0.5,0.866025,0.998717,-0.050649
76,1,91,250.0,37.8,83.0,0.4578,10.0,24.4,0.4128,18.0,24.6,0.7122,8.4,30.6,39.0,24.8,9.6,5.4,13.8,23.8,103.6,17,44,0.386364,0,49,False,33.0,43.0,38.0,35.0,81.0,93.0,92.0,69.0,0.407,0.462,0.413,0.507,9.0,13.0,8.0,7.0,22.0,33.0,25.0,20.0,0.409,0.394,0.32,0.35,0.778,0.769,0.727,0.758,28.0,20.0,8.0,25.0,36.0,26.0,11.0,33.0,12.0,7.0,10.0,3.0,29.0,31.0,27.0,34.0,41.0,38.0,37.0,37.0,18.0,28.0,26.0,26.0,10.0,8.0,14.0,10.0,5.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,0.40678,0.5,0.447761,0.571429,37.25,83.75,0.44725,9.25,25.0,0.36825,0.758,20.25,26.5,8.0,30.25,38.25,24.5,10.5,4.75,13.0,23.75,0.481492,2014,-0.5,0.866025,0.897805,-0.440394
75,1,100,248.333333,36.0,81.0,0.444833,10.0,24.833333,0.405667,19.5,25.166667,0.754167,8.5,30.333333,38.833333,24.0,9.166667,5.0,14.0,22.5,101.5,30,56,0.535714,0,28,True,27.0,33.0,43.0,38.0,71.0,81.0,93.0,92.0,0.38,0.407,0.462,0.413,10.0,9.0,13.0,8.0,27.0,22.0,33.0,25.0,0.37,0.409,0.394,0.32,0.964,0.778,0.769,0.727,27.0,28.0,20.0,8.0,28.0,36.0,26.0,11.0,9.0,12.0,7.0,10.0,29.0,29.0,31.0,27.0,38.0,41.0,38.0,37.0,20.0,18.0,28.0,26.0,7.0,10.0,8.0,14.0,3.0,5.0,3.0,5.0,15.0,8.0,19.0,13.0,16.0,17.0,33.0,25.0,0.386364,0.40678,0.5,0.447761,35.25,84.25,0.4155,10.0,26.75,0.37325,0.8095,20.75,25.25,9.5,29.0,38.5,23.0,9.75,4.0,13.75,22.75,0.435226,2014,-0.5,0.866025,0.651372,-0.758758
74,1,114,247.142857,36.428571,80.285714,0.454571,9.857143,24.142857,0.412,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.0,4.857143,14.571429,21.0,101.285714,31,47,0.659574,0,15,True,39.0,27.0,33.0,43.0,76.0,71.0,81.0,93.0,0.513,0.38,0.407,0.462,9.0,10.0,9.0,13.0,20.0,27.0,22.0,33.0,0.45,0.37,0.409,0.394,0.722,0.964,0.778,0.769,13.0,27.0,28.0,20.0,18.0,28.0,36.0,26.0,13.0,9.0,12.0,7.0,33.0,29.0,29.0,31.0,46.0,38.0,41.0,38.0,23.0,20.0,18.0,28.0,8.0,7.0,10.0,8.0,4.0,3.0,5.0,3.0,18.0,15.0,8.0,19.0,12.0,16.0,17.0,33.0,0.535714,0.386364,0.40678,0.5,35.5,80.25,0.4405,10.25,25.5,0.40575,0.80825,22.0,27.0,10.25,30.5,40.75,22.25,8.25,3.75,15.0,19.5,0.457214,2014,-0.5,0.866025,0.299363,-0.954139
73,0,94,246.25,37.125,79.625,0.46775,10.0,24.625,0.409625,18.625,24.0,0.759125,8.375,31.0,39.375,25.0,9.125,4.875,14.375,20.875,102.875,37,68,0.544118,0,35,False,42.0,39.0,27.0,33.0,75.0,76.0,71.0,81.0,0.56,0.513,0.38,0.407,11.0,9.0,10.0,9.0,28.0,20.0,27.0,22.0,0.393,0.45,0.37,0.409,0.826,0.722,0.964,0.778,19.0,13.0,27.0,28.0,23.0,18.0,28.0,36.0,3.0,13.0,9.0,12.0,33.0,33.0,29.0,29.0,36.0,46.0,38.0,41.0,33.0,23.0,20.0,18.0,10.0,8.0,7.0,10.0,5.0,4.0,3.0,5.0,13.0,18.0,15.0,8.0,20.0,12.0,16.0,17.0,0.659574,0.535714,0.386364,0.40678,35.25,75.75,0.465,9.75,24.25,0.4055,0.8225,21.75,26.25,9.25,31.0,40.25,23.5,8.75,4.25,13.5,16.25,0.497108,2014,-0.5,0.866025,0.101168,-0.994869


In [8]:
df.to_csv('hybrid_paper_NBA_10_seasons.csv', index=False)

In [9]:
from sklearn.model_selection import train_test_split

y = df[['PTS', 'WL']]
x = df.drop(['PTS', 'WL', '2PA', '2PM', '2P_PCT'], axis=1)
X_trainC, X_validC, y_trainC, y_validC = train_test_split(x, y['WL'], random_state=42)
X_trainR, X_validR, y_trainR, y_validR = train_test_split(x, y['PTS'], random_state=42)
x.head()

Unnamed: 0,AVG_MIN,AVG_FGM,AVG_FGA,AVG_FG_PCT,AVG_FG3M,AVG_FG3A,AVG_FG3_PCT,AVG_FTM,AVG_FTA,AVG_FT_PCT,AVG_OREB,AVG_DREB,AVG_REB,AVG_AST,AVG_STL,AVG_BLK,AVG_TOV,AVG_PF,AVG_PTS,Team,Rival,Home,FGM_lag1,FGM_lag2,FGM_lag3,FGM_lag4,FGA_lag1,FGA_lag2,FGA_lag3,FGA_lag4,FG_PCT_lag1,FG_PCT_lag2,FG_PCT_lag3,FG_PCT_lag4,FG3M_lag1,FG3M_lag2,FG3M_lag3,FG3M_lag4,FG3A_lag1,FG3A_lag2,FG3A_lag3,FG3A_lag4,FG3_PCT_lag1,FG3_PCT_lag2,FG3_PCT_lag3,FG3_PCT_lag4,FT_PCT_lag1,FT_PCT_lag2,FT_PCT_lag3,FT_PCT_lag4,FTM_lag1,FTM_lag2,FTM_lag3,FTM_lag4,FTA_lag1,FTA_lag2,FTA_lag3,FTA_lag4,OREB_lag1,OREB_lag2,OREB_lag3,OREB_lag4,DREB_lag1,DREB_lag2,DREB_lag3,DREB_lag4,REB_lag1,REB_lag2,REB_lag3,REB_lag4,AST_lag1,AST_lag2,AST_lag3,AST_lag4,STL_lag1,STL_lag2,STL_lag3,STL_lag4,BLK_lag1,BLK_lag2,BLK_lag3,BLK_lag4,TOV_lag1,TOV_lag2,TOV_lag3,TOV_lag4,PF_lag1,PF_lag2,PF_lag3,PF_lag4,2P_PCT_lag1,2P_PCT_lag2,2P_PCT_lag3,2P_PCT_lag4,lagged_FGM,lagged_FGA,lagged_FG_PCT,lagged_FG3M,lagged_FG3A,lagged_FG3_PCT,lagged_FT_PCT,lagged_FTM,lagged_FTA,lagged_OREB,lagged_DREB,lagged_REB,lagged_AST,lagged_STL,lagged_BLK,lagged_TOV,lagged_PF,lagged_2P_PCT,Year,Month_Sin,Month_Cos,Day_Sin,Day_Cos
77,252.5,39.0,83.5,0.4705,10.25,25.0,0.41375,15.5,21.75,0.69575,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,103.75,0,19,True,43.0,38.0,35.0,40.0,93.0,92.0,69.0,80.0,0.462,0.413,0.507,0.5,13.0,8.0,7.0,13.0,33.0,25.0,20.0,22.0,0.394,0.32,0.35,0.591,0.769,0.727,0.758,0.529,20.0,8.0,25.0,9.0,26.0,11.0,33.0,17.0,7.0,10.0,3.0,10.0,31.0,27.0,34.0,32.0,38.0,37.0,37.0,42.0,28.0,26.0,26.0,26.0,8.0,14.0,10.0,6.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,24.0,0.5,0.447761,0.571429,0.465517,39.0,83.5,0.4705,10.25,25.0,0.41375,0.69575,15.5,21.75,7.5,31.0,38.5,26.5,9.5,5.5,15.25,25.5,0.496177,2014,-0.5,0.866025,0.998717,-0.050649
76,250.0,37.8,83.0,0.4578,10.0,24.4,0.4128,18.0,24.6,0.7122,8.4,30.6,39.0,24.8,9.6,5.4,13.8,23.8,103.6,0,49,False,33.0,43.0,38.0,35.0,81.0,93.0,92.0,69.0,0.407,0.462,0.413,0.507,9.0,13.0,8.0,7.0,22.0,33.0,25.0,20.0,0.409,0.394,0.32,0.35,0.778,0.769,0.727,0.758,28.0,20.0,8.0,25.0,36.0,26.0,11.0,33.0,12.0,7.0,10.0,3.0,29.0,31.0,27.0,34.0,41.0,38.0,37.0,37.0,18.0,28.0,26.0,26.0,10.0,8.0,14.0,10.0,5.0,3.0,5.0,6.0,8.0,19.0,13.0,12.0,17.0,33.0,25.0,20.0,0.40678,0.5,0.447761,0.571429,37.25,83.75,0.44725,9.25,25.0,0.36825,0.758,20.25,26.5,8.0,30.25,38.25,24.5,10.5,4.75,13.0,23.75,0.481492,2014,-0.5,0.866025,0.897805,-0.440394
75,248.333333,36.0,81.0,0.444833,10.0,24.833333,0.405667,19.5,25.166667,0.754167,8.5,30.333333,38.833333,24.0,9.166667,5.0,14.0,22.5,101.5,0,28,True,27.0,33.0,43.0,38.0,71.0,81.0,93.0,92.0,0.38,0.407,0.462,0.413,10.0,9.0,13.0,8.0,27.0,22.0,33.0,25.0,0.37,0.409,0.394,0.32,0.964,0.778,0.769,0.727,27.0,28.0,20.0,8.0,28.0,36.0,26.0,11.0,9.0,12.0,7.0,10.0,29.0,29.0,31.0,27.0,38.0,41.0,38.0,37.0,20.0,18.0,28.0,26.0,7.0,10.0,8.0,14.0,3.0,5.0,3.0,5.0,15.0,8.0,19.0,13.0,16.0,17.0,33.0,25.0,0.386364,0.40678,0.5,0.447761,35.25,84.25,0.4155,10.0,26.75,0.37325,0.8095,20.75,25.25,9.5,29.0,38.5,23.0,9.75,4.0,13.75,22.75,0.435226,2014,-0.5,0.866025,0.651372,-0.758758
74,247.142857,36.428571,80.285714,0.454571,9.857143,24.142857,0.412,18.571429,24.142857,0.749571,9.142857,30.714286,39.857143,23.857143,9.0,4.857143,14.571429,21.0,101.285714,0,15,True,39.0,27.0,33.0,43.0,76.0,71.0,81.0,93.0,0.513,0.38,0.407,0.462,9.0,10.0,9.0,13.0,20.0,27.0,22.0,33.0,0.45,0.37,0.409,0.394,0.722,0.964,0.778,0.769,13.0,27.0,28.0,20.0,18.0,28.0,36.0,26.0,13.0,9.0,12.0,7.0,33.0,29.0,29.0,31.0,46.0,38.0,41.0,38.0,23.0,20.0,18.0,28.0,8.0,7.0,10.0,8.0,4.0,3.0,5.0,3.0,18.0,15.0,8.0,19.0,12.0,16.0,17.0,33.0,0.535714,0.386364,0.40678,0.5,35.5,80.25,0.4405,10.25,25.5,0.40575,0.80825,22.0,27.0,10.25,30.5,40.75,22.25,8.25,3.75,15.0,19.5,0.457214,2014,-0.5,0.866025,0.299363,-0.954139
73,246.25,37.125,79.625,0.46775,10.0,24.625,0.409625,18.625,24.0,0.759125,8.375,31.0,39.375,25.0,9.125,4.875,14.375,20.875,102.875,0,35,False,42.0,39.0,27.0,33.0,75.0,76.0,71.0,81.0,0.56,0.513,0.38,0.407,11.0,9.0,10.0,9.0,28.0,20.0,27.0,22.0,0.393,0.45,0.37,0.409,0.826,0.722,0.964,0.778,19.0,13.0,27.0,28.0,23.0,18.0,28.0,36.0,3.0,13.0,9.0,12.0,33.0,33.0,29.0,29.0,36.0,46.0,38.0,41.0,33.0,23.0,20.0,18.0,10.0,8.0,7.0,10.0,5.0,4.0,3.0,5.0,13.0,18.0,15.0,8.0,20.0,12.0,16.0,17.0,0.659574,0.535714,0.386364,0.40678,35.25,75.75,0.465,9.75,24.25,0.4055,0.8225,21.75,26.25,9.25,31.0,40.25,23.5,8.75,4.25,13.5,16.25,0.497108,2014,-0.5,0.866025,0.101168,-0.994869


In [10]:
from xgboost import XGBClassifier as c, XGBRegressor as r

xC = c()
xC.fit(X_trainC, y_trainC)
xR = r()
xR.fit(X_trainR, y_trainR)
xRp = xR.predict(X_validR)
xCp = xC.predict(X_validC)

In [11]:
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

tR = DecisionTreeRegressor(min_samples_leaf=25, random_state=42)
tR.fit(X_trainR, y_trainR)
tC = DecisionTreeClassifier(min_samples_leaf=25, random_state=42)
tC.fit(X_trainC, y_trainC)
tRp = tR.predict(X_validR)
tCp = tC.predict(X_validC)

In [12]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

def rf(xs, y, callback, n_estimators=40, max_samples=18085,
       max_features=0.5, min_samples_leaf=25, **kwargs):
    return callback(n_jobs=-1, n_estimators=n_estimators,
        max_samples=max_samples, max_features=max_features,
        min_samples_leaf=min_samples_leaf, oob_score=True).fit(xs, y)

rR = rf(X_trainR, y_trainR, RandomForestRegressor)
rC = rf(X_trainC, y_trainC, RandomForestClassifier)
rRp = rR.predict(X_validR)
rCp = rC.predict(X_validC)

In [13]:
len(y_validR), len(y_validC), len(xRp), len(xCp), len(tRp), len(tCp), len(rRp), len(rCp)

(6029, 6029, 6029, 6029, 6029, 6029, 6029, 6029)

In [14]:
tR.get_n_leaves(), tC.get_n_leaves()

(559, 552)

In [15]:
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import accuracy_score as acc

acc(y_validC, xCp), acc(y_validC, tCp), acc(y_validC, rCp), mape(y_validR, xRp), mape(y_validR, tRp), mape(y_validR, rRp)

(0.5737269862332062,
 0.5375684193066843,
 0.5981091391607232,
 0.09091293762859207,
 0.09781112649611859,
 0.08763936306603791)