In [667]:
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score


In [668]:
pd.set_option("display.max_rows", 50)

In [669]:
df = pd.read_csv("nba_games.csv", index_col=0)

In [670]:
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won
0,240,37,99,0.374,10,33,0.303,15,23,0.652,...,43.6,40.4,300,98,BRK,125,1,2021,12/22/2020,False
1,240,42,92,0.457,15,35,0.429,26,32,0.813,...,50.0,32.1,267,120,GSW,99,0,2021,12/22/2020,True
2,240,44,93,0.473,14,40,0.350,14,19,0.737,...,100.0,35.9,166,118,LAL,109,1,2021,12/22/2020,True
3,240,38,81,0.469,9,29,0.310,24,31,0.774,...,20.9,40.2,154,114,LAC,116,0,2021,12/22/2020,False
4,240,46,90,0.511,14,35,0.400,15,18,0.833,...,33.3,39.2,203,126,BOS,122,1,2021,12/23/2020,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5185,240,33,80,0.413,9,37,0.243,13,19,0.684,...,33.3,28.1,203,106,PHI,96,0,2023,4/22/2023,False
5186,240,41,84,0.488,9,22,0.409,21,27,0.778,...,29.1,35.8,140,128,LAC,100,1,2023,4/22/2023,True
5187,240,40,92,0.435,12,37,0.324,8,10,0.800,...,25.8,30.3,152,117,PHO,112,0,2023,4/22/2023,False
5188,240,38,85,0.447,15,39,0.385,8,12,0.667,...,37.5,37.3,236,113,MIA,121,1,2023,4/22/2023,False


In [625]:
# df = df.sort_values("date")
# # df = df.reset_index(drop=True)

In [671]:
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won
0,240,37,99,0.374,10,33,0.303,15,23,0.652,...,43.6,40.4,300,98,BRK,125,1,2021,12/22/2020,False
1,240,42,92,0.457,15,35,0.429,26,32,0.813,...,50.0,32.1,267,120,GSW,99,0,2021,12/22/2020,True
2,240,44,93,0.473,14,40,0.350,14,19,0.737,...,100.0,35.9,166,118,LAL,109,1,2021,12/22/2020,True
3,240,38,81,0.469,9,29,0.310,24,31,0.774,...,20.9,40.2,154,114,LAC,116,0,2021,12/22/2020,False
4,240,46,90,0.511,14,35,0.400,15,18,0.833,...,33.3,39.2,203,126,BOS,122,1,2021,12/23/2020,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5185,240,33,80,0.413,9,37,0.243,13,19,0.684,...,33.3,28.1,203,106,PHI,96,0,2023,4/22/2023,False
5186,240,41,84,0.488,9,22,0.409,21,27,0.778,...,29.1,35.8,140,128,LAC,100,1,2023,4/22/2023,True
5187,240,40,92,0.435,12,37,0.324,8,10,0.800,...,25.8,30.3,152,117,PHO,112,0,2023,4/22/2023,False
5188,240,38,85,0.447,15,39,0.385,8,12,0.667,...,37.5,37.3,236,113,MIA,121,1,2023,4/22/2023,False


In [672]:
def add_target(group):
    group["target"] = group["won"].shift(-1)
    return group



df = df.groupby("team", group_keys=False).apply(add_target)

In [681]:
df.tail(50)

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won,target
5140,240,38,98,0.388,5,29,0.172,18,22,0.818,...,35.1,148,105,BOS,112,1,2023,4/15/2023,False,False
5141,240,42,88,0.477,13,33,0.394,15,18,0.833,...,33.2,145,124,ATL,99,0,2023,4/15/2023,True,True
5142,240,37,88,0.42,8,29,0.276,19,22,0.864,...,35.9,157,121,CLE,97,1,2023,4/15/2023,True,False
5143,240,36,83,0.434,10,31,0.323,15,21,0.714,...,39.3,191,113,NYK,101,0,2023,4/15/2023,False,True
5144,240,39,70,0.557,13,29,0.448,10,15,0.667,...,32.0,250,124,PHI,121,1,2023,4/15/2023,False,False
5145,240,42,89,0.472,21,43,0.488,16,16,1.0,...,42.6,233,146,BRK,101,0,2023,4/15/2023,True,True
5146,240,43,90,0.478,16,50,0.32,21,27,0.778,...,33.5,206,125,SAC,126,1,2023,4/15/2023,False,False
5147,240,44,98,0.449,12,32,0.375,26,32,0.813,...,39.3,192,127,GSW,123,0,2023,4/15/2023,True,True
5148,240,30,81,0.37,11,36,0.306,9,16,0.563,...,37.6,180,95,DEN,109,1,2023,4/16/2023,False,False
5149,240,41,90,0.456,16,39,0.41,11,17,0.647,...,53.3,160,127,MIN,80,0,2023,4/16/2023,True,True


In [628]:
df[df["team"] == "PHI"]

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won,target
21,240,41,87,0.471,8,28,0.286,23,30,0.767,...,36.6,207,115,WAS,107,0,2021,12/23/2020,True,True
46,240,41,88,0.466,11,31,0.355,16,22,0.727,...,35.9,139,125,NYK,89,1,2021,12/26/2020,True,False
62,240,33,79,0.418,12,37,0.324,16,22,0.727,...,34.0,200,106,CLE,118,1,2021,12/27/2020,False,True
101,240,32,84,0.381,8,31,0.258,28,33,0.848,...,30.0,152,108,TOR,93,0,2021,12/29/2020,True,True
126,240,44,91,0.484,15,33,0.455,13,15,0.867,...,30.5,131,117,ORL,92,1,2021,12/31/2020,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5100,240,53,101,0.525,14,32,0.438,14,19,0.737,...,36.2,123,131,BRK,105,1,2023,4/9/2023,True,True
5145,240,42,89,0.472,21,43,0.488,16,16,1.000,...,42.6,233,146,BRK,101,0,2023,4/15/2023,True,True
5157,240,36,80,0.450,11,35,0.314,13,16,0.813,...,26.6,119,117,BRK,84,0,2023,4/17/2023,True,True
5172,240,40,82,0.488,13,31,0.419,9,13,0.692,...,42.4,229,125,BRK,97,1,2023,4/20/2023,True,True


In [629]:
df["target"][pd.isnull(df["target"])] = 2
df["target"] = df["target"].astype(int, errors="ignore")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["target"][pd.isnull(df["target"])] = 2


In [630]:
df["won"].value_counts()

False    2595
True     2595
Name: won, dtype: int64

In [631]:
df["target"].value_counts()

0    2580
1    2580
2      30
Name: target, dtype: int64

In [632]:
nulls = pd.isnull(df).sum()

In [633]:
nulls = nulls[nulls > 0]

In [634]:
valid_columns = df.columns[~df.columns.isin(nulls.index)]

In [635]:
valid_columns

Index(['mp', 'fg', 'fga', 'fg%', '3p', '3pa', '3p%', 'ft', 'fta', 'ft%',
       ...
       'usg%_max_opp', 'ortg_max_opp', 'drtg_max_opp', 'team_opp', 'total_opp',
       'home_opp', 'season', 'date', 'won', 'target'],
      dtype='object', length=143)

In [636]:
df = df[valid_columns].copy()

In [637]:
df


Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won,target
0,240,37,99,0.374,10,33,0.303,15,23,0.652,...,40.4,300,98,BRK,125,1,2021,12/22/2020,False,0
1,240,42,92,0.457,15,35,0.429,26,32,0.813,...,32.1,267,120,GSW,99,0,2021,12/22/2020,True,1
2,240,44,93,0.473,14,40,0.350,14,19,0.737,...,35.9,166,118,LAL,109,1,2021,12/22/2020,True,1
3,240,38,81,0.469,9,29,0.310,24,31,0.774,...,40.2,154,114,LAC,116,0,2021,12/22/2020,False,1
4,240,46,90,0.511,14,35,0.400,15,18,0.833,...,39.2,203,126,BOS,122,1,2021,12/23/2020,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5185,240,33,80,0.413,9,37,0.243,13,19,0.684,...,28.1,203,106,PHI,96,0,2023,4/22/2023,False,2
5186,240,41,84,0.488,9,22,0.409,21,27,0.778,...,35.8,140,128,LAC,100,1,2023,4/22/2023,True,2
5187,240,40,92,0.435,12,37,0.324,8,10,0.800,...,30.3,152,117,PHO,112,0,2023,4/22/2023,False,2
5188,240,38,85,0.447,15,39,0.385,8,12,0.667,...,37.3,236,113,MIA,121,1,2023,4/22/2023,False,2


In [638]:
rr = RidgeClassifier(alpha=1)

split = TimeSeriesSplit(n_splits=3)

sfs = SequentialFeatureSelector(rr, 
                                n_features_to_select=30, 
                                direction="forward",
                                cv=split,
                                n_jobs=1
                               )

In [639]:

removed_columns = ["season", "date", "won", "target", "team", "team_opp"]
selected_columns = df.columns[~df.columns.isin(removed_columns)]

In [640]:
scaler = MinMaxScaler()
df.loc[:, selected_columns] = scaler.fit_transform(df[selected_columns])

  df.loc[:, selected_columns] = scaler.fit_transform(df[selected_columns])


In [641]:
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won,target
0,0.0,0.350,0.611111,0.236585,0.296296,0.433962,0.372323,0.307692,0.422222,0.497110,...,0.234917,1.000000,0.168831,BRK,0.611765,1.0,2021,12/22/2020,False,0
1,0.0,0.475,0.481481,0.439024,0.481481,0.471698,0.579901,0.589744,0.622222,0.729769,...,0.128370,0.829897,0.454545,GSW,0.305882,0.0,2021,12/22/2020,True,1
2,0.0,0.525,0.500000,0.478049,0.444444,0.566038,0.449753,0.282051,0.333333,0.619942,...,0.177150,0.309278,0.428571,LAL,0.423529,1.0,2021,12/22/2020,True,1
3,0.0,0.375,0.277778,0.468293,0.259259,0.358491,0.383855,0.538462,0.600000,0.673410,...,0.232349,0.247423,0.376623,LAC,0.505882,0.0,2021,12/22/2020,False,1
4,0.0,0.575,0.444444,0.570732,0.444444,0.471698,0.532125,0.307692,0.311111,0.758671,...,0.219512,0.500000,0.532468,BOS,0.576471,1.0,2021,12/23/2020,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5185,0.0,0.250,0.259259,0.331707,0.259259,0.509434,0.273476,0.256410,0.333333,0.543353,...,0.077022,0.500000,0.272727,PHI,0.270588,0.0,2023,4/22/2023,False,2
5186,0.0,0.450,0.333333,0.514634,0.259259,0.226415,0.546952,0.461538,0.511111,0.679191,...,0.175866,0.175258,0.558442,LAC,0.317647,1.0,2023,4/22/2023,True,2
5187,0.0,0.425,0.481481,0.385366,0.370370,0.509434,0.406919,0.128205,0.133333,0.710983,...,0.105263,0.237113,0.415584,PHO,0.458824,0.0,2023,4/22/2023,False,2
5188,0.0,0.375,0.351852,0.414634,0.481481,0.547170,0.507414,0.128205,0.177778,0.518786,...,0.195122,0.670103,0.363636,MIA,0.564706,1.0,2023,4/22/2023,False,2


In [642]:
sfs.fit(df[selected_columns], df["target"])

In [643]:
predictors = list(selected_columns[sfs.get_support()])

In [644]:
predictors

['fg%',
 '3p',
 '3pa',
 '3p%',
 'pts',
 'efg%',
 'ast%',
 'usg%',
 'ortg',
 'stl_max',
 'blk_max',
 '+/-_max',
 'ts%_max',
 'efg%_max',
 'ftr_max',
 'stl%_max',
 'ortg_max',
 'drtg_max',
 'total',
 'fg_opp',
 'tov_opp',
 'blk%_opp',
 'usg%_opp',
 'drtg_opp',
 'ft%_max_opp',
 'orb_max_opp',
 '+/-_max_opp',
 '3par_max_opp',
 'orb%_max_opp',
 'tov%_max_opp']

In [645]:
def backtest(data, model, predictors, start=2, step=1):
    all_predictions = []
    
    seasons = sorted(data["season"].unique())
    
    for i in range(start, len(seasons), step):
        season = seasons[i]
        train = data[data["season"] < season]
        test = data[data["season"] == season]
        
        model.fit(train[predictors], train["target"])
        
        preds = model.predict(test[predictors])
        preds = pd.Series(preds, index=test.index)
        combined = pd.concat([test["target"], preds], axis=1)
        combined.columns = ["actual", "prediction"]
        
        all_predictions.append(combined)
    return pd.concat(all_predictions)

In [646]:
predictions = backtest(df, rr, predictors)

In [647]:
predictions

Unnamed: 0,actual,prediction
4988,0,1
4989,1,1
4990,1,0
4991,0,1
4992,1,0
...,...,...
5185,2,1
5186,2,1
5187,2,0
5188,2,0


In [648]:
accuracy_score(predictions["actual"], predictions["prediction"])

0.4158415841584158

In [649]:

df.groupby(["home"]).apply(lambda x: x[x["won"] == 1].shape[0] / x.shape[0])

home
0.0    0.452023
1.0    0.547977
dtype: float64

In [650]:
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won,target
0,0.0,0.350,0.611111,0.236585,0.296296,0.433962,0.372323,0.307692,0.422222,0.497110,...,0.234917,1.000000,0.168831,BRK,0.611765,1.0,2021,12/22/2020,False,0
1,0.0,0.475,0.481481,0.439024,0.481481,0.471698,0.579901,0.589744,0.622222,0.729769,...,0.128370,0.829897,0.454545,GSW,0.305882,0.0,2021,12/22/2020,True,1
2,0.0,0.525,0.500000,0.478049,0.444444,0.566038,0.449753,0.282051,0.333333,0.619942,...,0.177150,0.309278,0.428571,LAL,0.423529,1.0,2021,12/22/2020,True,1
3,0.0,0.375,0.277778,0.468293,0.259259,0.358491,0.383855,0.538462,0.600000,0.673410,...,0.232349,0.247423,0.376623,LAC,0.505882,0.0,2021,12/22/2020,False,1
4,0.0,0.575,0.444444,0.570732,0.444444,0.471698,0.532125,0.307692,0.311111,0.758671,...,0.219512,0.500000,0.532468,BOS,0.576471,1.0,2021,12/23/2020,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5185,0.0,0.250,0.259259,0.331707,0.259259,0.509434,0.273476,0.256410,0.333333,0.543353,...,0.077022,0.500000,0.272727,PHI,0.270588,0.0,2023,4/22/2023,False,2
5186,0.0,0.450,0.333333,0.514634,0.259259,0.226415,0.546952,0.461538,0.511111,0.679191,...,0.175866,0.175258,0.558442,LAC,0.317647,1.0,2023,4/22/2023,True,2
5187,0.0,0.425,0.481481,0.385366,0.370370,0.509434,0.406919,0.128205,0.133333,0.710983,...,0.105263,0.237113,0.415584,PHO,0.458824,0.0,2023,4/22/2023,False,2
5188,0.0,0.375,0.351852,0.414634,0.481481,0.547170,0.507414,0.128205,0.177778,0.518786,...,0.195122,0.670103,0.363636,MIA,0.564706,1.0,2023,4/22/2023,False,2


In [651]:
df_rolling = df[list(selected_columns) + ["won", "team", "season"]]

def find_team_averages(team):
    rolling = team.rolling(10).mean()
    return rolling

df_rolling = df_rolling.groupby(["team", "season"], group_keys=False).apply(find_team_averages)

  rolling = team.rolling(10).mean()


In [652]:
df_rolling

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,stl%_max_opp,blk%_max_opp,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,total_opp,home_opp,won,season
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5185,,,,,,,,,,,...,,,,,,,,,,
5186,,,,,,,,,,,...,,,,,,,,,,
5187,,,,,,,,,,,...,,,,,,,,,,
5188,,,,,,,,,,,...,,,,,,,,,,


In [653]:
rolling_cols = [f"{col}_10" for col in df_rolling.columns]
df_rolling.columns = rolling_cols
df = pd.concat([df, df_rolling], axis=1)

In [654]:
df = df.dropna()

In [655]:
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,stl%_max_opp_10,blk%_max_opp_10,tov%_max_opp_10,usg%_max_opp_10,ortg_max_opp_10,drtg_max_opp_10,total_opp_10,home_opp_10,won_10,season_10
243,0.0,0.450,0.370370,0.487805,0.370370,0.415094,0.490939,0.487179,0.577778,0.614162,...,0.0576,0.1087,0.428616,0.150193,0.509278,0.474026,0.449412,0.6,0.7,2021.0
246,0.0,0.425,0.314815,0.500000,0.444444,0.396226,0.617792,0.205128,0.200000,0.777457,...,0.0473,0.0781,0.588679,0.244801,0.391753,0.481818,0.425882,0.6,0.6,2021.0
250,0.0,0.575,0.462963,0.556098,0.296296,0.301887,0.507414,0.256410,0.222222,0.897399,...,0.0768,0.0871,0.403878,0.270988,0.442784,0.448052,0.555294,0.6,0.4,2021.0
251,0.0,0.450,0.277778,0.558537,0.370370,0.358491,0.555189,0.512821,0.511111,0.786127,...,0.0552,0.0738,0.320335,0.230809,0.351546,0.487013,0.395294,0.4,0.7,2021.0
252,0.0,0.375,0.277778,0.468293,0.444444,0.396226,0.617792,0.435897,0.377778,0.930636,...,0.0541,0.0729,0.462159,0.167908,0.390722,0.468831,0.451765,0.3,0.5,2021.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4984,0.0,0.200,0.166667,0.331707,0.333333,0.415094,0.439868,0.461538,0.600000,0.533237,...,0.0591,0.1113,0.483229,0.174711,0.389691,0.429870,0.322353,0.5,0.5,2022.0
4985,0.0,0.450,0.407407,0.460976,0.259259,0.566038,0.243822,0.256410,0.244444,0.807803,...,0.0716,0.1171,0.374109,0.321566,0.611340,0.519481,0.374118,0.4,0.7,2022.0
4986,0.0,0.375,0.481481,0.331707,0.629630,0.679245,0.553542,0.128205,0.088889,1.000000,...,0.0656,0.1152,0.444025,0.308601,0.596392,0.523377,0.377647,0.5,0.7,2022.0
4987,0.0,0.275,0.259259,0.360976,0.333333,0.339623,0.520593,0.205128,0.177778,0.880058,...,0.0572,0.1111,0.483229,0.172144,0.412887,0.418182,0.315294,0.5,0.5,2022.0


In [656]:
def shift_col(team, col_name):
    next_col = team[col_name].shift(-1)
    return next_col

def add_col(df, col_name):
    return df.groupby("team", group_keys=False).apply(lambda x: shift_col(x, col_name))

df["home_next"] = add_col(df, "home")
df["team_opp_next"] = add_col(df, "team_opp")
df["date_next"] = add_col(df, "date")

In [657]:
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,usg%_max_opp_10,ortg_max_opp_10,drtg_max_opp_10,total_opp_10,home_opp_10,won_10,season_10,home_next,team_opp_next,date_next
243,0.0,0.450,0.370370,0.487805,0.370370,0.415094,0.490939,0.487179,0.577778,0.614162,...,0.150193,0.509278,0.474026,0.449412,0.6,0.7,2021.0,1.0,ORL,1/15/2021
246,0.0,0.425,0.314815,0.500000,0.444444,0.396226,0.617792,0.205128,0.200000,0.777457,...,0.244801,0.391753,0.481818,0.425882,0.6,0.6,2021.0,1.0,CHI,1/10/2021
250,0.0,0.575,0.462963,0.556098,0.296296,0.301887,0.507414,0.256410,0.222222,0.897399,...,0.270988,0.442784,0.448052,0.555294,0.6,0.4,2021.0,0.0,LAC,1/10/2021
251,0.0,0.450,0.277778,0.558537,0.370370,0.358491,0.555189,0.512821,0.511111,0.786127,...,0.230809,0.351546,0.487013,0.395294,0.4,0.7,2021.0,0.0,HOU,1/10/2021
252,0.0,0.375,0.277778,0.468293,0.444444,0.396226,0.617792,0.435897,0.377778,0.930636,...,0.167908,0.390722,0.468831,0.451765,0.3,0.5,2021.0,1.0,OKC,1/10/2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4984,0.0,0.200,0.166667,0.331707,0.333333,0.415094,0.439868,0.461538,0.600000,0.533237,...,0.174711,0.389691,0.429870,0.322353,0.5,0.5,2022.0,1.0,GSW,6/16/2022
4985,0.0,0.450,0.407407,0.460976,0.259259,0.566038,0.243822,0.256410,0.244444,0.807803,...,0.321566,0.611340,0.519481,0.374118,0.4,0.7,2022.0,0.0,BOS,6/16/2022
4986,0.0,0.375,0.481481,0.331707,0.629630,0.679245,0.553542,0.128205,0.088889,1.000000,...,0.308601,0.596392,0.523377,0.377647,0.5,0.7,2022.0,,,
4987,0.0,0.275,0.259259,0.360976,0.333333,0.339623,0.520593,0.205128,0.177778,0.880058,...,0.172144,0.412887,0.418182,0.315294,0.5,0.5,2022.0,,,


In [658]:
df.loc[:, ['team', 'date', 'team_opp', 'team_opp_next', 'date_next']]

Unnamed: 0,team,date,team_opp,team_opp_next,date_next
243,BOS,1/8/2021,WAS,ORL,1/15/2021
246,LAC,1/8/2021,GSW,CHI,1/10/2021
250,CHI,1/8/2021,LAL,LAC,1/10/2021
251,LAL,1/8/2021,CHI,HOU,1/10/2021
252,BRK,1/8/2021,MEM,OKC,1/10/2021
...,...,...,...,...,...
4984,BOS,6/13/2022,GSW,GSW,6/16/2022
4985,GSW,6/13/2022,BOS,BOS,6/16/2022
4986,GSW,6/16/2022,BOS,,
4987,BOS,6/16/2022,GSW,,


In [659]:
full = df.merge(df[rolling_cols + ["team_opp_next", "date_next", "team"]], left_on=["team", "date_next"], right_on=["team_opp_next", "date_next"])


In [660]:
full


Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,tov%_max_opp_10_y,usg%_max_opp_10_y,ortg_max_opp_10_y,drtg_max_opp_10_y,total_opp_10_y,home_opp_10_y,won_10_y,season_10_y,team_opp_next_y,team_y
0,0.0,0.450,0.370370,0.487805,0.370370,0.415094,0.490939,0.487179,0.577778,0.614162,...,0.481551,0.210783,0.476804,0.401299,0.442353,0.5,0.5,2021.0,BOS,ORL
1,0.0,0.425,0.314815,0.500000,0.444444,0.396226,0.617792,0.205128,0.200000,0.777457,...,0.403878,0.270988,0.442784,0.448052,0.555294,0.6,0.4,2021.0,LAC,CHI
2,0.0,0.575,0.462963,0.556098,0.296296,0.301887,0.507414,0.256410,0.222222,0.897399,...,0.588679,0.244801,0.391753,0.481818,0.425882,0.6,0.6,2021.0,CHI,LAC
3,0.0,0.425,0.203704,0.590244,0.481481,0.433962,0.622735,0.384615,0.444444,0.638728,...,0.445807,0.158280,0.467010,0.367532,0.377647,0.5,0.5,2021.0,CHO,NYK
4,0.0,0.375,0.425926,0.365854,0.148148,0.396226,0.192751,0.333333,0.400000,0.605491,...,0.380294,0.213479,0.463918,0.542857,0.451765,0.4,0.6,2021.0,ORL,MIL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4389,0.0,0.500,0.425926,0.502439,0.407407,0.471698,0.484349,0.358974,0.444444,0.578035,...,0.457128,0.235173,0.523711,0.506494,0.420000,0.4,0.6,2022.0,BOS,GSW
4390,0.0,0.425,0.462963,0.397561,0.481481,0.622642,0.448105,0.230769,0.244444,0.710983,...,0.471908,0.170603,0.381959,0.472727,0.320000,0.5,0.6,2022.0,GSW,BOS
4391,0.0,0.275,0.351852,0.300000,0.481481,0.528302,0.523888,0.282051,0.333333,0.619942,...,0.431761,0.242875,0.529897,0.531169,0.376471,0.4,0.7,2022.0,BOS,GSW
4392,0.0,0.200,0.166667,0.331707,0.333333,0.415094,0.439868,0.461538,0.600000,0.533237,...,0.374109,0.321566,0.611340,0.519481,0.374118,0.4,0.7,2022.0,BOS,GSW


In [661]:
full[["team_x", "team_opp_next_x", "team_y", "team_opp_next_y", "date_next"]]

Unnamed: 0,team_x,team_opp_next_x,team_y,team_opp_next_y,date_next
0,BOS,ORL,ORL,BOS,1/15/2021
1,LAC,CHI,CHI,LAC,1/10/2021
2,CHI,LAC,LAC,CHI,1/10/2021
3,CHO,NYK,NYK,CHO,1/11/2021
4,ORL,MIL,MIL,ORL,1/11/2021
...,...,...,...,...,...
4389,BOS,GSW,GSW,BOS,6/10/2022
4390,GSW,BOS,BOS,GSW,6/13/2022
4391,BOS,GSW,GSW,BOS,6/13/2022
4392,BOS,GSW,GSW,BOS,6/16/2022


In [662]:
removed_columns = list(full.columns[full.dtypes == "object"]) + removed_columns

In [663]:
removed_columns

['team_x',
 'team_opp',
 'date',
 'team_opp_next_x',
 'date_next',
 'team_opp_next_y',
 'team_y',
 'season',
 'date',
 'won',
 'target',
 'team',
 'team_opp']

In [664]:
selected_columns = full.columns[~full.columns.isin(removed_columns)]
sfs.fit(full[selected_columns], full["target"])

In [665]:
predictors = list(selected_columns[sfs.get_support()])

In [666]:
predictors

['mp',
 'ts%',
 'usg%',
 '3p%_max',
 'trb%_max',
 'mp_opp',
 'usg%_opp',
 'orb_10_x',
 'usg%_10_x',
 'fg_max_10_x',
 'blk_max_10_x',
 '+/-_max_10_x',
 'blk%_max_10_x',
 'drtg_max_10_x',
 'ts%_opp_10_x',
 'stl%_opp_10_x',
 'usg%_opp_10_x',
 'fta_10_y',
 'ftr_10_y',
 'usg%_10_y',
 'fga_max_10_y',
 'tov_max_10_y',
 'pts_opp_10_y',
 'ast%_opp_10_y',
 'usg%_opp_10_y',
 'ftr_max_opp_10_y',
 'drb%_max_opp_10_y',
 'stl%_max_opp_10_y',
 'total_opp_10_y',
 'won_10_y']