In [2]:
import pandas as pd

# 1. Read the raw game table from disk, parsing game_date as datetimes on load
game_df = pd.read_csv(
    filepath_or_buffer="game.csv",
    parse_dates=["game_date"],
)

# Tunables for the rolling features. The window size is part of the output
# column names, so it is defined once here and reused everywhere below.
ROLLING_WINDOW = 10
ROLLING_STATS = ["pts", "reb", "ast", "plus_minus"]
ROLLING_COLS = [f"{stat}_avg_last{ROLLING_WINDOW}" for stat in ROLLING_STATS]


def _side_to_long(games, side):
    """Extract one side's columns from `games` under side-neutral names.

    Parameters
    ----------
    games : pd.DataFrame
        Frame holding game_id, game_date, team_name_<side> and one
        <stat>_<side> column for every entry of ROLLING_STATS.
    side : str
        Column suffix to extract ("home" or "away"); also stored in the
        `home_away` column of the result.

    Returns
    -------
    pd.DataFrame
        Columns game_id, game_date, team, <stats...>, home_away.
    """
    renames = {f"team_name_{side}": "team"}
    renames.update({f"{stat}_{side}": stat for stat in ROLLING_STATS})
    # rename() returns a new frame, so adding a column below does not touch `games`.
    side_df = games[["game_id", "game_date", *renames]].rename(columns=renames)
    side_df["home_away"] = side
    return side_df


def _side_features(long_rows, side):
    """Return game_id plus the rolling columns of `side`, prefixed with "<side>_"."""
    is_side = long_rows["home_away"] == side
    return (
        long_rows.loc[is_side, ["game_id", *ROLLING_COLS]]
        .rename(columns=lambda col: col if col == "game_id" else f"{side}_{col}")
    )


# 2. Create long-format (one row per team per game)
home_df = _side_to_long(game_df, "home")
away_df = _side_to_long(game_df, "away")

# 3. Combine both sides into one dataset
long_df = pd.concat([home_df, away_df], ignore_index=True)

# 4. Sort and compute rolling stats
# Rows are ordered by date within each team so that shift(1) drops the current
# game: every average is built only from that team's earlier games. A team's
# first game therefore has no history and gets NaN.
# NOTE(review): teams are grouped by name; if a franchise appears under more
# than one name in the data, its history is split -- confirm this is intended.
long_df = long_df.sort_values(["team", "game_date"])
for stat, rolling_col in zip(ROLLING_STATS, ROLLING_COLS):
    long_df[rolling_col] = (
        long_df.groupby("team")[stat]
        .transform(
            lambda x: x.shift(1).rolling(window=ROLLING_WINDOW, min_periods=1).mean()
        )
    )

# 5. Pivot back: split the rolling stats into home_* and away_* feature frames
home_features = _side_features(long_df, "home")
away_features = _side_features(long_df, "away")

# 6. Merge new features into original game_df
# Left merges keep every row of game_df.
# NOTE(review): this assumes game_id is unique in game_df; duplicated ids would
# multiply rows in these merges -- verify against the data.
final_df = game_df.merge(home_features, on="game_id", how="left")
final_df = final_df.merge(away_features, on="game_id", how="left")

# 7. Write the enriched table to CSV, leaving the DataFrame index out of the file
final_df.to_csv(path_or_buf="game_with_rolling_features.csv", index=False)

In [4]:
# Reload the saved file to check that the rolling-feature columns were written.
game = pd.read_csv("game_with_rolling_features.csv")
# Only the last expression of a cell is rendered automatically, so the sample
# preview has to go through display(); the fixed seed keeps the preview rows
# the same on every run.
display(game.sample(5, random_state=0))
game.columns

Index(['season_id', 'team_id_home', 'team_abbreviation_home', 'team_name_home',
       'game_id', 'game_date', 'matchup_home', 'wl_home', 'min', 'fgm_home',
       'fga_home', 'fg_pct_home', 'fg3m_home', 'fg3a_home', 'fg3_pct_home',
       'ftm_home', 'fta_home', 'ft_pct_home', 'oreb_home', 'dreb_home',
       'reb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home', 'pf_home',
       'pts_home', 'plus_minus_home', 'video_available_home', 'team_id_away',
       'team_abbreviation_away', 'team_name_away', 'matchup_away', 'wl_away',
       'fgm_away', 'fga_away', 'fg_pct_away', 'fg3m_away', 'fg3a_away',
       'fg3_pct_away', 'ftm_away', 'fta_away', 'ft_pct_away', 'oreb_away',
       'dreb_away', 'reb_away', 'ast_away', 'stl_away', 'blk_away', 'tov_away',
       'pf_away', 'pts_away', 'plus_minus_away', 'video_available_away',
       'season_type', 'home_pts_avg_last10', 'home_reb_avg_last10',
       'home_ast_avg_last10', 'home_plus_minus_avg_last10',
       'away_pts_avg_last10', 'aw

In [None]:
# Load both feature tables with their date columns parsed, then list the
# column names of each (one Index per line) so the two schemas can be compared.
rolling_df = pd.read_csv(
    "game_with_rolling_features.csv",
    parse_dates=["game_date"],
)
selected_df = pd.read_csv(
    "selected_game_features.csv",
    parse_dates=["Game Date"],
)
print(rolling_df.columns, selected_df.columns, sep="\n")

Index(['season_id', 'team_id_home', 'team_abbreviation_home', 'team_name_home',
       'game_id', 'game_date', 'matchup_home', 'wl_home', 'min', 'fgm_home',
       'fga_home', 'fg_pct_home', 'fg3m_home', 'fg3a_home', 'fg3_pct_home',
       'ftm_home', 'fta_home', 'ft_pct_home', 'oreb_home', 'dreb_home',
       'reb_home', 'ast_home', 'stl_home', 'blk_home', 'tov_home', 'pf_home',
       'pts_home', 'plus_minus_home', 'video_available_home', 'team_id_away',
       'team_abbreviation_away', 'team_name_away', 'matchup_away', 'wl_away',
       'fgm_away', 'fga_away', 'fg_pct_away', 'fg3m_away', 'fg3a_away',
       'fg3_pct_away', 'ftm_away', 'fta_away', 'ft_pct_away', 'oreb_away',
       'dreb_away', 'reb_away', 'ast_away', 'stl_away', 'blk_away', 'tov_away',
       'pf_away', 'pts_away', 'plus_minus_away', 'video_available_away',
       'season_type', 'home_pts_avg_last10', 'home_reb_avg_last10',
       'home_ast_avg_last10', 'home_plus_minus_avg_last10',
       'away_pts_avg_last10', 'aw