### Data loader

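This function loads the requested weeks of tracking data, keeps only the columns we need, concatenates the weeks into a single input frame and a single output frame, and left-joins the play-level context from `supplementary_data.csv` onto both frames (matching on `game_id` and `play_id`) so we can immediately compute features and our five pillars. It also returns the supplementary table itself.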

In [20]:
import pandas as pd
from pathlib import Path

def load_weeks_pandas(weeks, base="../data", season=2023):
    """Load weekly input/output tracking CSVs and attach play-level context.

    For every week in ``weeks`` this reads ``input_<season>_wWW.csv`` and
    ``output_<season>_wWW.csv`` from ``base`` (``WW`` is the zero-padded week
    number), concatenates the weeks, and left-joins the selected columns of
    ``supplementary_data.csv`` onto both frames via ``game_id``/``play_id``.

    Parameters
    ----------
    weeks : iterable of int
        Week numbers to load. Any iterable is accepted, including one-shot
        iterators and generators.
    base : str or path-like, default "../data"
        Location of the CSV files. It is interpolated into the file path as
        text, so anything ``pd.read_csv`` accepts as a path prefix works.
    season : int, default 2023
        Year embedded in the weekly file names.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame, pandas.DataFrame)
        ``(input_df, output_df, supp_df)``: the stitched input frames and
        output frames, each with the supplementary columns merged in, and
        the supplementary table as read from disk.

    Raises
    ------
    ValueError
        If ``weeks`` contains no week numbers.
    """
    use_in = ["game_id","play_id","nfl_id","frame_id",
              "player_name","player_position","player_role","player_side",
              "x","y","s","a","o","dir",
              "num_frames_output","ball_land_x","ball_land_y"]
    use_out = ["game_id","play_id","nfl_id","frame_id","x","y"]
    supp_cols = ["game_id","play_id","season","week","pass_result",
                 "team_coverage_man_zone","pass_length","route_of_targeted_receiver",
                 "yards_gained","expected_points","expected_points_added"]
    join_keys = ["game_id","play_id"]

    # Materialise once: the week list is walked twice (inputs, then outputs),
    # so a generator/iterator would otherwise be exhausted before the
    # output files are read.
    weeks = list(weeks)
    if not weeks:
        raise ValueError("weeks must contain at least one week number")

    def _read_weekly(kind, columns):
        # One CSV per week, stacked into a single frame with a fresh index.
        frames = [
            pd.read_csv(f"{base}/{kind}_{season}_w{w:02d}.csv", usecols=columns)
            for w in weeks
        ]
        return pd.concat(frames, ignore_index=True)

    input_df  = _read_weekly("input", use_in)
    output_df = _read_weekly("output", use_out)
    supp_df   = pd.read_csv(f"{base}/supplementary_data.csv", usecols=supp_cols)

    # Join play context into input/output. A left join keeps every tracking
    # row even when a play has no supplementary record.
    # NOTE(review): rows would be duplicated if supplementary_data.csv ever
    # repeated a (game_id, play_id) pair -- consider validate="many_to_one"
    # once key uniqueness is confirmed.
    input_df  = input_df.merge(supp_df,  on=join_keys, how="left")
    output_df = output_df.merge(supp_df, on=join_keys, how="left")

    return input_df, output_df, supp_df

# Example:
# inp, out, supp = load_weeks_pandas([1,2,3])


In [21]:
# Load weeks 1-3; downstream cells work from these three frames.
inp, out, supp = load_weeks_pandas(weeks=[1, 2, 3])