In [1]:
import pandas as pd
import random
import os

In [2]:
# Load datasets
# Paths are assembled with os.path.join so the notebook resolves the same
# files on Windows, Linux, and macOS instead of relying on "\\" separators.
data_files_basepath = os.path.join(os.getcwd(), "resources", "data")

plays = pd.read_csv(os.path.join(data_files_basepath, "plays.csv"))
players = pd.read_csv(os.path.join(data_files_basepath, "players.csv"))

# Combine all tracking weeks (weeks 1 through 9) into a single DataFrame
tracking_files = [f'tracking_week_{i}.csv' for i in range(1, 10)]
tracking_list = []
for file in tracking_files:
    try:
        print(f"Starting processing for {file}...")
        # Malformed rows are skipped rather than aborting the whole load.
        df = pd.read_csv(os.path.join(data_files_basepath, file), on_bad_lines='skip', engine='python')
        tracking_list.append(df)
    except Exception as e:
        # Best-effort: a missing/unreadable week is reported and skipped.
        print(f"Error reading {file}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but a message that points at the cause.
if not tracking_list:
    raise ValueError(
        f"No tracking files could be read from {data_files_basepath}; "
        "see the per-file errors printed above."
    )
tracking = pd.concat(tracking_list, ignore_index=True)

# Drop every row that has a missing value in any column.
tracking = tracking.dropna()

Starting processing for tracking_week_1.csv...
Starting processing for tracking_week_2.csv...
Starting processing for tracking_week_3.csv...
Starting processing for tracking_week_4.csv...
Starting processing for tracking_week_5.csv...
Starting processing for tracking_week_6.csv...
Starting processing for tracking_week_7.csv...
Starting processing for tracking_week_8.csv...
Starting processing for tracking_week_9.csv...


In [3]:
print(tracking.shape)

# Keep only the columns needed from the lookup tables before joining.
players_positions = players[['nflId', 'displayName', 'position']]
possession_tracker = plays[['gameId', "playId", "possessionTeam", "defensiveTeam", "offenseFormation"]]

# Restrict tracking to the frames tagged with the 'ball_snap' event.
snap_data = tracking[tracking['event'] == 'ball_snap']

# Attach play-level context (possession/defense/formation), then each player's position.
play_data = pd.merge(snap_data, possession_tracker, on=['gameId', 'playId'])
full_data = pd.merge(play_data, players_positions, on=['nflId', 'displayName'])
clean_data = full_data.dropna().drop_duplicates()

# Build both row masks against clean_data and apply them in one .loc call.
# Chaining clean_data[mask_a][mask_b] applies a full-length mask to an
# already-filtered frame, which makes pandas emit
# "Boolean Series key will be reindexed to match DataFrame index".
is_possession_team = clean_data['club'] == clean_data['possessionTeam']
is_offensive_position = clean_data['position'].isin(['C', 'G', 'T', 'TE', 'WR', 'RB', 'FB', 'QB'])
offensive_data = clean_data.loc[is_possession_team & is_offensive_position]


offensive_data

(2435796, 18)


  offensive_data = clean_data[clean_data['club'] == clean_data['possessionTeam']][clean_data['position'].isin(['C', 'G', 'T', 'TE', 'WR', 'RB', 'FB', 'QB'])]


Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,...,s,a,dis,o,dir,event,possessionTeam,defensiveTeam,offenseFormation,position
1,2022091200,64,39987.0,Geno Smith,114,SNAP,2022-09-13 00:16:14.8,7.0,SEA,right,...,0.89,1.79,0.08,79.67,218.22,ball_snap,SEA,DEN,SINGLEBACK,QB
2,2022091200,64,41310.0,Gabe Jackson,114,SNAP,2022-09-13 00:16:14.8,66.0,SEA,right,...,0.88,1.33,0.09,134.48,197.49,ball_snap,SEA,DEN,SINGLEBACK,G
5,2022091200,64,42412.0,Tyler Lockett,114,SNAP,2022-09-13 00:16:14.8,16.0,SEA,right,...,0.87,1.71,0.08,82.82,84.72,ball_snap,SEA,DEN,SINGLEBACK,WR
9,2022091200,64,43537.0,Austin Blythe,114,SNAP,2022-09-13 00:16:14.8,63.0,SEA,right,...,1.31,1.48,0.14,74.04,193.14,ball_snap,SEA,DEN,SINGLEBACK,C
12,2022091200,64,46096.0,Rashaad Penny,114,SNAP,2022-09-13 00:16:14.8,20.0,SEA,right,...,0.69,1.36,0.07,96.33,164.77,ball_snap,SEA,DEN,SINGLEBACK,RB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351723,2022110300,3538,52608.0,Quez Watkins,172,SNAP,2022-11-04 03:03:47.4,16.0,PHI,left,...,0.00,0.00,0.00,262.21,159.49,ball_snap,PHI,HOU,I_FORM,WR
351724,2022110300,3538,53466.0,Landon Dickerson,172,SNAP,2022-11-04 03:03:47.4,69.0,PHI,left,...,0.30,1.97,0.02,279.34,293.14,ball_snap,PHI,HOU,I_FORM,G
351725,2022110300,3538,53579.0,Kenneth Gainwell,172,SNAP,2022-11-04 03:03:47.4,14.0,PHI,left,...,0.00,0.00,0.00,270.00,169.06,ball_snap,PHI,HOU,I_FORM,RB
351728,2022110300,3538,53946.0,Jack Stoll,172,SNAP,2022-11-04 03:03:47.4,89.0,PHI,left,...,0.02,0.40,0.00,299.90,341.44,ball_snap,PHI,HOU,I_FORM,TE


In [4]:
print(f"Available Formations: {offensive_data.offenseFormation.unique()}")
print(f"Available Positions: {offensive_data.position.unique()}")
print(f"Total Games: {offensive_data.gameId.nunique()}")
# Count distinct (gameId, playId) pairs rather than dividing the row count by 11:
# after the dropna/drop_duplicates/position filters a play is not guaranteed to
# contribute exactly 11 rows, so len(...)//11 can miscount. playId is paired with
# gameId here because the merges above key plays on both columns.
total_plays = offensive_data.groupby(['gameId', 'playId']).ngroups
print(f"Total Plays: {total_plays}")
print(f"Total Players: {offensive_data.nflId.nunique()}")

# Keep only the identifier, direction, position (x, y, o), role, and formation columns.
reduced_data = offensive_data[['gameId', 'playId', 'nflId', 'playDirection', 'x', 'y', 'o', 'position', 'offenseFormation']]

Available Formations: ['SINGLEBACK' 'EMPTY' 'SHOTGUN' 'PISTOL' 'I_FORM' 'JUMBO' 'WILDCAT']
Available Positions: ['QB' 'G' 'WR' 'C' 'RB' 'TE' 'T' 'FB']
Total Games: 136
Total Plays: 15820
Total Players: 823


In [5]:
# Write the reduced frame under ./resources. os.path.join keeps the path portable;
# a literal "resources\\reduced_data.csv" is only a directory path on Windows and
# becomes a single oddly named file in the working directory elsewhere.
reduced_data.to_csv(os.path.join("resources", "reduced_data.csv"))
