In [25]:
import pandas as pd
from raw_fetch import download_play_by_play, download_players, download_snap_counts, download_weekly_player_stats
from pipeline import raw_to_pp, join_gp_to_game_stats, pp_to_gp

In [2]:
# Run the project's `download_players` helper (imported from raw_fetch).
# NOTE(review): presumably this writes raw/players/players.parquet, which a
# later cell reads — confirm against raw_fetch.
download_players()

In [3]:
# Load the 2024 snap-count parquet file from the local raw/ directory.
# NOTE(review): no cell shown here calls download_snap_counts, so this read
# relies on the file already being on disk.
snap_count = pd.read_parquet('raw/snap_counts/snap_counts_2024.parquet')

In [4]:
# Load the players parquet file from the local raw/ directory.
players = pd.read_parquet('raw/players/players.parquet')

In [7]:
# Load the 2024 play-by-play parquet file from the local raw/ directory.
# NOTE(review): no cell shown here calls download_play_by_play, so this read
# relies on the file already being on disk.
pbp = pd.read_parquet('raw/pbp/play_by_play_2024.parquet')

In [5]:
# Load the 2024 weekly player-stats parquet file from the local raw/ directory.
# NOTE(review): no cell shown here calls download_weekly_player_stats, so this
# read relies on the file already being on disk.
stats = pd.read_parquet('raw/player_stats/player_stats_2024.parquet')

In [27]:
# Run the project pipeline on the play-by-play frame: raw_to_pp first, then
# pp_to_gp on its result.
# NOTE(review): the helper names suggest raw -> per-play -> per-player-per-game
# aggregation; confirm against pipeline.py.
pp = raw_to_pp(pbp)
player_game = pp_to_gp(pp)

In [28]:
# Combine the pipeline output with the weekly stats frame.
# NOTE(review): the result is later indexed with 'player_name_x', which looks
# like a pandas merge suffix (both inputs carrying player_name) — confirm in
# pipeline.join_gp_to_game_stats.
game_en = join_gp_to_game_stats(player_game, stats)

In [35]:
# Rows where the 'rush_attempts' and 'carries' columns disagree, trimmed to
# the columns needed to identify the player-week.
game_en.loc[
    game_en['rush_attempts'].ne(game_en['carries']),
    ['week', 'player_name_x', 'rush_attempts', 'carries'],
]

Unnamed: 0,week,player_name_x,rush_attempts,carries
1,1,J.Conner,17.0,16
4,1,J.Allen,8.0,9
5,1,K.Murray,4.0,5
25,1,P.Mahomes,1.0,2
82,1,G.Smith,1.0,4
106,1,J.Jacobs,17.0,16
125,1,J.Mixon,31.0,30
128,1,J.Taylor,17.0,16
137,1,C.Stroud,2.0,4
179,1,G.Edwards,12.0,11


In [6]:
# Per-week QB passing summary with a few derived columns. The final expression
# displays the week-4 rows ordered by rushing yards, highest first.
#
# .loc[...].copy() gives an independent frame, so the column assignments below
# do not write into a slice of `stats` (avoids SettingWithCopyWarning).
qb_stats = stats.loc[stats['position'] == 'QB', [
    'week',
    'player_name',
    'attempts',
    'passing_epa',
    'completions',
    'passing_yards',
    'passing_air_yards',
    'passing_first_downs',
    'rushing_yards',
    'passing_yards_after_catch']].copy()
# Rank by pass attempts across every QB row in the frame (all weeks together,
# not within a week); ties share the lowest rank.
qb_stats['rank'] = qb_stats['attempts'].rank(method='min', ascending=False)
qb_stats['completion_pct'] = qb_stats['completions'] / qb_stats['attempts']
# NOTE(review): this is passing yards minus air yards, which is a different
# quantity from the 'passing_yards_after_catch' column selected above; it goes
# negative whenever air yards exceed passing yards. Consider renaming.
qb_stats['yards_after_catch'] = qb_stats['passing_yards'] - \
    qb_stats['passing_air_yards']
# Week-over-week change in passing first downs per player. Rows are ordered by
# week before diffing so the result does not depend on the row order of
# `stats`; the assignment realigns on the index, leaving qb_stats' own row
# order untouched.
# NOTE(review): groups by 'player_name', so two QBs sharing an abbreviated
# name would be merged into one series — confirm names are unique here.
qb_stats['first_down_change'] = (
    qb_stats.sort_values('week', kind='stable')
    .groupby('player_name')['passing_first_downs']
    .diff()
)
qb_stats_sorted = qb_stats.sort_values('rushing_yards', ascending=False)
qb_stats_sorted[qb_stats_sorted['week'] == 4]

KeyError: "['game_id'] not in index"

In [19]:
# Rows whose player_name contains the literal text 'S.Dar'.
# regex=False keeps '.' from acting as a wildcard (otherwise e.g. 'SaDar'
# would match); na=False treats missing names as non-matches so the boolean
# mask never contains NaN.
qb_stats_sorted[qb_stats_sorted['player_name'].str.contains(
    'S.Dar', regex=False, na=False)]

Unnamed: 0,week,player_name,attempts,passing_epa,completions,passing_yards,passing_air_yards,passing_first_downs,passing_yards_after_catch,rank,completion_pct,yards_after_catch,first_down_change
364,1,S.Darnold,24,9.554716,19,208.0,195.0,8.0,68.0,98.0,0.791667,13.0,
365,2,S.Darnold,26,-1.314427,17,268.0,169.0,9.0,152.0,87.0,0.653846,99.0,1.0
366,3,S.Darnold,28,5.252612,17,181.0,191.0,12.0,67.0,75.0,0.607143,-10.0,3.0
367,4,S.Darnold,28,8.126838,20,275.0,289.0,11.0,115.0,75.0,0.714286,-14.0,-1.0


In [13]:
# Show the column labels available in the weekly player-stats frame.
stats.columns

Index(['player_id', 'player_name', 'player_display_name', 'position',
       'position_group', 'headshot_url', 'recent_team', 'season', 'week',
       'season_type', 'opponent_team', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr', 'special_teams_tds', 'fantasy_points

In [16]:
# Show the column labels available in the snap-count frame.
snap_count.columns

Index(['game_id', 'pfr_game_id', 'season', 'game_type', 'week', 'player',
       'pfr_player_id', 'position', 'team', 'opponent', 'offense_snaps',
       'offense_pct', 'defense_snaps', 'defense_pct', 'st_snaps', 'st_pct'],
      dtype='object')

In [46]:
# Per-week RB rushing summary with a few derived columns. The final expression
# displays the 20 week-4 rows with the highest rushing EPA.
#
# .loc[...].copy() gives an independent frame, so the column assignments below
# do not write into a slice of `stats` (avoids SettingWithCopyWarning).
rb_stats = stats.loc[stats['position'] == 'RB', [
    'week',
    'player_name',
    'carries',
    'rushing_yards',
    'rushing_tds',
    'rushing_fumbles',
    'rushing_fumbles_lost',
    'rushing_first_downs',
    'rushing_epa',
    'rushing_2pt_conversions',
    'receptions',
    'targets'
]].copy()
# Rank by carries across every RB row in the frame (all weeks together, not
# within a week); ties share the lowest rank.
rb_stats['carries_rank'] = rb_stats['carries'].rank(
    method='min', ascending=False)
# Yards per carry. Zero carries are masked to NaN first so a row with yards
# but no carries yields NaN instead of +/-inf.
rb_stats['ypc'] = rb_stats['rushing_yards'] / \
    rb_stats['carries'].where(rb_stats['carries'] > 0)
# Week-over-week change in rushing first downs per player. Rows are ordered by
# week before diffing so the result does not depend on the row order of
# `stats`; the assignment realigns on the index, leaving rb_stats' own row
# order untouched.
# NOTE(review): groups by 'player_name', so two RBs sharing an abbreviated
# name would be merged into one series — confirm names are unique here.
rb_stats['first_down_change'] = (
    rb_stats.sort_values('week', kind='stable')
    .groupby('player_name')['rushing_first_downs']
    .diff()
)
rb_stats_sorted = rb_stats.sort_values('rushing_epa', ascending=False)
rb_stats_sorted[rb_stats_sorted['week'] == 4].head(20)

Unnamed: 0,week,player_name,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,receptions,targets,carries_rank,ypc,first_down_change
114,4,D.Henry,24,199.0,1,1.0,0.0,8.0,8.636214,0,3,3,7.0,8.291667,0.0
256,4,J.McNichols,8,68.0,2,0.0,0.0,4.0,6.949106,0,1,1,146.0,8.5,4.0
924,4,T.Bigsby,7,90.0,0,0.0,0.0,2.0,5.491012,0,0,0,159.0,12.857143,1.0
947,4,C.Brown,15,80.0,2,0.0,0.0,4.0,5.176807,0,2,3,61.0,5.333333,0.0
828,4,B.Robinson,21,101.0,1,0.0,0.0,6.0,4.563953,0,3,3,17.0,4.809524,4.0
1072,4,J.Gibbs,14,78.0,2,0.0,0.0,5.0,3.966433,0,0,0,68.0,5.571429,1.0
858,4,K.Williams,19,94.0,1,0.0,0.0,6.0,3.755381,0,4,4,26.0,4.947368,-1.0
376,4,A.Mattison,5,60.0,0,0.0,0.0,3.0,3.578498,0,0,1,191.0,12.0,2.0
769,4,J.Ford,10,58.0,0,0.0,0.0,4.0,3.283787,0,7,7,112.0,5.8,0.0
1130,4,B.Irving,10,49.0,1,0.0,0.0,3.0,3.119077,0,1,2,112.0,4.9,0.0
