# Imports and Config

In [3]:
import nfl_data_py as nfl #Main library used for accessing NFL data
import matplotlib.pyplot as plt #Used for more dynamic visualization
import pandas as pd #Main library used for storing, manipulating, and processing data
pd.set_option('display.max_columns', None) #Allow more pandas dataframes to be shown during prints
from tqdm.notebook import tqdm, tqdm_notebook #Used to display a progress bar during length dataframe operations
tqdm.pandas() #Necessary function for using with pandas

In [4]:
# Set range for years to analyze (where first_year is inclusive, and last_year is exclusive
first_year = 2004
last_year = 2023
#Set number of games behind game to analyze to cover stats for 
game_coverage = 5

# Positional Data

## Raw PBP

In [5]:
pbp = nfl.import_pbp_data(range(first_year,last_year)) #Built in function collecting raw pbp data within year range
pbp = pbp[pbp["season_type"].isin(["REG"])].reset_index(drop=True) #Only take data from regular season (no post season)
pbp["year"] = pbp["game_id"].str[:4] #Use substring of game_id to create new column "year"
pbp["year"] = pbp["year"].astype(int) #Cast all elements of new column to int (rather than string)
display(pbp.head(1)) #Print sample of result
display(pbp.tail(1)) #Print sample of result

2004 done.
2005 done.
2006 done.
2007 done.
2008 done.
2009 done.
2010 done.
2011 done.
2012 done.
2013 done.
2014 done.
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,nflverse_game_id,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type,year
0,1.0,2004_01_ARI_STL,2004091208,LA,ARI,REG,1,,,,,,2004-09-12,900.0,1800.0,3600.0,Half1,0.0,,0.0,1.0,,0.0,15:00,LA 30,0.0,,GAME,,,0.0,0.0,,0.0,0.0,0.0,,,,,,,,,,,3.0,3.0,,,,,,,,0.0,0.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028149,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,0.422024,0.577976,0.577976,0.422024,-0.0,-0.0,0.0,,,0.157772,0.842228,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,2004,,,1.0,1.0,First down,1.0,13:00:00,,Edward Jones Dome,"Temp: ° F, Wind: mph",10160000-0269-1882-ad83-9c5a60528a04,0,0.0,GAME_START,0.0,,,,1.0,Punt,,,,,,,,,,,,,,,,,,10,17,Home,7,27,11.0,46.0,1,dome,astroturf,,,Mike Martz,Dennis Green,STL00,Edward Jones Dome,0.0,0.0,,,,,,,0.0,0.0,,0.0,0.0,,,,,,,,,,,0.0,0.0,-0.0,,,,,,,,,,,,,,,,,,,,,,,,,,2004


Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,nflverse_game_id,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type,year
877227,3757.0,2022_18_TEN_JAX,2023010701,JAX,TEN,REG,18,,,,,,2023-01-07,0.0,0.0,0.0,Half2,0.0,19.0,0.0,4.0,,0.0,00:00,,0.0,-4.0,END GAME,,,0.0,0.0,,0.0,0.0,0.0,,,,,,,,,,,2.0,1.0,,,,,,,,20.0,16.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,3.285143,-3.285143,-3.005784,3.005784,4.912541,-4.912541,,,,,2.340791,-2.340791,-6.620955,6.620955,2.970454,-2.970454,-8.391386,8.391386,,,1.0,0.0,,,,,,,1.0,-0.340631,0.340631,0.763074,-0.763074,,,,,-0.398616,0.398616,0.58716,-0.58716,-0.428301,0.428301,0.557185,-0.557185,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,2022,,,49.0,0.0,QB kneel,3757.0,20:15:00,,TIAA Bank Field,"Temp: 59° F, Humidity: 54%, Wind: SE 4 mph",10160000-0591-0032-9c44-9c9ce66dfcee,0,0.0,END_GAME,0.0,,,,19.0,End of half,,2.0,1:30,0.0,0.0,0.0,4.0,4.0,0.0,DOWNS,END_GAME,01:30,00:00,JAX 44,JAX 42,3708.0,3757.0,16,20,Home,4,36,6.0,39.5,1,outdoors,grass,56.0,4.0,Doug Pederson,Mike Vrabel,JAX00,TIAA Bank Stadium,0.0,,,,,,,,0.0,0.0,,0.0,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,,2022_18_TEN_JAX,,,,,,,,,,0.0,0.0,,,,,,,2022


## Raw Weekly

In [6]:
weekly = nfl.import_weekly_data(range(first_year,last_year)) #Built in function collecting raw weekly data within year range
weekly = weekly[weekly["season_type"].isin(["REG"])].reset_index(drop=True) #Only take data from regular season (no post season)
weekly[0:5] #Print sample of result

Downcasting floats.


Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_lost,passing_air_yards,passing_yards_after_catch,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0000007,,Rabih Abdullah,RB,RB,,NE,2004,2,REG,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,,0,,,1,4.0,0,0.0,0.0,1.0,0.921181,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,,0.0,0.4,0.4
1,00-0000007,,Rabih Abdullah,RB,RB,,NE,2004,5,REG,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,,0,,,5,4.0,1,0.0,0.0,1.0,-1.904898,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,,0.0,6.4,6.4
2,00-0000007,,Rabih Abdullah,RB,RB,,NE,2004,6,REG,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,,0,,,0,0.0,0,0.0,0.0,0.0,,0,1,1,9.0,0,0.0,0.0,0.0,0.0,0.0,-0.472901,0,0.0,0.034483,0.0,0.0,0.0,0.9,1.9
3,00-0000007,,Rabih Abdullah,RB,RB,,NE,2004,10,REG,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,,0,,,4,-5.0,0,0.0,0.0,0.0,-3.20626,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,,0.0,-0.5,-0.5
4,00-0000007,,Rabih Abdullah,RB,RB,,NE,2004,16,REG,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,,0,,,1,5.0,0,0.0,0.0,1.0,-0.778663,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0,,,,,0.0,0.5,0.5


In [8]:
def date_and_team_to_other_team_vectorized(row, pbp):
    # Subset pbp DataFrame based on 'season' and 'week'
    curr_game = pbp[(pbp['year'] == row['season']) & (pbp['week'] == row['week'])]

    # Identify rows where the team is the away team
    away_team_rows = curr_game['away_team'] == row['recent_team']
    if away_team_rows.any():
        return curr_game.loc[away_team_rows, 'home_team'].iloc[0]

    # Identify rows where the team is the home team
    home_team_rows = curr_game['home_team'] == row['recent_team']
    if home_team_rows.any():
        return curr_game.loc[home_team_rows, 'away_team'].iloc[0]

    # Return an empty string if no match is found (bye week)
    return ''

# Apply the function using tqdm_apply for progress tracking
tqdm.pandas()
weekly['opponent_team'] = weekly.progress_apply(date_and_team_to_other_team_vectorized, args=(pbp,), axis=1)

# Print sample of the result
print(weekly[['season', 'week', 'recent_team', 'opponent_team']].head())

  0%|          | 0/95019 [00:00<?, ?it/s]

   season  week recent_team opponent_team
0    2004     2          NE           ARI
1    2004     5          NE           MIA
2    2004     6          NE           SEA
3    2004    10          NE           BUF
4    2004    16          NE           NYJ


In [9]:
'''#Input year, week, and team of player
#Output what team they faced at that time. Necessary because data before 2022 does not contain opposing team information
def date_and_team_to_other_team(year, week, team):
    
    curr_game = pbp[pbp["week"].isin([week])] #Isolate pbp data to only the week of the game currently being played
    curr_game = curr_game[curr_game["year"].isin([year])] #Isolate pbp data to only the year of the game currently being played
    
    away_check = curr_game[curr_game["away_team"].isin([team])].reset_index(drop=True) #Option 1: Input team is the AWAY TEAM in pbp data
    home_check = curr_game[curr_game["home_team"].isin([team])].reset_index(drop=True) #Option 2: Input team is the HOME TEAM in pbp data

    #If option 1, return cooresponding opposing team
    if home_check.empty and not away_check.empty:
        return away_check["home_team"].at[0]
        
    #Else if option 2, return cooresponding opposing team    
    elif away_check.empty and not home_check.empty:
        return home_check["away_team"].at[0] 

    #Else, must be a bye-week for the team since there is no pbp data in the criteria. Return an empty string
    return " "'''

'#Input year, week, and team of player\n#Output what team they faced at that time. Necessary because data before 2022 does not contain opposing team information\ndef date_and_team_to_other_team(year, week, team):\n    \n    curr_game = pbp[pbp["week"].isin([week])] #Isolate pbp data to only the week of the game currently being played\n    curr_game = curr_game[curr_game["year"].isin([year])] #Isolate pbp data to only the year of the game currently being played\n    \n    away_check = curr_game[curr_game["away_team"].isin([team])].reset_index(drop=True) #Option 1: Input team is the AWAY TEAM in pbp data\n    home_check = curr_game[curr_game["home_team"].isin([team])].reset_index(drop=True) #Option 2: Input team is the HOME TEAM in pbp data\n\n    #If option 1, return cooresponding opposing team\n    if home_check.empty and not away_check.empty:\n        return away_check["home_team"].at[0]\n        \n    #Else if option 2, return cooresponding opposing team    \n    elif away_check.empt

In [10]:
#Add opponent_team column to raw weekly data. Necessary for processing defensive performance. Uses a TQDM progress bar
#TODO: See if vectorization can speed up processing time. Currently takes a significant amount of time to run (approx. 3.5 minutes/year of data)
'''weekly["opponent_team"] = weekly.progress_apply(lambda row: date_and_team_to_other_team(row["season"], row["week"], row["recent_team"]), axis=1)
weekly[0:5] #Print sample of result'''

'weekly["opponent_team"] = weekly.progress_apply(lambda row: date_and_team_to_other_team(row["season"], row["week"], row["recent_team"]), axis=1)\nweekly[0:5] #Print sample of result'

## QB

In [11]:
qb = weekly[weekly["position"].isin(["QB"])].reset_index(drop=True) #Create new dataframe for relevant QB information
qb["label"] = qb.progress_apply(lambda row: row["player_id"] + ":" + str(row["season"]) + ":" + str(row["week"]).zfill(2) + ":" + row["recent_team"], axis=1)
qb = qb.loc[:, ["label",
                "player_id",
                "week",
                "season",
                "opponent_team",
                 "completions",
                 "attempts",
                 "passing_yards",
                 "passing_tds",
                 "interceptions",
                 "sacks",
                 "sack_fumbles",
                 "passing_air_yards",
                 "passing_yards_after_catch",
                 "passing_first_downs",
                 "passing_epa",
                 "passing_2pt_conversions",
                 "pacr",
                 "dakota",
                 "carries",
                 "rushing_yards",
                 "rushing_tds",
                 "rushing_fumbles",
                 "rushing_first_downs",
                 "rushing_epa",
                 "rushing_2pt_conversions",
                 "fantasy_points"]]
qb[0:5]

  0%|          | 0/11674 [00:00<?, ?it/s]

Unnamed: 0,label,player_id,week,season,opponent_team,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_fumbles,passing_air_yards,passing_yards_after_catch,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_first_downs,rushing_epa,rushing_2pt_conversions,fantasy_points
0,00-0000722:2004:08:HOU,00-0000722,8,2004,JAX,0,1,0.0,0,0.0,0.0,0,0.0,0.0,0.0,-1.062238,0,,,0,0.0,0,0.0,0.0,,0,0.0
1,00-0000722:2004:17:HOU,00-0000722,17,2004,CLE,1,1,16.0,0,0.0,0.0,0,0.0,0.0,1.0,2.153744,0,0.0,,0,0.0,0,0.0,0.0,,0,0.64
2,00-0001335:2004:10:PHI,00-0001335,10,2004,DAL,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,,0,,,2,-2.0,0,0.0,0.0,-0.572323,0,-0.2
3,00-0001335:2004:16:PHI,00-0001335,16,2004,LA,4,14,41.0,0,0.0,1.0,1,0.0,0.0,2.0,-8.568142,0,0.0,0.04035,1,8.0,0,0.0,0.0,0.480395,0,0.44
4,00-0001335:2004:17:PHI,00-0001335,17,2004,CIN,14,23,85.0,1,1.0,1.0,0,0.0,0.0,5.0,-10.627749,0,0.0,0.027942,0,0.0,0,0.0,0.0,,0,5.4


## RB

In [12]:
'''rb = weekly[weekly["position"].isin(["RB"])].reset_index(drop=True) #Create new dataframe for relevant RB information
rb["label"] = rb.progress_apply(lambda row: row["player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + row["recent_team"], axis=1)
rb = rb.loc[:, ["label",
                "player_id",
                 "season",
                 "week",
                 "carries",
                 "rushing_yards",
                 "rushing_tds",
                 "rushing_fumbles",
                 "rushing_first_downs",
                 "rushing_epa",
                 "rushing_2pt_conversions",
                 "receptions",
                 "targets",
                 "receiving_yards",
                 "receiving_tds",
                 "receiving_fumbles",
                 "receiving_air_yards",
                 "receiving_yards_after_catch",
                 "receiving_first_downs",
                 "receiving_epa",
                 "receiving_2pt_conversions",
                 "racr",
                 "target_share",
                 "air_yards_share",
                 "wopr",
                 "special_teams_tds", 
                 "fantasy_points",
                 "fantasy_points_ppr"]]
rb[0:5] #Print sample of result'''

'rb = weekly[weekly["position"].isin(["RB"])].reset_index(drop=True) #Create new dataframe for relevant RB information\nrb["label"] = rb.progress_apply(lambda row: row["player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + row["recent_team"], axis=1)\nrb = rb.loc[:, ["label",\n                "player_id",\n                 "season",\n                 "week",\n                 "carries",\n                 "rushing_yards",\n                 "rushing_tds",\n                 "rushing_fumbles",\n                 "rushing_first_downs",\n                 "rushing_epa",\n                 "rushing_2pt_conversions",\n                 "receptions",\n                 "targets",\n                 "receiving_yards",\n                 "receiving_tds",\n                 "receiving_fumbles",\n                 "receiving_air_yards",\n                 "receiving_yards_after_catch",\n                 "receiving_first_downs",\n                 "receiving_epa",\n                 "receiving

## WR

In [13]:
'''wr = weekly[weekly["position"].isin(["WR"])].reset_index(drop=True) #Create new dataframe for relevant WR information
wr["label"] = wr.progress_apply(lambda row: row["player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + row["recent_team"], axis=1)
wr = wr.loc[:, ["label",
                 "season",
                 "week",
                 "player_id",
                 "receptions",
                 "targets",
                 "receiving_yards",
                 "receiving_tds",
                 "receiving_fumbles",
                 "receiving_air_yards",
                 "receiving_yards_after_catch",
                 "receiving_first_downs",
                 "receiving_epa",
                 "receiving_2pt_conversions",
                 "racr",
                 "target_share",
                 "air_yards_share",
                 "wopr",
                 "special_teams_tds", 
                 "fantasy_points",
                 "fantasy_points_ppr"]]
wr[0:5] #Print sample of result'''

'wr = weekly[weekly["position"].isin(["WR"])].reset_index(drop=True) #Create new dataframe for relevant WR information\nwr["label"] = wr.progress_apply(lambda row: row["player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + row["recent_team"], axis=1)\nwr = wr.loc[:, ["label",\n                 "season",\n                 "week",\n                 "player_id",\n                 "receptions",\n                 "targets",\n                 "receiving_yards",\n                 "receiving_tds",\n                 "receiving_fumbles",\n                 "receiving_air_yards",\n                 "receiving_yards_after_catch",\n                 "receiving_first_downs",\n                 "receiving_epa",\n                 "receiving_2pt_conversions",\n                 "racr",\n                 "target_share",\n                 "air_yards_share",\n                 "wopr",\n                 "special_teams_tds", \n                 "fantasy_points",\n                 "fantasy_points_

## TE

In [14]:
'''te = weekly[weekly["position"].isin(["TE"])].reset_index(drop=True) #Create new dataframe for relevant TE information
te["label"] = te.progress_apply(lambda row: row["player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + row["recent_team"], axis=1)
te = te.loc[:, ["label", 
                 "season",
                 "week",
                 "player_id",
                 "receptions",
                 "targets",
                 "receiving_yards",
                 "receiving_tds",
                 "receiving_fumbles",
                 "receiving_air_yards",
                 "receiving_yards_after_catch",
                 "receiving_first_downs",
                 "receiving_epa",
                 "receiving_2pt_conversions",
                 "racr",
                 "target_share",
                 "air_yards_share",
                 "wopr",
                 "special_teams_tds", 
                 "fantasy_points",
                 "fantasy_points_ppr"]]
te[0:5] #Print sample of result'''

'te = weekly[weekly["position"].isin(["TE"])].reset_index(drop=True) #Create new dataframe for relevant TE information\nte["label"] = te.progress_apply(lambda row: row["player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + row["recent_team"], axis=1)\nte = te.loc[:, ["label", \n                 "season",\n                 "week",\n                 "player_id",\n                 "receptions",\n                 "targets",\n                 "receiving_yards",\n                 "receiving_tds",\n                 "receiving_fumbles",\n                 "receiving_air_yards",\n                 "receiving_yards_after_catch",\n                 "receiving_first_downs",\n                 "receiving_epa",\n                 "receiving_2pt_conversions",\n                 "racr",\n                 "target_share",\n                 "air_yards_share",\n                 "wopr",\n                 "special_teams_tds", \n                 "fantasy_points",\n                 "fantasy_points

## K

In [15]:
'''roster = nfl.import_seasonal_rosters(range(first_year,last_year)) #Built in function collecting raw roster data within year range
roster = roster[roster["depth_chart_position"].isin(["K"])].reset_index(drop=True) #Only take data from desired positions
roster = roster.loc[:, ["season",
                        "team",
                        "week",
                        "player_id"]]
roster #Print sample of results'''

'roster = nfl.import_seasonal_rosters(range(first_year,last_year)) #Built in function collecting raw roster data within year range\nroster = roster[roster["depth_chart_position"].isin(["K"])].reset_index(drop=True) #Only take data from desired positions\nroster = roster.loc[:, ["season",\n                        "team",\n                        "week",\n                        "player_id"]]\nroster #Print sample of results'

In [16]:
'''#Modular so it can be adjusted depending on league settings

def kicker_fg_fantasy_points(made, dist):
    if made:
        if dist>=50:
            return 5
        return 3
    return 0

def kicker_ep_fantasy_points(made):
    if made:
        return 1
    return 0'''

'#Modular so it can be adjusted depending on league settings\n\ndef kicker_fg_fantasy_points(made, dist):\n    if made:\n        if dist>=50:\n            return 5\n        return 3\n    return 0\n\ndef kicker_ep_fantasy_points(made):\n    if made:\n        return 1\n    return 0'

In [17]:
'''k_pbp = pbp[pbp["play_type"].isin(["field_goal","extra_point"])].reset_index(drop=True) #Create new dataframe for relevant K information
k_pbp = k_pbp[k_pbp["season_type"].isin(["REG"])]
#k_pbp["label"] = k_pbp.progress_apply(lambda row: row["kicker_player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + str(row["possession_team"]), axis=1)
k_pbp = k_pbp.loc[:, ["kicker_player_id",
                    "week",
                    "season",
                    "posteam",
                    "play_type",
                    "kick_distance",
                    "field_goal_result",
                    "extra_point_result"]]
print((k_pbp["posteam"]))

unique_kickers = k_pbp["kicker_player_id"].unique()

unique_kickers'''

'k_pbp = pbp[pbp["play_type"].isin(["field_goal","extra_point"])].reset_index(drop=True) #Create new dataframe for relevant K information\nk_pbp = k_pbp[k_pbp["season_type"].isin(["REG"])]\n#k_pbp["label"] = k_pbp.progress_apply(lambda row: row["kicker_player_id"] + ":" + str(row["week"]) + ":" + str(row["season"]) + ":" + str(row["possession_team"]), axis=1)\nk_pbp = k_pbp.loc[:, ["kicker_player_id",\n                    "week",\n                    "season",\n                    "posteam",\n                    "play_type",\n                    "kick_distance",\n                    "field_goal_result",\n                    "extra_point_result"]]\nprint((k_pbp["posteam"]))\n\nunique_kickers = k_pbp["kicker_player_id"].unique()\n\nunique_kickers'

In [18]:
'''kick_dict = {}

for x in unique_kickers:

    season = last_year-1 #Upper bound for season year
    week=18 #Upper bound for week in season
    
    while(season>=first_year): #Not exceeding accepted range
        
        while(week>0): #Not exceeding accepted week
            
            curr_kick = k_pbp[k_pbp["week"].isin([week])] #Isolate pbp data to only the week of the game currently being played
            #print("Week Filter:" + str(curr_kick))
            curr_kick = curr_kick[curr_kick["season"].isin([season])] #Isolate pbp data to only the year of the game currently being played
            #print("Season Filter:" + str(curr_kick))
            curr_kick = curr_kick[curr_kick["kicker_player_id"].isin([x])].reset_index(drop=True) #Isolate pbp data to only when the kicker is the input player id
            #print("ID Filter:" + str(curr_kick))
            
            if(not curr_kick.empty):

                #print(curr_kick)

                possession_team = curr_kick["posteam"].at[0]
                
                fgs = curr_kick[curr_kick["play_type"].isin(["field_goal"])].reset_index(drop=True)
                eps = curr_kick[curr_kick["play_type"].isin(["extra_point"])].reset_index(drop=True)
    
                fgs["field_goal_result"] = fgs.apply(lambda row: row["field_goal_result"]=="made", axis=1)
                eps["extra_point_result"] = eps.apply(lambda row: row["extra_point_result"]=="good", axis=1)
    
                season_num = curr_kick.at[0,"season"]
                week_num = curr_kick.at[0,"week"]
                num_fgs = len(fgs)
                num_eps = len(eps)
                avg_fg_dist = fgs["kick_distance"].mean()
                
                
    
                if len(fgs)!=0:
                    fg_pctg = fgs["field_goal_result"].sum()/len(fgs)
                else:
                    fg_pctg=0
                    
                if len(eps)!=0:
                    ep_pctg = eps["extra_point_result"].sum()/len(eps)
                else:
                    ep_pctg=0
    
                fgs["fantasy_points"] = fgs.apply(lambda row: kicker_fg_fantasy_points(row["field_goal_result"],row["kick_distance"]), axis=1)
                eps["fantasy_points"] = eps.apply(lambda row: kicker_ep_fantasy_points(row["extra_point_result"]), axis=1)
    
                fantasy_points = fgs["fantasy_points"].sum() + eps["fantasy_points"].sum()

                new_id = f"{x}:{str(week)}:{str(season)}:" + str(possession_team)

                kick_dict[new_id] = [len(fgs),len(eps),avg_fg_dist,fg_pctg,ep_pctg,fantasy_points,x,new_id]
                
    
            week-=1 #Repeat inner loop with data from a week ago  
    
        week=18 #Reset week counter
        season-=1 #Repeat outer loop with data from a season ago 

kick_dict'''

'kick_dict = {}\n\nfor x in unique_kickers:\n\n    season = last_year-1 #Upper bound for season year\n    week=18 #Upper bound for week in season\n    \n    while(season>=first_year): #Not exceeding accepted range\n        \n        while(week>0): #Not exceeding accepted week\n            \n            curr_kick = k_pbp[k_pbp["week"].isin([week])] #Isolate pbp data to only the week of the game currently being played\n            #print("Week Filter:" + str(curr_kick))\n            curr_kick = curr_kick[curr_kick["season"].isin([season])] #Isolate pbp data to only the year of the game currently being played\n            #print("Season Filter:" + str(curr_kick))\n            curr_kick = curr_kick[curr_kick["kicker_player_id"].isin([x])].reset_index(drop=True) #Isolate pbp data to only when the kicker is the input player id\n            #print("ID Filter:" + str(curr_kick))\n            \n            if(not curr_kick.empty):\n\n                #print(curr_kick)\n\n                possessi

In [19]:
'''k = pd.DataFrame.from_dict(kick_dict,orient="index")
k = k.rename(columns={0:"num_fgs",1:"num_eps",2:"avg_fg_dist",3:"fg_pctg",4:"ep_pctg",5:"fantasy_points",6:"player_id",7:"label"})
k'''

'k = pd.DataFrame.from_dict(kick_dict,orient="index")\nk = k.rename(columns={0:"num_fgs",1:"num_eps",2:"avg_fg_dist",3:"fg_pctg",4:"ep_pctg",5:"fantasy_points",6:"player_id",7:"label"})\nk'

## Defense

In [20]:
#Inputs team name
#Outputs performance history across team history in list format, where [0] is their most recent game and [len-1] is their first recorded game

def get_defensive_performance_history(team):
    
    season = last_year-1 #Upper bound for season year
    week=18 #Upper bound for week in season

    #pd.DataFrame(data={'season': season_num, 'week': week_num, 'defending_team': defending_team, 'offensive_team': offensive_team, 'interceptions': interceptions, 'sacks': sacks, 'sack_yards': sack_yards, 'sack_fumbles': sack_fumbles, 'sack_fumbles_recovered': sack_fumbles_recovered, 'receiving_fumbles': receiving_fumbles, 'receiving_fumbles_recovered': receiving_fumbles_recovered, 'rushing_yards_allowed': rushing_yards_allowed, 'passing_yards_allowed': passing_yards_allowed, 'passing_tds_allowed': passing_tds_allowed, 'rushing_tds_allowed': rushing_tds_allowed, 'special_teams_tds_allowed': special_teams_tds_allowed},index=[f'{season_num}-{week_num}-{defending_team}'])
    return_df = pd.DataFrame(columns=["season","week","defending_team","offensive_team","interceptions","sacks","sack_yards","sack_fumbles","sack_fumbles_recovered","receiving_fumbles","receiving_fumbles_recovered","rushing_yards_allowed","passing_yards_allowed","passing_tds_allowed","rushing_tds_allowed","special_teams_tds_allowed"])
    #display(return_df)

    while(season>=first_year): #Not exceeding accepted range
        
        while(week>0): #Not exceeding accepted week
            
            curr_week = weekly[weekly["week"].isin([week])] #Isolate weekly data to only the week of the game currently being played
            curr_week = curr_week[curr_week["season"].isin([season])] #Isolate weekly data to only the year of the game currently being played
            curr_week = curr_week[curr_week["opponent_team"].isin([team])].reset_index(drop=True) #Isolate weekly data to only when the defending team is the input team

            #display(curr_week)
            
            #Construct dataset using weekly data for current week, season, and defending team
            #Note: opponents stats now reflect the defensive performance, just inverted (E.G. rushing_yards now is rushing_yards allowed)
            if not curr_week.empty:
                season_num = curr_week.at[0,"season"]
                #print(season_num)
                week_num = curr_week.at[0,"week"]
                defending_team = curr_week.at[0,"opponent_team"]
                offensive_team = curr_week.at[0,"recent_team"]
                interceptions = curr_week[["interceptions"]].sum().iloc[0]
                sacks = curr_week[["sacks"]].sum().iloc[0]
                sack_yards = curr_week[["sack_yards"]].sum().iloc[0]
                sack_fumbles = curr_week[["sack_fumbles"]].sum().iloc[0]
                sack_fumbles_recovered = curr_week[["sack_fumbles_lost"]].sum().iloc[0]
                receiving_fumbles = curr_week[["receiving_fumbles"]].sum().iloc[0]
                receiving_fumbles_recovered = curr_week[["receiving_fumbles_lost"]].sum().iloc[0]
                rushing_yards_allowed = curr_week[["rushing_yards"]].sum().iloc[0]
                passing_yards_allowed = curr_week[["passing_yards"]].sum().iloc[0]
                passing_tds_allowed = curr_week[["passing_tds"]].sum().iloc[0]
                rushing_tds_allowed = curr_week[["rushing_tds"]].sum().iloc[0]
                special_teams_tds_allowed = curr_week[["special_teams_tds"]].sum().iloc[0]
                new_df = pd.DataFrame(data={'season': season_num, 'week': week_num, 'defending_team': defending_team, 'offensive_team': offensive_team, 'interceptions': interceptions, 'sacks': sacks, 'sack_yards': sack_yards, 'sack_fumbles': sack_fumbles, 'sack_fumbles_recovered': sack_fumbles_recovered, 'receiving_fumbles': receiving_fumbles, 'receiving_fumbles_recovered': receiving_fumbles_recovered, 'rushing_yards_allowed': rushing_yards_allowed, 'passing_yards_allowed': passing_yards_allowed, 'passing_tds_allowed': passing_tds_allowed, 'rushing_tds_allowed': rushing_tds_allowed, 'special_teams_tds_allowed': special_teams_tds_allowed},index=[f'{season_num}-{week_num}-{defending_team}'])
                return_df = pd.concat([return_df,new_df])
                #display(return_df)
                
            week-=1 #Repeat inner loop with data from a week ago  
            
        week=18 #Reset week counter
        season-=1 #Repeat outer loop with data from a season ago  
        
    return return_df #Array with appended data from all defensive activities

In [21]:
#Empty dictionary for each team abbreviation
team_defenses = {
    "NE" : None,
    "NO" : None,
    "NYJ": None,
    "LAC": None,
    "ATL": None,
    "NYG": None,
    "ARI": None,
    "PIT": None,
    "WAS": None,
    "GB" : None,
    "MIA": None,
    "PHI": None,
    "BUF": None,
    "DET": None,
    "TB" : None,
    "SEA": None,
    "TEN": None,
    "BAL": None,
    "LV" : None,
    "SF" : None,
    "CAR": None,
    "KC" : None,
    "JAX": None,
    "CHI": None,
    "LA" : None,
    "DEN": None,
    "HOU": None,
    "CIN": None,
    "MIN": None,
    "CLE": None,
    "IND": None,
    "DAL": None
}

#Populate dictionary with cooresponding team's history of defensive performances
for x in team_defenses:
    team_defenses[x] = get_defensive_performance_history(x)

team_defenses["CIN"] #Print sample of result

  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_df,new_df])
  return_df = pd.concat([return_

Unnamed: 0,season,week,defending_team,offensive_team,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_recovered,receiving_fumbles,receiving_fumbles_recovered,rushing_yards_allowed,passing_yards_allowed,passing_tds_allowed,rushing_tds_allowed,special_teams_tds_allowed
2022-18-CIN,2022,18,CIN,BAL,2.0,4.0,10.0,1,1,1.0,1.0,110.0,286.0,0,1,0.0
2022-16-CIN,2022,16,CIN,NE,0.0,4.0,16.0,0,0,1.0,0.0,61.0,240.0,2,0,0.0
2022-15-CIN,2022,15,CIN,TB,2.0,1.0,8.0,1,1,0.0,0.0,92.0,312.0,3,0,0.0
2022-14-CIN,2022,14,CIN,CLE,1.0,2.0,3.0,0,0,0.0,0.0,71.0,276.0,1,0,0.0
2022-13-CIN,2022,13,CIN,KC,0.0,2.0,12.0,0,0,1.0,1.0,138.0,223.0,1,2,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2004-6-CIN,2004,6,CIN,CLE,2.0,1.0,0.0,0,0,0.0,0.0,139.0,310.0,4,0,0.0
2004-4-CIN,2004,4,CIN,PIT,0.0,1.0,6.0,0,0,0.0,0.0,165.0,174.0,1,2,0.0
2004-3-CIN,2004,3,CIN,BAL,0.0,2.0,0.0,1,1,1.0,0.0,254.0,126.0,1,2,0.0
2004-2-CIN,2004,2,CIN,MIA,2.0,2.0,17.0,0,0,0.0,0.0,25.0,218.0,1,0,0.0


In [22]:
team_defenses["MIN"]

Unnamed: 0,season,week,defending_team,offensive_team,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_recovered,receiving_fumbles,receiving_fumbles_recovered,rushing_yards_allowed,passing_yards_allowed,passing_tds_allowed,rushing_tds_allowed,special_teams_tds_allowed
2022-18-MIN,2022,18,MIN,CHI,2.0,1.0,6.0,0,0,0.0,0.0,118.0,147.0,1,1,0.0
2022-17-MIN,2022,17,MIN,GB,0.0,1.0,16.0,0,0,0.0,0.0,163.0,168.0,1,2,1.0
2022-16-MIN,2022,16,MIN,NYG,1.0,3.0,15.0,1,0,1.0,1.0,126.0,334.0,1,1,0.0
2022-15-MIN,2022,15,MIN,IND,0.0,3.0,12.0,1,0,0.0,0.0,171.0,182.0,1,0,1.0
2022-14-MIN,2022,14,MIN,DET,0.0,0.0,0.0,0,0,0.0,0.0,134.0,330.0,3,1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2004-6-MIN,2004,6,MIN,NO,1.0,3.0,23.0,1,0,0.0,0.0,159.0,249.0,1,3,0.0
2004-5-MIN,2004,5,MIN,HOU,0.0,4.0,14.0,0,0,0.0,0.0,52.0,372.0,3,1,0.0
2004-3-MIN,2004,3,MIN,CHI,0.0,2.0,9.0,1,1,0.0,0.0,146.0,248.0,0,2,0.0
2004-2-MIN,2004,2,MIN,PHI,0.0,2.0,19.0,0,0,1.0,1.0,91.0,245.0,2,1,0.0


# Averages Data

## QB

In [23]:
count = 0

qb = qb.sort_values(by="label")

curr_id_count = 0

prev_id = " "

qb_nn_df = pd.DataFrame()
for x in range(1,game_coverage+1):
    new_columns = pd.DataFrame(columns=[
                        f"{x}_completions",
                        f"{x}_attempts",
                        f"{x}_passing_yards",
                        f"{x}_passing_tds",
                        f"{x}_interceptions",
                        f"{x}_sacks",
                        f"{x}_sack_fumbles",
                        f"{x}_passing_air_yards",
                        f"{x}_passing_yards_after_catch",
                        f"{x}_passing_first_downs",
                        f"{x}_passing_epa",
                        f"{x}_passing_2pt_conversions",
                        f"{x}_pacr",
                        f"{x}_dakota",
                        f"{x}_carries",
                        f"{x}_rushing_yards",
                        f"{x}_rushing_tds",
                        f"{x}_rushing_fumbles",
                        f"{x}_rushing_first_downs",
                        f"{x}_rushing_epa",
                        f"{x}_rushing_2pt_conversions",
                        f"{x}_fantasy_points",

                        f"{x}_opp_interceptions",
                        f"{x}_opp_sacks",
                        f"{x}_opp_sack_yards",
                        f"{x}_opp_sack_fumbles",
                        f"{x}_opp_sack_fumbles_recovered",
                        f"{x}_opp_receiving_fumbles",
                        f"{x}_opp_receiving_fumbles_recovered",
                        f"{x}_opp_rushing_yards_allowed",
                        f"{x}_opp_passing_yards_allowed",
                        f"{x}_opp_passing_tds_allowed",
                        f"{x}_opp_rushing_tds_allowed",
                        f"{x}_opp_special_teams_tds_allowed"])
    
    qb_nn_df = pd.concat([qb_nn_df, new_columns])

qb_nn_df["label"] = qb["label"]


#NEW : NEED TO ADD TO OTHER POS GROUPS
qb_nn_df = pd.concat([qb_nn_df,pd.DataFrame(columns=["fantasy_points"])])

#print(qb_nn_df)

qb_nn_df.set_index("label", inplace = True)

for x in qb["label"]:

    player_id, season, week, recent_team = x.split(":")
    week = int(week)
    season = int(season)

    if prev_id != player_id:
        
        prev_id = player_id
        curr_id_count=0

    if curr_id_count == 0:

        qb_nn_df = qb_nn_df.drop(index=x)
        print("Curr ID Count: " + str(curr_id_count) + ", DF based on draft pick")
        print(x)
        
    elif curr_id_count < game_coverage:

        curr_player = qb[qb["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        curr_defense = curr_player["opponent_team"].iloc[0]
        #print(curr_defense)

        while(count!=game_coverage):

            
            
            qb_nn_df.at[x,f"{count+1}_completions"] = curr_player.iloc[loop_idx]["completions"]
            qb_nn_df.at[x,f"{count+1}_attempts"] = curr_player.iloc[loop_idx]["attempts"]
            qb_nn_df.at[x,f"{count+1}_passing_yards"] = curr_player.iloc[loop_idx]["passing_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_tds"] = curr_player.iloc[loop_idx]["passing_tds"]
            qb_nn_df.at[x,f"{count+1}_interceptions"] = curr_player.iloc[loop_idx]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_sacks"] = curr_player.iloc[loop_idx]["sacks"]
            qb_nn_df.at[x,f"{count+1}_sack_fumbles"] = curr_player.iloc[loop_idx]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_passing_air_yards"] = curr_player.iloc[loop_idx]["passing_air_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_yards_after_catch"] = curr_player.iloc[loop_idx]["passing_yards_after_catch"]
            qb_nn_df.at[x,f"{count+1}_passing_first_downs"] = curr_player.iloc[loop_idx]["passing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_passing_epa"] = curr_player.iloc[loop_idx]["passing_epa"]
            qb_nn_df.at[x,f"{count+1}_passing_2pt_conversions"] = curr_player.iloc[loop_idx]["passing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_pacr"] = curr_player.iloc[loop_idx]["pacr"]
            qb_nn_df.at[x,f"{count+1}_dakota"] = curr_player.iloc[loop_idx]["dakota"]
            qb_nn_df.at[x,f"{count+1}_carries"] = curr_player.iloc[loop_idx]["carries"]
            qb_nn_df.at[x,f"{count+1}_rushing_yards"] = curr_player.iloc[loop_idx]["rushing_yards"]
            qb_nn_df.at[x,f"{count+1}_rushing_tds"] = curr_player.iloc[loop_idx]["rushing_tds"]
            qb_nn_df.at[x,f"{count+1}_rushing_fumbles"] = curr_player.iloc[loop_idx]["rushing_fumbles"]
            qb_nn_df.at[x,f"{count+1}_rushing_first_downs"] = curr_player.iloc[loop_idx]["rushing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_rushing_epa"] = curr_player.iloc[loop_idx]["rushing_epa"]
            qb_nn_df.at[x,f"{count+1}_rushing_2pt_conversions"] = curr_player.iloc[loop_idx]["rushing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]


            
            curr_loop_week = curr_player.iloc[loop_idx]["week"]
            curr_loop_season = curr_player.iloc[loop_idx]["season"]
            
            opp_team_df = team_defenses[curr_defense]
            opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
            opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)

            while(opp_team_df.empty):
                curr_loop_week-=1
                if curr_loop_week == 0:
                    curr_loop_week = 18
                    curr_loop_season-=1
                if curr_loop_season < first_year:
                    curr_loop_season = last_year-1
                opp_team_df = team_defenses[curr_defense]
                opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
                opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)


            qb_nn_df.at[x,f"{count+1}_opp_interceptions"] = opp_team_df.iloc[0]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_opp_sacks"] = opp_team_df.iloc[0]["sacks"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_yards"] = opp_team_df.iloc[0]["sack_yards"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles"] = opp_team_df.iloc[0]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles_recovered"] = opp_team_df.iloc[0]["sack_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles"] = opp_team_df.iloc[0]["receiving_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles_recovered"] = opp_team_df.iloc[0]["receiving_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_yards_allowed"] = opp_team_df.iloc[0]["rushing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_yards_allowed"] = opp_team_df.iloc[0]["passing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_tds_allowed"] = opp_team_df.iloc[0]["passing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_tds_allowed"] = opp_team_df.iloc[0]["rushing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_special_teams_tds_allowed"] = opp_team_df.iloc[0]["special_teams_tds_allowed"]

            
            

            loop_idx-=1
            if(loop_idx<0):
                loop_idx=curr_idx
            count+=1

        #NEW : NEED TO ADD TO OTHER POS GROUPS
        curr_game = curr_player[curr_player["label"].isin([x])]
        #print(curr_game["fantasy_points"].iloc[0])
        qb_nn_df.at[x,"fantasy_points"] = curr_game["fantasy_points"].iloc[0]

    else:

        curr_player = qb[qb["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0]-1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            qb_nn_df.at[x,f"{count+1}_completions"] = curr_player.iloc[loop_idx]["completions"]
            qb_nn_df.at[x,f"{count+1}_attempts"] = curr_player.iloc[loop_idx]["attempts"]
            qb_nn_df.at[x,f"{count+1}_passing_yards"] = curr_player.iloc[loop_idx]["passing_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_tds"] = curr_player.iloc[loop_idx]["passing_tds"]
            qb_nn_df.at[x,f"{count+1}_interceptions"] = curr_player.iloc[loop_idx]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_sacks"] = curr_player.iloc[loop_idx]["sacks"]
            qb_nn_df.at[x,f"{count+1}_sack_fumbles"] = curr_player.iloc[loop_idx]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_passing_air_yards"] = curr_player.iloc[loop_idx]["passing_air_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_yards_after_catch"] = curr_player.iloc[loop_idx]["passing_yards_after_catch"]
            qb_nn_df.at[x,f"{count+1}_passing_first_downs"] = curr_player.iloc[loop_idx]["passing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_passing_epa"] = curr_player.iloc[loop_idx]["passing_epa"]
            qb_nn_df.at[x,f"{count+1}_passing_2pt_conversions"] = curr_player.iloc[loop_idx]["passing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_pacr"] = curr_player.iloc[loop_idx]["pacr"]
            qb_nn_df.at[x,f"{count+1}_dakota"] = curr_player.iloc[loop_idx]["dakota"]
            qb_nn_df.at[x,f"{count+1}_carries"] = curr_player.iloc[loop_idx]["carries"]
            qb_nn_df.at[x,f"{count+1}_rushing_yards"] = curr_player.iloc[loop_idx]["rushing_yards"]
            qb_nn_df.at[x,f"{count+1}_rushing_tds"] = curr_player.iloc[loop_idx]["rushing_tds"]
            qb_nn_df.at[x,f"{count+1}_rushing_fumbles"] = curr_player.iloc[loop_idx]["rushing_fumbles"]
            qb_nn_df.at[x,f"{count+1}_rushing_first_downs"] = curr_player.iloc[loop_idx]["rushing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_rushing_epa"] = curr_player.iloc[loop_idx]["rushing_epa"]
            qb_nn_df.at[x,f"{count+1}_rushing_2pt_conversions"] = curr_player.iloc[loop_idx]["rushing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]



            

            curr_loop_week = curr_player.iloc[loop_idx]["week"]
            curr_loop_season = curr_player.iloc[loop_idx]["season"]
            
            opp_team_df = team_defenses[curr_defense]
            opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
            opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)

            while(opp_team_df.empty):
                curr_loop_week-=1
                if curr_loop_week == 0:
                    curr_loop_week = 18
                    curr_loop_season-=1
                if curr_loop_season < first_year:
                    curr_loop_season = last_year-1
                opp_team_df = team_defenses[curr_defense]
                opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
                opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)


            qb_nn_df.at[x,f"{count+1}_opp_interceptions"] = opp_team_df.iloc[0]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_opp_sacks"] = opp_team_df.iloc[0]["sacks"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_yards"] = opp_team_df.iloc[0]["sack_yards"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles"] = opp_team_df.iloc[0]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles_recovered"] = opp_team_df.iloc[0]["sack_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles"] = opp_team_df.iloc[0]["receiving_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles_recovered"] = opp_team_df.iloc[0]["receiving_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_yards_allowed"] = opp_team_df.iloc[0]["rushing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_yards_allowed"] = opp_team_df.iloc[0]["passing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_tds_allowed"] = opp_team_df.iloc[0]["passing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_tds_allowed"] = opp_team_df.iloc[0]["rushing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_special_teams_tds_allowed"] = opp_team_df.iloc[0]["special_teams_tds_allowed"]



            

            loop_idx-=1
            count+=1

        #NEW : NEED TO ADD TO OTHER POS GROUPS
        curr_game = curr_player[curr_player["label"].isin([x])]
        #print(curr_game["fantasy_points"].iloc[0])
        qb_nn_df.at[x,"fantasy_points"] = curr_game["fantasy_points"].iloc[0]

    curr_id_count += 1

qb_nn_df.head(10)

Curr ID Count: 0, DF based on draft pick
00-0000722:2004:08:HOU
Curr ID Count: 0, DF based on draft pick
00-0000865:2005:09:PIT
Curr ID Count: 0, DF based on draft pick
00-0001335:2004:10:PHI
Curr ID Count: 0, DF based on draft pick
00-0001361:2004:01:BUF
Curr ID Count: 0, DF based on draft pick
00-0001545:2005:05:NO
Curr ID Count: 0, DF based on draft pick
00-0001823:2004:01:NO
Curr ID Count: 0, DF based on draft pick
00-0002110:2004:01:WAS
Curr ID Count: 0, DF based on draft pick
00-0002876:2004:13:LA
Curr ID Count: 0, DF based on draft pick
00-0003292:2004:03:LV
Curr ID Count: 0, DF based on draft pick
00-0003315:2004:07:KC
Curr ID Count: 0, DF based on draft pick
00-0003739:2004:01:MIN
Curr ID Count: 0, DF based on draft pick
00-0004161:2004:01:CAR
Curr ID Count: 0, DF based on draft pick
00-0004229:2004:11:PHI
Curr ID Count: 0, DF based on draft pick
00-0004293:2004:03:SEA
Curr ID Count: 0, DF based on draft pick
00-0005106:2004:01:GB
Curr ID Count: 0, DF based on draft pick
00-00

KeyboardInterrupt: 

In [None]:
qb_nn_df

## RB

In [None]:
'''count = 0

rb = rb.sort_values(by="label")

curr_id_count = 0

prev_id = " "

rb_nn_df = pd.DataFrame()
for x in range(1,game_coverage+1):
    new_columns = pd.DataFrame(columns=[
                        f"{x}_carries",
                        f"{x}_rushing_yards",
                        f"{x}_rushing_tds",
                        f"{x}_rushing_fumbles",
                        f"{x}_rushing_first_downs",
                        f"{x}_rushing_epa",
                        f"{x}_rushing_2pt_conversions",
                        f"{x}_receptions",
                        f"{x}_targets",
                        f"{x}_receiving_yards",
                        f"{x}_receiving_tds",
                        f"{x}_receiving_fumbles",
                        f"{x}_receiving_air_yards",
                        f"{x}_receiving_yards_after_catch",
                        f"{x}_receiving_first_downs",
                        f"{x}_receiving_epa",
                        f"{x}_receiving_2pt_conversions",
                        f"{x}_racr",
                        f"{x}_target_share",
                        f"{x}_air_yards_share",
                        f"{x}_wopr",
                        f"{x}_special_teams_tds",
                        f"{x}_fantasy_points"])
    
    rb_nn_df = pd.concat([rb_nn_df, new_columns])

rb_nn_df["label"] = rb["label"]
rb_nn_df.set_index("label", inplace = True)

for x in rb["label"]:

    player_id, season, week, recent_team = x.split(":")
    week = int(week)
    season = int(season)

    if prev_id != player_id:
        
        prev_id = player_id
        curr_id_count=0

    if curr_id_count == 0:
        
        print("Curr ID Count: " + str(curr_id_count) + ", DF based on draft pick")
        
    elif curr_id_count < game_coverage:

        curr_player = rb[rb["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            rb_nn_df.at[x,f"{count+1}_carries"] = curr_player.iloc[loop_idx]["carries"]
            rb_nn_df.at[x,f"{count+1}_rushing_yards"] = curr_player.iloc[loop_idx]["rushing_yards"]
            rb_nn_df.at[x,f"{count+1}_rushing_tds"] = curr_player.iloc[loop_idx]["rushing_tds"]
            rb_nn_df.at[x,f"{count+1}_rushing_fumbles"] = curr_player.iloc[loop_idx]["rushing_fumbles"]
            rb_nn_df.at[x,f"{count+1}_rushing_first_downs"] = curr_player.iloc[loop_idx]["rushing_first_downs"]
            rb_nn_df.at[x,f"{count+1}_rushing_epa"] = curr_player.iloc[loop_idx]["rushing_epa"]
            rb_nn_df.at[x,f"{count+1}_rushing_2pt_conversions"] = curr_player.iloc[loop_idx]["rushing_2pt_conversions"]
            rb_nn_df.at[x,f"{count+1}_receptions"] = curr_player.iloc[loop_idx]["receptions"]
            rb_nn_df.at[x,f"{count+1}_targets"] = curr_player.iloc[loop_idx]["targets"]
            rb_nn_df.at[x,f"{count+1}_receiving_yards"] = curr_player.iloc[loop_idx]["receiving_yards"]
            rb_nn_df.at[x,f"{count+1}_receiving_tds"] = curr_player.iloc[loop_idx]["receiving_tds"]
            rb_nn_df.at[x,f"{count+1}_receiving_fumbles"] = curr_player.iloc[loop_idx]["receiving_fumbles"]
            rb_nn_df.at[x,f"{count+1}_receiving_air_yards"] = curr_player.iloc[loop_idx]["receiving_air_yards"]
            rb_nn_df.at[x,f"{count+1}_receiving_yards_after_catch"] = curr_player.iloc[loop_idx]["receiving_yards_after_catch"]
            rb_nn_df.at[x,f"{count+1}_receiving_first_downs"] = curr_player.iloc[loop_idx]["receiving_first_downs"]
            rb_nn_df.at[x,f"{count+1}_receiving_epa"] = curr_player.iloc[loop_idx]["receiving_epa"]
            rb_nn_df.at[x,f"{count+1}_receiving_2pt_conversions"] = curr_player.iloc[loop_idx]["receiving_2pt_conversions"]
            rb_nn_df.at[x,f"{count+1}_racr"] = curr_player.iloc[loop_idx]["racr"]
            rb_nn_df.at[x,f"{count+1}_target_share"] = curr_player.iloc[loop_idx]["target_share"]
            rb_nn_df.at[x,f"{count+1}_air_yards_share"] = curr_player.iloc[loop_idx]["air_yards_share"]
            rb_nn_df.at[x,f"{count+1}_wopr"] = curr_player.iloc[loop_idx]["wopr"]
            rb_nn_df.at[x,f"{count+1}_special_teams_tds"] = curr_player.iloc[loop_idx]["special_teams_tds"]
            rb_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]

            loop_idx-=1
            if(loop_idx<0):
                loop_idx=curr_idx
            count+=1

    else:

        curr_player = rb[rb["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            rb_nn_df.at[x,f"{count+1}_carries"] = curr_player.iloc[loop_idx]["carries"]
            rb_nn_df.at[x,f"{count+1}_rushing_yards"] = curr_player.iloc[loop_idx]["rushing_yards"]
            rb_nn_df.at[x,f"{count+1}_rushing_tds"] = curr_player.iloc[loop_idx]["rushing_tds"]
            rb_nn_df.at[x,f"{count+1}_rushing_fumbles"] = curr_player.iloc[loop_idx]["rushing_fumbles"]
            rb_nn_df.at[x,f"{count+1}_rushing_first_downs"] = curr_player.iloc[loop_idx]["rushing_first_downs"]
            rb_nn_df.at[x,f"{count+1}_rushing_epa"] = curr_player.iloc[loop_idx]["rushing_epa"]
            rb_nn_df.at[x,f"{count+1}_rushing_2pt_conversions"] = curr_player.iloc[loop_idx]["rushing_2pt_conversions"]
            rb_nn_df.at[x,f"{count+1}_receptions"] = curr_player.iloc[loop_idx]["receptions"]
            rb_nn_df.at[x,f"{count+1}_targets"] = curr_player.iloc[loop_idx]["targets"]
            rb_nn_df.at[x,f"{count+1}_receiving_yards"] = curr_player.iloc[loop_idx]["receiving_yards"]
            rb_nn_df.at[x,f"{count+1}_receiving_tds"] = curr_player.iloc[loop_idx]["receiving_tds"]
            rb_nn_df.at[x,f"{count+1}_receiving_fumbles"] = curr_player.iloc[loop_idx]["receiving_fumbles"]
            rb_nn_df.at[x,f"{count+1}_receiving_air_yards"] = curr_player.iloc[loop_idx]["receiving_air_yards"]
            rb_nn_df.at[x,f"{count+1}_receiving_yards_after_catch"] = curr_player.iloc[loop_idx]["receiving_yards_after_catch"]
            rb_nn_df.at[x,f"{count+1}_receiving_first_downs"] = curr_player.iloc[loop_idx]["receiving_first_downs"]
            rb_nn_df.at[x,f"{count+1}_receiving_epa"] = curr_player.iloc[loop_idx]["receiving_epa"]
            rb_nn_df.at[x,f"{count+1}_receiving_2pt_conversions"] = curr_player.iloc[loop_idx]["receiving_2pt_conversions"]
            rb_nn_df.at[x,f"{count+1}_racr"] = curr_player.iloc[loop_idx]["racr"]
            rb_nn_df.at[x,f"{count+1}_target_share"] = curr_player.iloc[loop_idx]["target_share"]
            rb_nn_df.at[x,f"{count+1}_air_yards_share"] = curr_player.iloc[loop_idx]["air_yards_share"]
            rb_nn_df.at[x,f"{count+1}_wopr"] = curr_player.iloc[loop_idx]["wopr"]
            rb_nn_df.at[x,f"{count+1}_special_teams_tds"] = curr_player.iloc[loop_idx]["special_teams_tds"]
            rb_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]

            loop_idx-=1
            count+=1

    curr_id_count += 1

rb_nn_df.head(10)'''

## WR

In [None]:
'''count = 0

wr = wr.sort_values(by="label")

curr_id_count = 0

prev_id = " "

wr_nn_df = pd.DataFrame()
for x in range(1,game_coverage+1):
    new_columns = pd.DataFrame(columns=[
                        f"{x}_receptions",
                        f"{x}_targets",
                        f"{x}_receiving_yards",
                        f"{x}_receiving_tds",
                        f"{x}_receiving_fumbles",
                        f"{x}_receiving_air_yards",
                        f"{x}_receiving_yards_after_catch",
                        f"{x}_receiving_first_downs",
                        f"{x}_receiving_epa",
                        f"{x}_receiving_2pt_conversions",
                        f"{x}_racr",
                        f"{x}_target_share",
                        f"{x}_air_yards_share",
                        f"{x}_wopr",
                        f"{x}_special_teams_tds",
                        f"{x}_fantasy_points"])
    
    wr_nn_df = pd.concat([wr_nn_df, new_columns])

wr_nn_df["label"] = wr["label"]
wr_nn_df.set_index("label", inplace = True)

for x in wr["label"]:

    player_id, season, week, recent_team = x.split(":")
    week = int(week)
    season = int(season)

    if prev_id != player_id:
        
        prev_id = player_id
        curr_id_count=0

    if curr_id_count == 0:
        
        print("Curr ID Count: " + str(curr_id_count) + ", DF based on draft pick")
        
    elif curr_id_count < game_coverage:

        curr_player = wr[wr["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            wr_nn_df.at[x,f"{count+1}_receptions"] = curr_player.iloc[loop_idx]["receptions"]
            wr_nn_df.at[x,f"{count+1}_targets"] = curr_player.iloc[loop_idx]["targets"]
            wr_nn_df.at[x,f"{count+1}_receiving_yards"] = curr_player.iloc[loop_idx]["receiving_yards"]
            wr_nn_df.at[x,f"{count+1}_receiving_tds"] = curr_player.iloc[loop_idx]["receiving_tds"]
            wr_nn_df.at[x,f"{count+1}_receiving_fumbles"] = curr_player.iloc[loop_idx]["receiving_fumbles"]
            wr_nn_df.at[x,f"{count+1}_receiving_air_yards"] = curr_player.iloc[loop_idx]["receiving_air_yards"]
            wr_nn_df.at[x,f"{count+1}_receiving_yards_after_catch"] = curr_player.iloc[loop_idx]["receiving_yards_after_catch"]
            wr_nn_df.at[x,f"{count+1}_receiving_first_downs"] = curr_player.iloc[loop_idx]["receiving_first_downs"]
            wr_nn_df.at[x,f"{count+1}_receiving_epa"] = curr_player.iloc[loop_idx]["receiving_epa"]
            wr_nn_df.at[x,f"{count+1}_receiving_2pt_conversions"] = curr_player.iloc[loop_idx]["receiving_2pt_conversions"]
            wr_nn_df.at[x,f"{count+1}_racr"] = curr_player.iloc[loop_idx]["racr"]
            wr_nn_df.at[x,f"{count+1}_target_share"] = curr_player.iloc[loop_idx]["target_share"]
            wr_nn_df.at[x,f"{count+1}_air_yards_share"] = curr_player.iloc[loop_idx]["air_yards_share"]
            wr_nn_df.at[x,f"{count+1}_wopr"] = curr_player.iloc[loop_idx]["wopr"]
            wr_nn_df.at[x,f"{count+1}_special_teams_tds"] = curr_player.iloc[loop_idx]["special_teams_tds"]
            wr_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]

            loop_idx-=1
            if(loop_idx<0):
                loop_idx=curr_idx
            count+=1

    else:

        curr_player = wr[wr["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            wr_nn_df.at[x,f"{count+1}_receptions"] = curr_player.iloc[loop_idx]["receptions"]
            wr_nn_df.at[x,f"{count+1}_targets"] = curr_player.iloc[loop_idx]["targets"]
            wr_nn_df.at[x,f"{count+1}_receiving_yards"] = curr_player.iloc[loop_idx]["receiving_yards"]
            wr_nn_df.at[x,f"{count+1}_receiving_tds"] = curr_player.iloc[loop_idx]["receiving_tds"]
            wr_nn_df.at[x,f"{count+1}_receiving_fumbles"] = curr_player.iloc[loop_idx]["receiving_fumbles"]
            wr_nn_df.at[x,f"{count+1}_receiving_air_yards"] = curr_player.iloc[loop_idx]["receiving_air_yards"]
            wr_nn_df.at[x,f"{count+1}_receiving_yards_after_catch"] = curr_player.iloc[loop_idx]["receiving_yards_after_catch"]
            wr_nn_df.at[x,f"{count+1}_receiving_first_downs"] = curr_player.iloc[loop_idx]["receiving_first_downs"]
            wr_nn_df.at[x,f"{count+1}_receiving_epa"] = curr_player.iloc[loop_idx]["receiving_epa"]
            wr_nn_df.at[x,f"{count+1}_receiving_2pt_conversions"] = curr_player.iloc[loop_idx]["receiving_2pt_conversions"]
            wr_nn_df.at[x,f"{count+1}_racr"] = curr_player.iloc[loop_idx]["racr"]
            wr_nn_df.at[x,f"{count+1}_target_share"] = curr_player.iloc[loop_idx]["target_share"]
            wr_nn_df.at[x,f"{count+1}_air_yards_share"] = curr_player.iloc[loop_idx]["air_yards_share"]
            wr_nn_df.at[x,f"{count+1}_wopr"] = curr_player.iloc[loop_idx]["wopr"]
            wr_nn_df.at[x,f"{count+1}_special_teams_tds"] = curr_player.iloc[loop_idx]["special_teams_tds"]
            wr_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]

            loop_idx-=1
            count+=1

    curr_id_count += 1

wr_nn_df.head(10)'''

## TE

In [None]:
'''count = 0

te = te.sort_values(by="label")

curr_id_count = 0

prev_id = " "

te_nn_df = pd.DataFrame()
for x in range(1,game_coverage+1):
    new_columns = pd.DataFrame(columns=[
                        f"{x}_receptions",
                        f"{x}_targets",
                        f"{x}_receiving_yards",
                        f"{x}_receiving_tds",
                        f"{x}_receiving_fumbles",
                        f"{x}_receiving_air_yards",
                        f"{x}_receiving_yards_after_catch",
                        f"{x}_receiving_first_downs",
                        f"{x}_receiving_epa",
                        f"{x}_receiving_2pt_conversions",
                        f"{x}_racr",
                        f"{x}_target_share",
                        f"{x}_air_yards_share",
                        f"{x}_wopr",
                        f"{x}_special_teams_tds",
                        f"{x}_fantasy_points"])
    
    te_nn_df = pd.concat([te_nn_df, new_columns])

te_nn_df["label"] = te["label"]
te_nn_df.set_index("label", inplace = True)

for x in te["label"]:

    player_id, season, week, recent_team = x.split(":")
    week = int(week)
    season = int(season)

    if prev_id != player_id:
        
        prev_id = player_id
        curr_id_count=0

    if curr_id_count == 0:
        
        print("Curr ID Count: " + str(curr_id_count) + ", DF based on draft pick")
        
    elif curr_id_count < game_coverage:

        curr_player = te[te["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            te_nn_df.at[x,f"{count+1}_receptions"] = curr_player.iloc[loop_idx]["receptions"]
            te_nn_df.at[x,f"{count+1}_targets"] = curr_player.iloc[loop_idx]["targets"]
            te_nn_df.at[x,f"{count+1}_receiving_yards"] = curr_player.iloc[loop_idx]["receiving_yards"]
            te_nn_df.at[x,f"{count+1}_receiving_tds"] = curr_player.iloc[loop_idx]["receiving_tds"]
            te_nn_df.at[x,f"{count+1}_receiving_fumbles"] = curr_player.iloc[loop_idx]["receiving_fumbles"]
            te_nn_df.at[x,f"{count+1}_receiving_air_yards"] = curr_player.iloc[loop_idx]["receiving_air_yards"]
            te_nn_df.at[x,f"{count+1}_receiving_yards_after_catch"] = curr_player.iloc[loop_idx]["receiving_yards_after_catch"]
            te_nn_df.at[x,f"{count+1}_receiving_first_downs"] = curr_player.iloc[loop_idx]["receiving_first_downs"]
            te_nn_df.at[x,f"{count+1}_receiving_epa"] = curr_player.iloc[loop_idx]["receiving_epa"]
            te_nn_df.at[x,f"{count+1}_receiving_2pt_conversions"] = curr_player.iloc[loop_idx]["receiving_2pt_conversions"]
            te_nn_df.at[x,f"{count+1}_racr"] = curr_player.iloc[loop_idx]["racr"]
            te_nn_df.at[x,f"{count+1}_target_share"] = curr_player.iloc[loop_idx]["target_share"]
            te_nn_df.at[x,f"{count+1}_air_yards_share"] = curr_player.iloc[loop_idx]["air_yards_share"]
            te_nn_df.at[x,f"{count+1}_wopr"] = curr_player.iloc[loop_idx]["wopr"]
            te_nn_df.at[x,f"{count+1}_special_teams_tds"] = curr_player.iloc[loop_idx]["special_teams_tds"]
            te_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]

            loop_idx-=1
            if(loop_idx<0):
                loop_idx=curr_idx
            count+=1

    else:

        curr_player = te[te["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            te_nn_df.at[x,f"{count+1}_receptions"] = curr_player.iloc[loop_idx]["receptions"]
            te_nn_df.at[x,f"{count+1}_targets"] = curr_player.iloc[loop_idx]["targets"]
            te_nn_df.at[x,f"{count+1}_receiving_yards"] = curr_player.iloc[loop_idx]["receiving_yards"]
            te_nn_df.at[x,f"{count+1}_receiving_tds"] = curr_player.iloc[loop_idx]["receiving_tds"]
            te_nn_df.at[x,f"{count+1}_receiving_fumbles"] = curr_player.iloc[loop_idx]["receiving_fumbles"]
            te_nn_df.at[x,f"{count+1}_receiving_air_yards"] = curr_player.iloc[loop_idx]["receiving_air_yards"]
            te_nn_df.at[x,f"{count+1}_receiving_yards_after_catch"] = curr_player.iloc[loop_idx]["receiving_yards_after_catch"]
            te_nn_df.at[x,f"{count+1}_receiving_first_downs"] = curr_player.iloc[loop_idx]["receiving_first_downs"]
            te_nn_df.at[x,f"{count+1}_receiving_epa"] = curr_player.iloc[loop_idx]["receiving_epa"]
            te_nn_df.at[x,f"{count+1}_receiving_2pt_conversions"] = curr_player.iloc[loop_idx]["receiving_2pt_conversions"]
            te_nn_df.at[x,f"{count+1}_racr"] = curr_player.iloc[loop_idx]["racr"]
            te_nn_df.at[x,f"{count+1}_target_share"] = curr_player.iloc[loop_idx]["target_share"]
            te_nn_df.at[x,f"{count+1}_air_yards_share"] = curr_player.iloc[loop_idx]["air_yards_share"]
            te_nn_df.at[x,f"{count+1}_wopr"] = curr_player.iloc[loop_idx]["wopr"]
            te_nn_df.at[x,f"{count+1}_special_teams_tds"] = curr_player.iloc[loop_idx]["special_teams_tds"]
            te_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]

            loop_idx-=1
            count+=1

    curr_id_count += 1

te_nn_df.head(10)'''

## K

In [None]:
'''count = 0

k = k.sort_values(by="label")
k = k.drop(k[k["player_id"] == "None"].index)

curr_id_count = 0

prev_id = " "

k_nn_df = pd.DataFrame()
for x in range(1,game_coverage+1):
    new_columns = pd.DataFrame(columns=[
                        f"{x}_num_fgs",
                        f"{x}_num_eps",
                        f"{x}_avg_fg_dist",
                        f"{x}_fg_pctg",
                        f"{x}_ep_pctg",
                        f"{x}_fantasy_points"])
    
    k_nn_df = pd.concat([k_nn_df, new_columns])

k_nn_df["label"] = k["label"]
k_nn_df.set_index("label", inplace = True)

for x in k["label"]:

    player_id, season, week, recent_team = x.split(":")
    week = int(week)
    season = int(season)

    if prev_id != player_id:
        
        prev_id = player_id
        curr_id_count=0

    if curr_id_count == 0:
        
        print("Curr ID Count: " + str(curr_id_count) + ", DF based on draft pick")
        
    elif curr_id_count < game_coverage:

        curr_player = k[k["player_id"].isin([player_id])].reset_index(drop=True)
        if not curr_player.empty: #I dont know why this is necessary. But it is. Pain.
            
            #print(curr_player)
            #print(curr_player["label"])
            #print(x)
            curr_idx = curr_player.index[curr_player["label"] == x][0]-1
            loop_idx = curr_idx
            count = 0
    
            while(count!=game_coverage):
                
                k_nn_df.at[x,f"{count+1}_num_fgs"] = curr_player.iloc[loop_idx]["num_fgs"]
                k_nn_df.at[x,f"{count+1}_num_eps"] = curr_player.iloc[loop_idx]["num_eps"]
                k_nn_df.at[x,f"{count+1}_avg_fg_dist"] = curr_player.iloc[loop_idx]["avg_fg_dist"]
                k_nn_df.at[x,f"{count+1}_fg_pctg"] = curr_player.iloc[loop_idx]["fg_pctg"]
                k_nn_df.at[x,f"{count+1}_ep_pctg"] = curr_player.iloc[loop_idx]["ep_pctg"]
                k_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]
    
                loop_idx-=1
                if(loop_idx<0):
                    loop_idx=curr_idx
                count+=1
z
    else:

        curr_player = k[k["player_id"].isin([player_id])].reset_index(drop=True)
        if not curr_player.empty: #I dont know why this is necessary. But it is. Pain.
            curr_idx = curr_player.index[curr_player["label"] == x][0] -1
            loop_idx = curr_idx
            count = 0
    
            while(count!=game_coverage):
                
                k_nn_df.at[x,f"{count+1}_num_fgs"] = curr_player.iloc[loop_idx]["num_fgs"]
                k_nn_df.at[x,f"{count+1}_num_eps"] = curr_player.iloc[loop_idx]["num_eps"]
                k_nn_df.at[x,f"{count+1}_avg_fg_dist"] = curr_player.iloc[loop_idx]["avg_fg_dist"]
                k_nn_df.at[x,f"{count+1}_fg_pctg"] = curr_player.iloc[loop_idx]["fg_pctg"]
                k_nn_df.at[x,f"{count+1}_ep_pctg"] = curr_player.iloc[loop_idx]["ep_pctg"]
                k_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]
    
                loop_idx-=1
                count+=1

    curr_id_count += 1

k_nn_df.head(10)'''

# Using Keras Instead

In [None]:
target = qb_nn_df["fantasy_points"]
target = target.fillna(0)
target = target.astype('float')
target

In [None]:
features = qb_nn_df.drop(columns=["fantasy_points"])
features = features.fillna(0)
features = features.astype('float')
features

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

EPOCHS = 900
BATCH_SIZE = 128


model = tf.keras.Sequential([
    layers.Dense(512, activation='relu', input_shape=(features.shape[1],)),
    layers.Dense(512, activation='relu', input_shape=(features.shape[1],)),
    layers.Dense(512, activation='relu', input_shape=(features.shape[1],)),
    layers.Dense(512, activation='relu', input_shape=(features.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.fit(features, target, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2)

In [None]:
loss, mae = model.evaluate(features, target)
print(f'Mean Absolute Error: {mae}')

# Verification

## Raw Data

In [None]:
first_year = 2023
last_year = 2024

In [None]:
weekly = nfl.import_weekly_data(range(first_year,last_year)) #Built in function collecting raw weekly data within year range
weekly = weekly[weekly["season_type"].isin(["REG"])].reset_index(drop=True) #Only take data from regular season (no post season)
weekly[0:5] #Print sample of result


#Input year, week, and team of player
#Output what team they faced at that time. Necessary because data before 2022 does not contain opposing team information
def date_and_team_to_other_team(year, week, team):
    
    curr_game = pbp[pbp["week"].isin([week])] #Isolate pbp data to only the week of the game currently being played
    curr_game = curr_game[curr_game["year"].isin([year])] #Isolate pbp data to only the year of the game currently being played
    
    away_check = curr_game[curr_game["away_team"].isin([team])].reset_index(drop=True) #Option 1: Input team is the AWAY TEAM in pbp data
    home_check = curr_game[curr_game["home_team"].isin([team])].reset_index(drop=True) #Option 2: Input team is the HOME TEAM in pbp data

    #If option 1, return cooresponding opposing team
    if home_check.empty and not away_check.empty:
        return away_check["home_team"].at[0]
        
    #Else if option 2, return cooresponding opposing team    
    elif away_check.empty and not home_check.empty:
        return home_check["away_team"].at[0] 

    #Else, must be a bye-week for the team since there is no pbp data in the criteria. Return an empty string
    return " "


#Add opponent_team column to raw weekly data. Necessary for processing defensive performance. Uses a TQDM progress bar
#TODO: See if vectorization can speed up processing time. Currently takes a significant amount of time to run (approx. 3.5 minutes/year of data)
weekly["opponent_team"] = weekly.progress_apply(lambda row: date_and_team_to_other_team(row["season"], row["week"], row["recent_team"]), axis=1)
display(weekly[0:5]) #Print sample of result


pbp = nfl.import_pbp_data(range(first_year,last_year)) #Built in function collecting raw pbp data within year range
pbp = pbp[pbp["season_type"].isin(["REG"])].reset_index(drop=True) #Only take data from regular season (no post season)
pbp["year"] = pbp["game_id"].str[:4] #Use substring of game_id to create new column "year"
pbp["year"] = pbp["year"].astype(int) #Cast all elements of new column to int (rather than string)
display(pbp.head(1)) #Print sample of result
display(pbp.tail(1)) #Print sample of result

qb = weekly[weekly["position"].isin(["QB"])].reset_index(drop=True) #Create new dataframe for relevant QB information
qb["label"] = qb.progress_apply(lambda row: row["player_id"] + ":" + str(row["season"]) + ":" + str(row["week"]).zfill(2) + ":" + row["recent_team"], axis=1)
qb = qb.loc[:, ["label",
                "player_id",
                "week",
                "season",
                "opponent_team",
                 "completions",
                 "attempts",
                 "passing_yards",
                 "passing_tds",
                 "interceptions",
                 "sacks",
                 "sack_fumbles",
                 "passing_air_yards",
                 "passing_yards_after_catch",
                 "passing_first_downs",
                 "passing_epa",
                 "passing_2pt_conversions",
                 "pacr",
                 "dakota",
                 "carries",
                 "rushing_yards",
                 "rushing_tds",
                 "rushing_fumbles",
                 "rushing_first_downs",
                 "rushing_epa",
                 "rushing_2pt_conversions",
                 "fantasy_points"]]
qb[0:5]

## Preprocessing

In [None]:
#Inputs team name
#Outputs performance history across team history in list format, where [0] is their most recent game and [len-1] is their first recorded game

def get_defensive_performance_history(team):
    
    season = last_year-1 #Upper bound for season year
    week=18 #Upper bound for week in season

    #pd.DataFrame(data={'season': season_num, 'week': week_num, 'defending_team': defending_team, 'offensive_team': offensive_team, 'interceptions': interceptions, 'sacks': sacks, 'sack_yards': sack_yards, 'sack_fumbles': sack_fumbles, 'sack_fumbles_recovered': sack_fumbles_recovered, 'receiving_fumbles': receiving_fumbles, 'receiving_fumbles_recovered': receiving_fumbles_recovered, 'rushing_yards_allowed': rushing_yards_allowed, 'passing_yards_allowed': passing_yards_allowed, 'passing_tds_allowed': passing_tds_allowed, 'rushing_tds_allowed': rushing_tds_allowed, 'special_teams_tds_allowed': special_teams_tds_allowed},index=[f'{season_num}-{week_num}-{defending_team}'])
    return_df = pd.DataFrame(columns=["season","week","defending_team","offensive_team","interceptions","sacks","sack_yards","sack_fumbles","sack_fumbles_recovered","receiving_fumbles","receiving_fumbles_recovered","rushing_yards_allowed","passing_yards_allowed","passing_tds_allowed","rushing_tds_allowed","special_teams_tds_allowed"])
    #display(return_df)

    while(season>=first_year): #Not exceeding accepted range
        
        while(week>0): #Not exceeding accepted week
            
            curr_week = weekly[weekly["week"].isin([week])] #Isolate weekly data to only the week of the game currently being played
            curr_week = curr_week[curr_week["season"].isin([season])] #Isolate weekly data to only the year of the game currently being played
            curr_week = curr_week[curr_week["opponent_team"].isin([team])].reset_index(drop=True) #Isolate weekly data to only when the defending team is the input team

            #display(curr_week)
            
            #Construct dataset using weekly data for current week, season, and defending team
            #Note: opponents stats now reflect the defensive performance, just inverted (E.G. rushing_yards now is rushing_yards allowed)
            if not curr_week.empty:
                season_num = curr_week.at[0,"season"]
                #print(season_num)
                week_num = curr_week.at[0,"week"]
                defending_team = curr_week.at[0,"opponent_team"]
                offensive_team = curr_week.at[0,"recent_team"]
                interceptions = curr_week[["interceptions"]].sum().iloc[0]
                sacks = curr_week[["sacks"]].sum().iloc[0]
                sack_yards = curr_week[["sack_yards"]].sum().iloc[0]
                sack_fumbles = curr_week[["sack_fumbles"]].sum().iloc[0]
                sack_fumbles_recovered = curr_week[["sack_fumbles_lost"]].sum().iloc[0]
                receiving_fumbles = curr_week[["receiving_fumbles"]].sum().iloc[0]
                receiving_fumbles_recovered = curr_week[["receiving_fumbles_lost"]].sum().iloc[0]
                rushing_yards_allowed = curr_week[["rushing_yards"]].sum().iloc[0]
                passing_yards_allowed = curr_week[["passing_yards"]].sum().iloc[0]
                passing_tds_allowed = curr_week[["passing_tds"]].sum().iloc[0]
                rushing_tds_allowed = curr_week[["rushing_tds"]].sum().iloc[0]
                special_teams_tds_allowed = curr_week[["special_teams_tds"]].sum().iloc[0]
                new_df = pd.DataFrame(data={'season': season_num, 'week': week_num, 'defending_team': defending_team, 'offensive_team': offensive_team, 'interceptions': interceptions, 'sacks': sacks, 'sack_yards': sack_yards, 'sack_fumbles': sack_fumbles, 'sack_fumbles_recovered': sack_fumbles_recovered, 'receiving_fumbles': receiving_fumbles, 'receiving_fumbles_recovered': receiving_fumbles_recovered, 'rushing_yards_allowed': rushing_yards_allowed, 'passing_yards_allowed': passing_yards_allowed, 'passing_tds_allowed': passing_tds_allowed, 'rushing_tds_allowed': rushing_tds_allowed, 'special_teams_tds_allowed': special_teams_tds_allowed},index=[f'{season_num}-{week_num}-{defending_team}'])
                return_df = pd.concat([return_df,new_df])
                #display(return_df)
                
            week-=1 #Repeat inner loop with data from a week ago  
            
        week=18 #Reset week counter
        season-=1 #Repeat outer loop with data from a season ago  
        
    return return_df #Array with appended data from all defensive activities

#Empty dictionary for each team abbreviation
team_defenses = {
    "NE" : None,
    "NO" : None,
    "NYJ": None,
    "LAC": None,
    "ATL": None,
    "NYG": None,
    "ARI": None,
    "PIT": None,
    "WAS": None,
    "GB" : None,
    "MIA": None,
    "PHI": None,
    "BUF": None,
    "DET": None,
    "TB" : None,
    "SEA": None,
    "TEN": None,
    "BAL": None,
    "LV" : None,
    "SF" : None,
    "CAR": None,
    "KC" : None,
    "JAX": None,
    "CHI": None,
    "LA" : None,
    "DEN": None,
    "HOU": None,
    "CIN": None,
    "MIN": None,
    "CLE": None,
    "IND": None,
    "DAL": None
}

#Populate dictionary with cooresponding team's history of defensive performances
for x in team_defenses:
    team_defenses[x] = get_defensive_performance_history(x)

team_defenses["CIN"] #Print sample of result

In [None]:
count = 0

qb = qb.sort_values(by="label")

curr_id_count = 0

prev_id = " "

qb_nn_df = pd.DataFrame()
for x in range(1,game_coverage+1):
    new_columns = pd.DataFrame(columns=[
                        f"{x}_completions",
                        f"{x}_attempts",
                        f"{x}_passing_yards",
                        f"{x}_passing_tds",
                        f"{x}_interceptions",
                        f"{x}_sacks",
                        f"{x}_sack_fumbles",
                        f"{x}_passing_air_yards",
                        f"{x}_passing_yards_after_catch",
                        f"{x}_passing_first_downs",
                        f"{x}_passing_epa",
                        f"{x}_passing_2pt_conversions",
                        f"{x}_pacr",
                        f"{x}_dakota",
                        f"{x}_carries",
                        f"{x}_rushing_yards",
                        f"{x}_rushing_tds",
                        f"{x}_rushing_fumbles",
                        f"{x}_rushing_first_downs",
                        f"{x}_rushing_epa",
                        f"{x}_rushing_2pt_conversions",
                        f"{x}_fantasy_points",

                        f"{x}_opp_interceptions",
                        f"{x}_opp_sacks",
                        f"{x}_opp_sack_yards",
                        f"{x}_opp_sack_fumbles",
                        f"{x}_opp_sack_fumbles_recovered",
                        f"{x}_opp_receiving_fumbles",
                        f"{x}_opp_receiving_fumbles_recovered",
                        f"{x}_opp_rushing_yards_allowed",
                        f"{x}_opp_passing_yards_allowed",
                        f"{x}_opp_passing_tds_allowed",
                        f"{x}_opp_rushing_tds_allowed",
                        f"{x}_opp_special_teams_tds_allowed"])
    
    qb_nn_df = pd.concat([qb_nn_df, new_columns])

qb_nn_df["label"] = qb["label"]


#NEW : NEED TO ADD TO OTHER POS GROUPS
qb_nn_df = pd.concat([qb_nn_df,pd.DataFrame(columns=["fantasy_points"])])

#print(qb_nn_df)

qb_nn_df.set_index("label", inplace = True)

for x in qb["label"]:

    player_id, season, week, recent_team = x.split(":")
    week = int(week)
    season = int(season)

    if prev_id != player_id:
        
        prev_id = player_id
        curr_id_count=0

    if curr_id_count == 0:

        qb_nn_df = qb_nn_df.drop(index=x)
        print("Curr ID Count: " + str(curr_id_count) + ", DF based on draft pick")
        print(x)
        
    elif curr_id_count < game_coverage:

        curr_player = qb[qb["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0] -1
        loop_idx = curr_idx
        count = 0

        curr_defense = curr_player["opponent_team"].iloc[0]
        #print(curr_defense)

        while(count!=game_coverage):

            
            
            qb_nn_df.at[x,f"{count+1}_completions"] = curr_player.iloc[loop_idx]["completions"]
            qb_nn_df.at[x,f"{count+1}_attempts"] = curr_player.iloc[loop_idx]["attempts"]
            qb_nn_df.at[x,f"{count+1}_passing_yards"] = curr_player.iloc[loop_idx]["passing_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_tds"] = curr_player.iloc[loop_idx]["passing_tds"]
            qb_nn_df.at[x,f"{count+1}_interceptions"] = curr_player.iloc[loop_idx]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_sacks"] = curr_player.iloc[loop_idx]["sacks"]
            qb_nn_df.at[x,f"{count+1}_sack_fumbles"] = curr_player.iloc[loop_idx]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_passing_air_yards"] = curr_player.iloc[loop_idx]["passing_air_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_yards_after_catch"] = curr_player.iloc[loop_idx]["passing_yards_after_catch"]
            qb_nn_df.at[x,f"{count+1}_passing_first_downs"] = curr_player.iloc[loop_idx]["passing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_passing_epa"] = curr_player.iloc[loop_idx]["passing_epa"]
            qb_nn_df.at[x,f"{count+1}_passing_2pt_conversions"] = curr_player.iloc[loop_idx]["passing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_pacr"] = curr_player.iloc[loop_idx]["pacr"]
            qb_nn_df.at[x,f"{count+1}_dakota"] = curr_player.iloc[loop_idx]["dakota"]
            qb_nn_df.at[x,f"{count+1}_carries"] = curr_player.iloc[loop_idx]["carries"]
            qb_nn_df.at[x,f"{count+1}_rushing_yards"] = curr_player.iloc[loop_idx]["rushing_yards"]
            qb_nn_df.at[x,f"{count+1}_rushing_tds"] = curr_player.iloc[loop_idx]["rushing_tds"]
            qb_nn_df.at[x,f"{count+1}_rushing_fumbles"] = curr_player.iloc[loop_idx]["rushing_fumbles"]
            qb_nn_df.at[x,f"{count+1}_rushing_first_downs"] = curr_player.iloc[loop_idx]["rushing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_rushing_epa"] = curr_player.iloc[loop_idx]["rushing_epa"]
            qb_nn_df.at[x,f"{count+1}_rushing_2pt_conversions"] = curr_player.iloc[loop_idx]["rushing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]


            
            curr_loop_week = curr_player.iloc[loop_idx]["week"]
            curr_loop_season = curr_player.iloc[loop_idx]["season"]
            
            opp_team_df = team_defenses[curr_defense]
            opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
            opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)

            while(opp_team_df.empty):
                curr_loop_week-=1
                if curr_loop_week == 0:
                    curr_loop_week = 18
                    curr_loop_season-=1
                if curr_loop_season < first_year:
                    curr_loop_season = last_year-1
                opp_team_df = team_defenses[curr_defense]
                opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
                opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)


            qb_nn_df.at[x,f"{count+1}_opp_interceptions"] = opp_team_df.iloc[0]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_opp_sacks"] = opp_team_df.iloc[0]["sacks"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_yards"] = opp_team_df.iloc[0]["sack_yards"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles"] = opp_team_df.iloc[0]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles_recovered"] = opp_team_df.iloc[0]["sack_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles"] = opp_team_df.iloc[0]["receiving_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles_recovered"] = opp_team_df.iloc[0]["receiving_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_yards_allowed"] = opp_team_df.iloc[0]["rushing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_yards_allowed"] = opp_team_df.iloc[0]["passing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_tds_allowed"] = opp_team_df.iloc[0]["passing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_tds_allowed"] = opp_team_df.iloc[0]["rushing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_special_teams_tds_allowed"] = opp_team_df.iloc[0]["special_teams_tds_allowed"]

            
            

            loop_idx-=1
            if(loop_idx<0):
                loop_idx=curr_idx
            count+=1

        #NEW : NEED TO ADD TO OTHER POS GROUPS
        curr_game = curr_player[curr_player["label"].isin([x])]
        #print(curr_game["fantasy_points"].iloc[0])
        qb_nn_df.at[x,"fantasy_points"] = curr_game["fantasy_points"].iloc[0]

    else:

        curr_player = qb[qb["player_id"].isin([player_id])].reset_index(drop=True)
        curr_idx = curr_player.index[curr_player["label"] == x][0]-1
        loop_idx = curr_idx
        count = 0

        while(count!=game_coverage):
            
            qb_nn_df.at[x,f"{count+1}_completions"] = curr_player.iloc[loop_idx]["completions"]
            qb_nn_df.at[x,f"{count+1}_attempts"] = curr_player.iloc[loop_idx]["attempts"]
            qb_nn_df.at[x,f"{count+1}_passing_yards"] = curr_player.iloc[loop_idx]["passing_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_tds"] = curr_player.iloc[loop_idx]["passing_tds"]
            qb_nn_df.at[x,f"{count+1}_interceptions"] = curr_player.iloc[loop_idx]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_sacks"] = curr_player.iloc[loop_idx]["sacks"]
            qb_nn_df.at[x,f"{count+1}_sack_fumbles"] = curr_player.iloc[loop_idx]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_passing_air_yards"] = curr_player.iloc[loop_idx]["passing_air_yards"]
            qb_nn_df.at[x,f"{count+1}_passing_yards_after_catch"] = curr_player.iloc[loop_idx]["passing_yards_after_catch"]
            qb_nn_df.at[x,f"{count+1}_passing_first_downs"] = curr_player.iloc[loop_idx]["passing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_passing_epa"] = curr_player.iloc[loop_idx]["passing_epa"]
            qb_nn_df.at[x,f"{count+1}_passing_2pt_conversions"] = curr_player.iloc[loop_idx]["passing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_pacr"] = curr_player.iloc[loop_idx]["pacr"]
            qb_nn_df.at[x,f"{count+1}_dakota"] = curr_player.iloc[loop_idx]["dakota"]
            qb_nn_df.at[x,f"{count+1}_carries"] = curr_player.iloc[loop_idx]["carries"]
            qb_nn_df.at[x,f"{count+1}_rushing_yards"] = curr_player.iloc[loop_idx]["rushing_yards"]
            qb_nn_df.at[x,f"{count+1}_rushing_tds"] = curr_player.iloc[loop_idx]["rushing_tds"]
            qb_nn_df.at[x,f"{count+1}_rushing_fumbles"] = curr_player.iloc[loop_idx]["rushing_fumbles"]
            qb_nn_df.at[x,f"{count+1}_rushing_first_downs"] = curr_player.iloc[loop_idx]["rushing_first_downs"]
            qb_nn_df.at[x,f"{count+1}_rushing_epa"] = curr_player.iloc[loop_idx]["rushing_epa"]
            qb_nn_df.at[x,f"{count+1}_rushing_2pt_conversions"] = curr_player.iloc[loop_idx]["rushing_2pt_conversions"]
            qb_nn_df.at[x,f"{count+1}_fantasy_points"] = curr_player.iloc[loop_idx]["fantasy_points"]



            

            curr_loop_week = curr_player.iloc[loop_idx]["week"]
            curr_loop_season = curr_player.iloc[loop_idx]["season"]
            
            opp_team_df = team_defenses[curr_defense]
            opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
            opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)

            while(opp_team_df.empty):
                curr_loop_week-=1
                if curr_loop_week == 0:
                    curr_loop_week = 18
                    curr_loop_season-=1
                if curr_loop_season < first_year:
                    curr_loop_season = last_year-1
                opp_team_df = team_defenses[curr_defense]
                opp_team_df = opp_team_df[opp_team_df["week"].isin([curr_loop_week])]
                opp_team_df = opp_team_df[opp_team_df["season"].isin([curr_loop_season])].reset_index(drop=True)


            qb_nn_df.at[x,f"{count+1}_opp_interceptions"] = opp_team_df.iloc[0]["interceptions"]
            qb_nn_df.at[x,f"{count+1}_opp_sacks"] = opp_team_df.iloc[0]["sacks"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_yards"] = opp_team_df.iloc[0]["sack_yards"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles"] = opp_team_df.iloc[0]["sack_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_sack_fumbles_recovered"] = opp_team_df.iloc[0]["sack_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles"] = opp_team_df.iloc[0]["receiving_fumbles"]
            qb_nn_df.at[x,f"{count+1}_opp_receiving_fumbles_recovered"] = opp_team_df.iloc[0]["receiving_fumbles_recovered"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_yards_allowed"] = opp_team_df.iloc[0]["rushing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_yards_allowed"] = opp_team_df.iloc[0]["passing_yards_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_passing_tds_allowed"] = opp_team_df.iloc[0]["passing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_rushing_tds_allowed"] = opp_team_df.iloc[0]["rushing_tds_allowed"]
            qb_nn_df.at[x,f"{count+1}_opp_special_teams_tds_allowed"] = opp_team_df.iloc[0]["special_teams_tds_allowed"]



            

            loop_idx-=1
            count+=1

        #NEW : NEED TO ADD TO OTHER POS GROUPS
        curr_game = curr_player[curr_player["label"].isin([x])]
        #print(curr_game["fantasy_points"].iloc[0])
        qb_nn_df.at[x,"fantasy_points"] = curr_game["fantasy_points"].iloc[0]

    curr_id_count += 1

qb_nn_df.head(10)

## Model Passthrough/Error Determination

In [None]:
target = qb_nn_df["fantasy_points"]
target = target.fillna(0)
target = target.astype('float')
target

In [None]:
features = qb_nn_df.drop(columns=["fantasy_points"])
features = features.fillna(0)
features = features.astype('float')
features

In [None]:
loss, mae = model.evaluate(features, target)
print(f'Mean Absolute Error: {mae}')