# Imports

In [1]:
import numpy as np
import pandas as pd
import nfl_data_py as nfl

# Importing NFL Data

In [2]:
# Seasons 1999 through 2023 inclusive; range() excludes its stop value, hence 2024.
years = range(1999, 2024)

# Loading Basic Data

Load the current version of the filter data from its pickle file. Regenerating this data is very slow because of the Wikipedia queries involved.

In [3]:
# Read the previously saved filter data from disk.
# pd.read_pickle can execute code embedded in the file, so only load pickles
# that this project produced itself.
df_core = pd.read_pickle(filepath_or_buffer="./temp_df_filter.pkl")

In [4]:
# Preview the first five rows of the core frame.
df_core.head(n=5)

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,team,rank,tier,Draft Year,Draft No.,Draft Round,Draft Pick,Draft Overall,Draft Team,College
0,00-0000003,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,Browns,49.0,5.0,,,,,,,
1,00-0000003,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,Dolphins,50.0,5.0,,,,,,,
2,00-0000003,2000,REG,0,0,0.0,0,0.0,0.0,0.0,...,Colts,167.0,14.0,,,,,,,
3,00-0000007,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,Buccaneers,152.0,13.0,,,,,,,
4,00-0000007,2000,REG,0,0,0.0,0,0.0,0.0,0.0,...,Buccaneers,122.0,11.0,,,,,,,


Players that were missed during the pass that created the data set:

```
[('Willie Jackson', 'WR', 'Bengals'), ('Willie Jackson', 'WR', 'Saints'), ('Willie Jackson', 'WR', 'Saints'), ('Willie Jackson', 'WR', 'Falcons'), ('Willie Jackson', 'WR', 'Redskins'), ('Charles Johnson', 'WR', 'Eagles'), ('Charles Johnson', 'WR', 'Eagles'), ('Charles Johnson', 'WR', 'Patriots'), ('Charles Johnson', 'WR', 'Bills'), ('Jim Miller', 'QB', 'Bears'), ('Jim Miller', 'QB', 'Bears'), ('Jim Miller', 'QB', 'Bears'), ('Jim Miller', 'QB', 'Bears'), ('Irv Smith', 'TE', 'Browns'), ('Ricky Williams', 'RB', 'Colts'), ('Ricky Williams', 'RB', 'Colts'), ('Michael Williams', 'TE', 'Patriots'), ('Charles Johnson', 'WR', 'Vikings'), ('Charles Johnson', 'WR', 'Vikings'), ('David Williams', 'RB', 'Jaguars')]
```

# Adding NGS Data

In [5]:
# NGS columns removed by convert_to_seasonal.
cols_to_drop = [
    'player_first_name',
    'player_last_name',
    'player_jersey_number',
    'player_short_name',
    'team_abbr',
]

In [6]:
# NGS column name -> name used by the core frame, applied by convert_to_seasonal.
cols_to_rename = {
    "player_display_name": "player_name",
    "player_gsis_id": "player_id",
    "player_position": "position",
}

In [7]:
def convert_to_seasonal(df_ngs, drop_cols=None, rename_map=None):
    """Return the season-level rows of a raw Next Gen Stats frame.

    The input frame is not modified; every step builds a new frame.

    Parameters
    ----------
    df_ngs : pandas.DataFrame
        Raw NGS frame. Must contain a ``week`` column and every column
        listed in ``drop_cols``.
    drop_cols : list of str, optional
        Columns to remove. Defaults to the notebook-level ``cols_to_drop``.
    rename_map : dict, optional
        ``{old_name: new_name}`` column mapping. Defaults to the
        notebook-level ``cols_to_rename``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df_ngs`` where ``week == 0``, with ``drop_cols`` and
        ``week`` removed and ``rename_map`` applied. Original index labels
        are kept.

    Raises
    ------
    KeyError
        If ``week`` or any column in ``drop_cols`` is missing.
    """
    # Fall back to the notebook globals only when the caller gave no override.
    if drop_cols is None:
        drop_cols = cols_to_drop
    if rename_map is None:
        rename_map = cols_to_rename

    # clean up df (non-mutating: drop/rename each return a new frame)
    cleaned = df_ngs.drop(columns=drop_cols).rename(columns=rename_map)

    # convert to seasonal version
    # NOTE(review): week == 0 appears to mark the season-aggregate rows in the
    # NGS feed (the week-0 rows shown in this notebook carry season totals) -
    # confirm against the nflverse data dictionary.
    seasonal = cleaned.loc[cleaned["week"] == 0].drop(columns=["week"])

    # return output
    return seasonal

## Passing

In [8]:
# Download the NGS passing table for the configured seasons and preview it.
df_ngs_passing = nfl.import_ngs_data(stat_type='passing', years=years)
df_ngs_passing.head(n=5)

Unnamed: 0,season,season_type,week,player_display_name,player_position,team_abbr,avg_time_to_throw,avg_completed_air_yards,avg_intended_air_yards,avg_air_yards_differential,...,completion_percentage,expected_completion_percentage,completion_percentage_above_expectation,avg_air_distance,max_air_distance,player_gsis_id,player_first_name,player_last_name,player_jersey_number,player_short_name
0,2016,REG,0,Drew Brees,QB,NO,2.424238,6.195456,7.550997,-1.355541,...,69.985141,64.783965,5.201176,20.815996,59.033016,00-0020531,Drew,Brees,9,D.Brees
1,2016,REG,0,Andrew Luck,QB,IND,2.879505,7.679451,8.86511,-1.185659,...,63.486239,58.393363,5.092876,21.712038,56.956055,00-0029668,Andrew,Luck,12,A.Luck
2,2016,REG,0,Derek Carr,QB,LV,2.49175,5.939776,8.352804,-2.413028,...,63.75,60.066501,3.683499,22.219365,62.925848,00-0031280,Derek,Carr,4,
3,2016,REG,0,Tom Brady,QB,NE,2.559618,6.306392,8.112685,-1.806293,...,67.361111,64.09122,3.269891,21.358495,56.977436,00-0019596,Tom,Brady,12,T.Brady
4,2016,REG,0,Aaron Rodgers,QB,GB,2.8746,6.272394,9.238016,-2.965622,...,65.737705,62.073017,3.664688,23.43448,70.765238,00-0023459,Aaron,Rodgers,12,A.Rodgers


### Seasonal Version

In [9]:
# Hand the helper a copy so the raw passing frame above stays as loaded.
df_ngs_passing_seasonal = convert_to_seasonal(df_ngs=df_ngs_passing.copy())
df_ngs_passing_seasonal.head(n=5)

Unnamed: 0,season,season_type,player_name,position,avg_time_to_throw,avg_completed_air_yards,avg_intended_air_yards,avg_air_yards_differential,aggressiveness,max_completed_air_distance,...,pass_touchdowns,interceptions,passer_rating,completions,completion_percentage,expected_completion_percentage,completion_percentage_above_expectation,avg_air_distance,max_air_distance,player_id
0,2016,REG,Drew Brees,QB,2.424238,6.195456,7.550997,-1.355541,17.533432,52.24008,...,37,15,101.687098,471,69.985141,64.783965,5.201176,20.815996,59.033016,00-0020531
1,2016,REG,Andrew Luck,QB,2.879505,7.679451,8.86511,-1.185659,18.715596,56.956055,...,31,13,96.418196,346,63.486239,58.393363,5.092876,21.712038,56.956055,00-0029668
2,2016,REG,Derek Carr,QB,2.49175,5.939776,8.352804,-2.413028,19.642857,62.925848,...,28,6,96.703869,357,63.75,60.066501,3.683499,22.219365,62.925848,00-0031280
3,2016,REG,Tom Brady,QB,2.559618,6.306392,8.112685,-1.806293,17.592593,56.977436,...,28,2,112.172068,291,67.361111,64.09122,3.269891,21.358495,56.977436,00-0019596
4,2016,REG,Aaron Rodgers,QB,2.8746,6.272394,9.238016,-2.965622,16.393443,60.821912,...,40,7,104.187158,401,65.737705,62.073017,3.664688,23.43448,70.765238,00-0023459


## Rushing

In [10]:
# Download the NGS rushing table for the configured seasons and preview it.
df_ngs_rushing = nfl.import_ngs_data(stat_type='rushing', years=years)
df_ngs_rushing.head(n=5)

Unnamed: 0,season,season_type,week,player_display_name,player_position,team_abbr,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,...,rush_yards_over_expected,avg_rush_yards,rush_yards_over_expected_per_att,rush_pct_over_expected,rush_touchdowns,player_gsis_id,player_first_name,player_last_name,player_jersey_number,player_short_name
0,2016,REG,0,Latavius Murray,RB,LV,3.913071,45.641026,2.589171,195,...,-0.997213,4.041026,-0.997213,0.0,12,00-0030513,Latavius,Murray,28,L.Murray
1,2016,REG,0,Tim Hightower,RB,NO,3.737044,27.819549,2.553855,133,...,,4.120301,,,4,00-0026289,Tim,Hightower,34,T.Hightower
2,2016,REG,0,Matt Asiata,RB,MIN,3.999453,38.842975,2.256416,121,...,,3.322314,,,6,00-0028198,Matt,Asiata,44,M.Asiata
3,2016,REG,0,David Johnson,RB,ARI,3.869201,25.59727,2.637508,293,...,,4.228669,,,16,00-0032187,David,Johnson,31,D.Johnson
4,2016,REG,0,Jonathan Stewart,RB,CAR,4.379927,38.073394,2.83439,218,...,,3.779817,,,9,00-0026153,Jonathan,Stewart,28,J.Stewart


### Seasonal Version

In [11]:
# Hand the helper a copy so the raw rushing frame above stays as loaded.
df_ngs_rushing_seasonal = convert_to_seasonal(df_ngs=df_ngs_rushing.copy())
df_ngs_rushing_seasonal.head(n=5)

Unnamed: 0,season,season_type,player_name,position,efficiency,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,rush_yards,expected_rush_yards,rush_yards_over_expected,avg_rush_yards,rush_yards_over_expected_per_att,rush_pct_over_expected,rush_touchdowns,player_id
0,2016,REG,Latavius Murray,RB,3.913071,45.641026,2.589171,195,788,5.997213,-0.997213,4.041026,-0.997213,0.0,12,00-0030513
1,2016,REG,Tim Hightower,RB,3.737044,27.819549,2.553855,133,548,,,4.120301,,,4,00-0026289
2,2016,REG,Matt Asiata,RB,3.999453,38.842975,2.256416,121,402,,,3.322314,,,6,00-0028198
3,2016,REG,David Johnson,RB,3.869201,25.59727,2.637508,293,1239,,,4.228669,,,16,00-0032187
4,2016,REG,Jonathan Stewart,RB,4.379927,38.073394,2.83439,218,824,,,3.779817,,,9,00-0026153


## Receiving

In [12]:
# Download the NGS receiving table for the configured seasons and preview it.
df_ngs_receiving = nfl.import_ngs_data(stat_type='receiving', years=years)
df_ngs_receiving.head(n=5)

Unnamed: 0,season,season_type,week,player_display_name,player_position,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,...,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_jersey_number,player_short_name
0,2016,REG,0,Tyreek Hill,WR,KC,7.822639,3.518714,7.870602,16.708058,...,599.0,6,4.670656,5.33157,-0.660915,00-0033040,Tyreek,Hill,10,T.Hill
1,2016,REG,0,Richard Rodgers,TE,GB,7.605789,2.693882,8.381277,6.884246,...,271.0,2,3.347667,3.274428,0.073239,00-0031384,Richard,Rodgers,82,
2,2016,REG,0,Travis Benjamin,WR,LAC,7.601111,3.017426,13.329867,19.536131,...,677.0,4,5.679362,4.968711,0.710651,00-0029269,Travis,Benjamin,12,
3,2016,REG,0,Cordarrelle Patterson,WR,MIN,7.466269,3.230445,5.564,9.530008,...,453.0,2,6.278462,5.61906,0.659402,00-0030578,Cordarrelle,Patterson,84,C.Patterson
4,2016,REG,0,DeSean Jackson,WR,WAS,7.295833,2.886285,16.0354,28.085914,...,1005.0,4,4.867143,5.051773,-0.18463,00-0026189,DeSean,Jackson,11,D.Jackson


### Seasonal Version

In [13]:
# Hand the helper a copy so the raw receiving frame above stays as loaded.
df_ngs_receiving_seasonal = convert_to_seasonal(df_ngs=df_ngs_receiving.copy())
df_ngs_receiving_seasonal.head(n=5)

Unnamed: 0,season,season_type,player_name,position,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,receptions,targets,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_id
0,2016,REG,Tyreek Hill,WR,7.822639,3.518714,7.870602,16.708058,61,83,73.493976,599.0,6,4.670656,5.33157,-0.660915,00-0033040
1,2016,REG,Richard Rodgers,TE,7.605789,2.693882,8.381277,6.884246,30,47,63.829787,271.0,2,3.347667,3.274428,0.073239,00-0031384
2,2016,REG,Travis Benjamin,WR,7.601111,3.017426,13.329867,19.536131,47,75,62.666667,677.0,4,5.679362,4.968711,0.710651,00-0029269
3,2016,REG,Cordarrelle Patterson,WR,7.466269,3.230445,5.564,9.530008,52,70,74.285714,453.0,2,6.278462,5.61906,0.659402,00-0030578
4,2016,REG,DeSean Jackson,WR,7.295833,2.886285,16.0354,28.085914,56,100,56.0,1005.0,4,4.867143,5.051773,-0.18463,00-0026189


## Merging of Columns

## First Merge - Passing

In [14]:
# Columns present in both frames, listed in df_core's column order.
# Building the list from a set made its order depend on string hash
# randomisation, so the displayed list changed between kernel restarts.
common_columns = [
    col for col in df_core.columns if col in df_ngs_passing_seasonal.columns
]
common_columns

['player_id',
 'player_name',
 'interceptions',
 'season',
 'completions',
 'position',
 'season_type',
 'attempts']

In [15]:
# Join on the identity keys only. Joining on every shared column (stat counts
# such as completions/attempts/interceptions, plus free-text name/position)
# silently leaves a player-season without NGS data whenever the two sources
# disagree on a count, a dtype or a spelling.
merge_keys = ["player_id", "season", "season_type"]

# Other overlapping columns are taken from df_core, so drop them from the NGS
# side; this keeps the result's column set the same as before (no _x/_y suffixes).
overlap_cols = [
    col
    for col in df_ngs_passing_seasonal.columns
    if col in df_core.columns and col not in merge_keys
]

# NOTE(review): assumes at most one NGS row per (player_id, season, season_type);
# otherwise core rows would be duplicated - consider validate="many_to_one".
df_ngs_merge_one = df_core.merge(
    df_ngs_passing_seasonal.drop(columns=overlap_cols),
    on=merge_keys,
    how="left",
)

In [17]:
# Preview the first five rows after the passing merge.
df_ngs_merge_one.head(n=5)

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,max_completed_air_distance,avg_air_yards_to_sticks,pass_yards,pass_touchdowns,passer_rating,completion_percentage,expected_completion_percentage,completion_percentage_above_expectation,avg_air_distance,max_air_distance
0,00-0000003,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
1,00-0000003,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
2,00-0000003,2000,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
3,00-0000007,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
4,00-0000007,2000,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,


In [18]:
# Spot-check a single quarterback's rows after the passing merge.
df_ngs_merge_one.query(expr="player_name == 'Tom Brady'")

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,max_completed_air_distance,avg_air_yards_to_sticks,pass_yards,pass_touchdowns,passer_rating,completion_percentage,expected_completion_percentage,completion_percentage_above_expectation,avg_air_distance,max_air_distance
2636,00-0019596,2000,REG,1,3,6.0,0,0.0,0.0,0.0,...,,,,,,,,,,
2637,00-0019596,2001,REG,264,413,2843.0,18,12.0,41.0,216.0,...,,,,,,,,,,
2638,00-0019596,2002,REG,373,601,3764.0,28,14.0,31.0,190.0,...,,,,,,,,,,
2639,00-0019596,2003,REG,317,527,3620.0,23,12.0,32.0,219.0,...,,,,,,,,,,
2640,00-0019596,2004,REG,288,474,3692.0,28,14.0,26.0,162.0,...,,,,,,,,,,
2641,00-0019596,2005,REG,334,530,4110.0,26,14.0,26.0,188.0,...,,,,,,,,,,
2642,00-0019596,2006,REG,320,517,3533.0,24,12.0,26.0,175.0,...,,,,,,,,,,
2643,00-0019596,2007,REG,398,578,4806.0,50,8.0,21.0,128.0,...,,,,,,,,,,
2644,00-0019596,2008,REG,7,11,76.0,0,0.0,0.0,0.0,...,,,,,,,,,,
2645,00-0019596,2009,REG,371,565,4398.0,28,13.0,16.0,86.0,...,,,,,,,,,,


## Second Merge - Rushing

In [19]:
# Columns present in both frames, listed in df_ngs_merge_one's column order.
# Building the list from a set made its order depend on string hash
# randomisation, so the displayed list changed between kernel restarts.
common_columns = [
    col for col in df_ngs_merge_one.columns if col in df_ngs_rushing_seasonal.columns
]
common_columns

['player_id', 'player_name', 'season', 'position', 'season_type']

In [20]:
# Join on the identity keys only. player_name and position are free-text
# labels from two different sources; using them as join keys silently drops
# the NGS data for a player-season whenever the labels differ.
merge_keys = ["player_id", "season", "season_type"]

# Other overlapping columns are taken from the left frame, so drop them from
# the NGS side; this keeps the result's column set the same as before.
overlap_cols = [
    col
    for col in df_ngs_rushing_seasonal.columns
    if col in df_ngs_merge_one.columns and col not in merge_keys
]

# NOTE(review): assumes at most one NGS row per (player_id, season, season_type);
# otherwise left rows would be duplicated - consider validate="many_to_one".
df_ngs_merge_two = df_ngs_merge_one.merge(
    df_ngs_rushing_seasonal.drop(columns=overlap_cols),
    on=merge_keys,
    how="left",
)

In [28]:
# Spot-check a single running back's rows after the rushing merge.
df_ngs_merge_two.query(expr="player_name == 'Derrick Henry'")

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,percent_attempts_gte_eight_defenders,avg_time_to_los,rush_attempts,rush_yards,expected_rush_yards,rush_yards_over_expected,avg_rush_yards,rush_yards_over_expected_per_att,rush_pct_over_expected,rush_touchdowns
10667,00-0032764,2016,REG,0,0,0.0,0,0.0,0.0,0.0,...,52.727273,2.79301,110.0,490.0,,,4.454545,,,5.0
10668,00-0032764,2017,REG,0,0,0.0,0,0.0,0.0,0.0,...,50.0,2.99298,176.0,744.0,,,4.227273,,,5.0
10669,00-0032764,2018,REG,2,3,14.0,0,0.0,0.0,0.0,...,32.093023,2.921061,215.0,1059.0,815.974155,236.025845,4.925581,1.108103,0.474178,12.0
10670,00-0032764,2019,REG,0,0,0.0,0,0.0,0.0,0.0,...,35.643564,2.722727,303.0,1540.0,1130.29466,325.70534,5.082508,1.130921,0.420139,16.0
10671,00-0032764,2020,REG,0,0,0.0,0,0.0,0.0,0.0,...,27.777778,2.759423,378.0,2027.0,1640.891725,360.108275,5.362434,0.970642,0.423181,17.0
10672,00-0032764,2021,REG,1,1,5.0,1,0.0,0.0,0.0,...,36.52968,2.763232,219.0,937.0,903.228041,24.771959,4.278539,0.114156,0.364055,10.0
10673,00-0032764,2022,REG,2,2,4.0,1,0.0,0.0,0.0,...,38.108883,2.696984,349.0,1538.0,1385.481835,88.518165,4.406877,0.261888,0.402367,13.0
10674,00-0032764,2023,REG,2,3,14.0,2,0.0,0.0,0.0,...,35.357143,2.801296,280.0,1167.0,1067.373227,83.626773,4.167857,0.305207,0.40146,12.0


## Third Merge - Receiving

In [32]:
# The passing frame already contributed an avg_intended_air_yards column to the
# merged data, so give the receiving version its own name before merging.
df_ngs_receiving_seasonal = df_ngs_receiving_seasonal.rename(
    columns={"avg_intended_air_yards": "avg_intended_air_yards_receiving"}
)

In [33]:
# Columns present in both frames, listed in df_ngs_merge_two's column order.
# Building the list from a set made its order depend on string hash
# randomisation, so the displayed list changed between kernel restarts.
common_columns = [
    col for col in df_ngs_merge_two.columns if col in df_ngs_receiving_seasonal.columns
]
common_columns

['player_id',
 'player_name',
 'season',
 'position',
 'season_type',
 'receptions',
 'targets']

In [34]:
# Join on the identity keys only. Joining on every shared column (receptions
# and targets counts, plus free-text name/position) silently leaves a
# player-season without NGS data whenever the two sources disagree.
merge_keys = ["player_id", "season", "season_type"]

# Other overlapping columns are taken from the left frame, so drop them from
# the NGS side; this keeps the result's column set the same as before.
overlap_cols = [
    col
    for col in df_ngs_receiving_seasonal.columns
    if col in df_ngs_merge_two.columns and col not in merge_keys
]

# NOTE(review): assumes at most one NGS row per (player_id, season, season_type);
# otherwise left rows would be duplicated - consider validate="many_to_one".
df_ngs_merge_three = df_ngs_merge_two.merge(
    df_ngs_receiving_seasonal.drop(columns=overlap_cols),
    on=merge_keys,
    how="left",
)

In [35]:
# Spot-check a single receiver's rows after the receiving merge.
df_ngs_merge_three.query(expr='player_name == "Tyreek Hill"')

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,avg_cushion,avg_separation,avg_intended_air_yards_receiving,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation
10845,00-0033040,2016,REG,0,0,0.0,0,0.0,0.0,0.0,...,7.822639,3.518714,7.870602,16.708058,73.493976,599.0,6.0,4.670656,5.33157,-0.660915
10846,00-0033040,2017,REG,0,1,0.0,0,1.0,0.0,0.0,...,7.15871,3.545983,11.453048,29.583154,71.428571,1183.0,7.0,6.396133,6.090304,0.30583
10847,00-0033040,2018,REG,0,0,0.0,0,0.0,0.0,0.0,...,5.855714,3.148718,14.804015,38.425177,63.50365,1479.0,12.0,6.379655,4.846704,1.532951
10848,00-0033040,2019,REG,0,0,0.0,0,0.0,0.0,0.0,...,5.86925,3.209024,12.578876,23.858916,65.168539,860.0,7.0,4.874655,4.183096,0.691559
10849,00-0033040,2020,REG,0,0,0.0,0,0.0,0.0,0.0,...,5.61094,3.433064,13.225111,35.585528,64.444444,1276.0,15.0,5.221839,4.217578,1.004261
10850,00-0033040,2021,REG,0,0,0.0,0,0.0,0.0,0.0,...,6.119592,3.586234,10.399434,35.499121,69.811321,1239.0,9.0,4.098198,3.763653,0.334545
10851,00-0033040,2022,REG,0,0,0.0,0,0.0,0.0,0.0,...,6.379797,3.310133,12.171118,40.152491,70.0,1710.0,7.0,4.331092,4.085669,0.245423
10852,00-0033040,2023,REG,0,0,0.0,0,0.0,0.0,0.0,...,5.756883,3.258427,10.837251,43.438525,69.590643,1799.0,13.0,5.770504,5.003955,0.766549


# Saving Data

In [36]:
# Independent (deep) copy, so df_final is detached from the merge result.
df_final = df_ngs_merge_three.copy(deep=True)

In [37]:
# Preview the first five rows of the final frame.
df_final.head(n=5)

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,avg_cushion,avg_separation,avg_intended_air_yards_receiving,percent_share_of_intended_air_yards,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation
0,00-0000003,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
1,00-0000003,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
2,00-0000003,2000,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
3,00-0000007,1999,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,
4,00-0000007,2000,REG,0,0,0.0,0,0.0,0.0,0.0,...,,,,,,,,,,


In [39]:
# Show season, player and PPR fantasy points for every row of the final frame.
df_final.loc[:, ["season", "player_name", "fantasy_points_ppr"]]

Unnamed: 0,season,player_name,fantasy_points_ppr
0,1999,Abdul-Karim al-Jabbar,81.70
1,1999,Abdul-Karim al-Jabbar,81.70
2,2000,Abdul-Karim al-Jabbar,-0.20
3,1999,Rabih Abdullah,4.30
4,2000,Rabih Abdullah,10.40
...,...,...,...
13246,2023,Bryce Young,156.38
13247,2023,Will Levis,102.02
13248,2023,C.J. Stroud,275.02
13249,2023,Anthony Richardson,72.68


In [40]:
# Write the combined core + NGS frame to disk as a pickle.
df_final.to_pickle(path="../../FantasyData/data-frames/df_basic_ngs_1999_2023.pkl")