# Imports and Helper Functions

In [1]:
import pandas as pd
import nfl_data_py as nfl

In [2]:
def get_qb(player_name, season):
    """Return the season passing line of the primary QB on a player's team.

    Reads the module-level frames ``df_roster``, ``df_depth_chart`` and
    ``df_seasonal``.

    Steps:
      1. Find the player's team for ``season`` in ``df_roster``.
      2. Collect every QB on that team's roster for the season.
      3. Pick the QB with the lowest ``depth_team_mean`` in ``df_depth_chart``
         (ties keep the first QB in roster order).
      4. Return that QB's stats from ``df_seasonal``.

    Parameters
    ----------
    player_name : str
        Value to match against ``df_roster["player_name"]``.
    season : int
        Season to match in all three frames.

    Returns
    -------
    tuple or None
        ``(completions, attempts, passing_yards, passing_tds, interceptions)``
        or ``None`` when the player is not on the roster for that season, no
        team QB has a depth-chart entry below the cut-off, or the selected QB
        has no seasonal stats.
    """
    # Boolean masks instead of f-string queries: names containing quotes
    # would otherwise produce an invalid query expression.
    player_rows = df_roster[
        (df_roster["player_name"] == player_name) & (df_roster["season"] == season)
    ]
    if player_rows.empty:
        # Previously this fell through to .iloc[0] and raised IndexError.
        return None
    team = player_rows["team"].iloc[0]

    team_qb_ids = df_roster[
        (df_roster["season"] == season)
        & (df_roster["team"] == team)
        & (df_roster["position"] == "QB")
    ]["player_id"]

    # Only depth values strictly below this cut-off can be selected.
    min_depth = 10
    starting_qb_id = None

    for qb_id in team_qb_ids:
        depth_rows = df_depth_chart[
            (df_depth_chart["gsis_id"] == qb_id) & (df_depth_chart["season"] == season)
        ]
        if depth_rows.empty:
            continue
        # NOTE(review): the lookup is not restricted to `team`, so a QB with
        # depth-chart rows for several clubs uses whichever row comes first.
        depth = depth_rows["depth_team_mean"].iloc[0]
        if depth < min_depth:
            min_depth = depth
            starting_qb_id = qb_id

    if starting_qb_id is None:
        return None

    qb_df = df_seasonal[
        (df_seasonal["player_id"] == starting_qb_id) & (df_seasonal["season"] == season)
    ]
    if qb_df.empty:
        return None

    stat_columns = ("completions", "attempts", "passing_yards", "passing_tds", "interceptions")
    # Column-wise access keeps each stat's own dtype.
    return tuple(qb_df[column].iloc[0] for column in stat_columns)

In [3]:
def calculate_qbr(completions, attempts, passing_yards, interceptions, passing_touchdowns):
    """Compute a passer rating from raw passing totals.

    Four per-attempt components (completion rate, yards per attempt,
    touchdown rate, interception rate) are each clamped to [0, 2.375],
    summed, divided by 6 and scaled by 100. This matches the NFL passer
    rating formula, so results lie in [0, 158.33].

    Parameters
    ----------
    completions, attempts, passing_yards, interceptions, passing_touchdowns :
        Numeric totals. Note the argument order: interceptions comes before
        passing_touchdowns.

    Returns
    -------
    float
        The rating, or NaN when ``attempts`` is 0 (the rating is undefined
        without attempts; NaN avoids a ZeroDivisionError).
    """
    if attempts == 0:
        return float("nan")

    thres = 2.375

    def _clamp(component):
        # Each component is bounded to [0, thres] before summing.
        return min(max(component, 0), thres)

    completion_percentage_comp = _clamp((completions / attempts - .3) * 5)
    yards_per_attempt_comp = _clamp((passing_yards / attempts - 3) * 0.25)
    touchdown_percentage_comp = _clamp((passing_touchdowns / attempts) * 20)
    interception_percentage_comp = _clamp(thres - (interceptions / attempts) * 25)

    sum_comp = completion_percentage_comp + yards_per_attempt_comp + \
        touchdown_percentage_comp + interception_percentage_comp

    return (sum_comp / 6) * 100

# Grabbing Data

In [4]:
# Load the 2023 roster pickle from ../../interactive (the 2012-2022 file is the commented alternative).
# NOTE(review): read_pickle can execute arbitrary code; only load pickles this project produced.
# df_roster = pd.read_pickle("../../interactive/df_roster_2012_2022.pkl")
df_roster = pd.read_pickle("../../interactive/df_roster_2023.pkl")

In [5]:
# Load 2023 seasonal stats through nfl_data_py; the pickled alternatives are kept commented out.
# df_seasonal = pd.read_pickle("../../interactive/df_seasonal_2012_2022.pkl")
# df_seasonal = pd.read_pickle("../../interactive/df_seasonal_2023.pkl")
df_seasonal = nfl.import_seasonal_data([2023])

In [6]:
# Load the raw 2023 depth charts through nfl_data_py; they are aggregated per player in later cells.
# df_depth_chart = pd.read_pickle("../../interactive/df_dc_mean_2012_2022.pkl")
df_depth_chart = nfl.import_depth_charts([2023])

In [7]:
# Draft picks (file name indicates 1980-2022); used later to look up each player's draft round and pick.
# NOTE(review): read_pickle can execute arbitrary code; only load pickles this project produced.
df_draft_picks = pd.read_pickle("../../interactive/df_draft_picks_1980_2022.pkl")

In [8]:
# NGS receiving data (file name indicates 2016-2022), restricted to rows whose season_type is 'REG'.
df_ngs_rec = (
    pd.read_pickle("../../interactive/df_ngs_rec_2016_2022.pkl")
    .query("season_type == 'REG'")
)

In [9]:
# PFR receiving data (file name indicates 2018-2022).
df_pfr_rec = pd.read_pickle("../../interactive/df_pfr_rec_2018_2022.pkl")

# Normal Stat DF Creation

In [10]:
# Work on a copy so the enrichment below leaves df_seasonal untouched.
# NOTE(review): despite the name, the passing columns are dropped later and the result
# is saved as "wr-simple-data-..."; consider a name that reflects receiving data.
df_pass = df_seasonal.copy()

In [11]:
# Cast depth_team to int so it can be averaged per player below.
# Not re-runnable once df_depth_chart has been replaced by the aggregated frame
# (which has depth_team_mean instead of depth_team): the lookup raises KeyError.
df_depth_chart["depth_team"] = df_depth_chart["depth_team"].astype(int)

In [12]:
# Inspect the raw depth-chart columns before aggregating.
df_depth_chart.columns

Index(['season', 'club_code', 'week', 'game_type', 'depth_team', 'last_name',
       'first_name', 'football_name', 'formation', 'gsis_id', 'jersey_number',
       'position', 'elias_id', 'depth_position', 'full_name'],
      dtype='object')

In [13]:
# Mean depth-chart slot per (season, club, player) over regular-season rows.
# The list-valued agg spec yields two-level column labels, flattened in the next cell.
df_temp = (
    df_depth_chart
    .query("game_type == 'REG'")
    .groupby(["season", "club_code", "gsis_id"])
    .agg({"depth_team": ["mean"]})
)

In [14]:
# Flatten the two-level column labels to "<column>_<aggregation>" (e.g. depth_team_mean)
# and turn the group keys back into ordinary columns.
df_temp.columns = ["_".join(labels) for labels in df_temp.columns]
df_temp = df_temp.reset_index()

In [15]:
# Rebind df_depth_chart to the per-player aggregate; from here on it carries
# depth_team_mean (read by get_qb and the enrichment loop), not the raw weekly rows.
df_depth_chart = df_temp.copy()

In [16]:
# Enrich each seasonal stat row with roster, depth-chart and draft information,
# plus the passer rating of the primary QB on the player's team.
# Iterating over a snapshot keeps the loop independent of the columns added to df_pass.
for i, row in df_pass.copy().iterrows():
    player_id = row["player_id"]
    season = row["season"]

    roster_row = df_roster.query(f"season == {season} and player_id == '{player_id}'")
    depth_chart_row = df_depth_chart.query(f"season == {season} and gsis_id == '{player_id}'")
    dc_row = df_draft_picks.query(f"gsis_id == '{player_id}'")  # draft-pick record

    # Rows missing any of the three lookups stay un-enriched (NaN) and are
    # removed by the later dropna.
    if roster_row.empty or depth_chart_row.empty or dc_row.empty:
        continue

    player_name = roster_row["player_name"].iloc[0]

    df_pass.at[i, "age"] = roster_row["age"].iloc[0]
    df_pass.at[i, "position"] = roster_row["position"].iloc[0]
    df_pass.at[i, "player_name"] = player_name

    df_pass.at[i, "depth_team"] = depth_chart_row["depth_team_mean"].iloc[0]

    df_pass.at[i, "round"] = dc_row["round"].iloc[0]
    df_pass.at[i, "pick"] = dc_row["pick"].iloc[0]

    output = get_qb(player_name, season)
    if output is not None:
        completions, attempts, passing_yards, passing_tds, interceptions = output
        # calculate_qbr takes interceptions before touchdowns.
        qbr = calculate_qbr(completions, attempts, passing_yards, interceptions, passing_tds)
        df_pass.at[i, "qbr"] = qbr

In [17]:
# df_pass.to_pickle("./wr-simple-data-2012-2022.pkl")

In [18]:
# Remove the passing, rushing and special-teams stat columns.
df_pass = df_pass.drop(
    columns=[
        "completions",
        "attempts",
        "passing_yards",
        "passing_tds",
        "interceptions",
        "sacks",
        "sack_yards",
        "sack_fumbles",
        "sack_fumbles_lost",
        "passing_air_yards",
        "passing_yards_after_catch",
        "passing_first_downs",
        "passing_epa",
        "passing_2pt_conversions",
        "pacr",
        "dakota",
        "carries",
        "rushing_yards",
        "rushing_tds",
        "rushing_fumbles",
        "rushing_fumbles_lost",
        "rushing_first_downs",
        "rushing_epa",
        "rushing_2pt_conversions",
        "special_teams_tds",
    ]
)

In [19]:
# Preview the enriched frame; rows that could not be enriched still show NaN in the added columns.
df_pass.head()

Unnamed: 0,player_id,season,season_type,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,...,w8dom,yptmpa,ppr_sh,age,position,player_name,depth_team,round,pick,qbr
0,00-0023459,2023,REG,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,39.0,QB,Aaron Rodgers,1.0,1.0,24.0,39.583333
1,00-0024243,2023,REG,4,5,29.0,1,0.0,0.0,9.0,...,0.046907,0.197279,0.028467,39.0,TE,Marcedes Lewis,3.0,1.0,28.0,85.816855
2,00-0026158,2023,REG,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0.0,0.191464,38.0,QB,Joe Flacco,1.0,1.0,18.0,90.236928
3,00-0026498,2023,REG,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0.0,0.173299,35.0,QB,Matthew Stafford,1.0,1.0,1.0,92.494402
4,00-0026625,2023,REG,0,0,0.0,0,0.0,0.0,0.0,...,0.0,0.0,0.036106,,na,,,,,


In [20]:
# Keep only rows with no missing value in any column.
df_pass = df_pass.dropna()

In [21]:
# Persist the cleaned 2023 frame (including the qbr column) next to the notebook.
df_pass.to_pickle("./wr-simple-data-2023-qbr.pkl")

# Advanced Stat DF Creation

In [None]:
# Inspect the columns available in the NGS receiving data.
df_ngs_rec.columns

In [None]:
# df_pass_ngs = 

# For modelling

In [9]:
import math

In [3]:
# Seasons 2005 through 2023 inclusive (range excludes its stop value, hence the + 1).
years = range(2005, 2023 + 1)

In [4]:
# Rebinds df_roster (previously the 2023 roster pickle) to rosters for every season in `years`.
# get_qb reads df_roster as a global, so it sees this frame from here on.
df_roster = nfl.import_seasonal_rosters(years)

In [6]:
# Mean age per (position, player_name, player_id) across all loaded seasons.
# NOTE(review): season is not a grouping key, so this is one age per player
# rather than per season - confirm that is intended.
df_roster_seasonal = (
    df_roster
    .groupby(["position", "player_name", "player_id"])
    .agg({"age": ["mean"]})
)

In [7]:
# Flatten the two-level column labels to "<column>_<aggregation>" (age_mean)
# and restore the group keys as ordinary columns.
df_roster_seasonal.columns = ["_".join(labels) for labels in df_roster_seasonal.columns]
df_roster_seasonal = df_roster_seasonal.reset_index()

In [10]:
# Floor the mean age to whole years. Rows whose mean is missing are skipped by
# dropna() and come back as NaN when the result is aligned on the index.
df_roster_seasonal["age"] = (
    df_roster_seasonal["age_mean"]
    .dropna()
    .apply(math.floor)
)
df_roster_seasonal = df_roster_seasonal.drop(columns=["age_mean"])

In [5]:
# Rebinds df_seasonal (previously 2023 only) to seasonal stats for every season in `years`.
# get_qb reads df_seasonal as a global, so it sees this frame from here on.
df_seasonal = nfl.import_seasonal_data(years)

In [11]:
# Attach position, player_name and age to every seasonal stat row; the left join
# keeps stat rows that have no roster match.
# NOTE(review): df_roster_seasonal is grouped by position, player_name and
# player_id, so a player_id recorded with more than one position or name matches
# several rows here and its season rows are duplicated - confirm this is intended.
df_merge = df_seasonal.merge(
    df_roster_seasonal,
    how="left",
    on="player_id",
)

In [12]:
# Remove fully identical rows, then any row with a missing value in any column.
df_merge_cleaned = (
    df_merge
    .drop_duplicates()
    .dropna()
)

In [15]:
# List the data files currently present in the interactive directory.
ls ../../interactive

[34mQB[m[m/                            df_pbp_2012_2022.pkl
[34mRB[m[m/                            df_pfr_pass_2018_2022.pkl
[34mTE[m[m/                            df_pfr_rec_2018_2022.pkl
[34mWR[m[m/                            df_pfr_rush_2018_2022.pkl
df_dc_mean_2012_2022.pkl       df_qbr_seasonal_2012_2022.pkl
df_draft_picks_1980_2022.pkl   df_roster_2012_2022.pkl
df_draft_picks_2012_2022.pkl   df_roster_2023.pkl
df_ngs_passing_2016_2022.pkl   df_seasonal_2012_2022.pkl
df_ngs_rec_2016_2022.pkl       df_seasonal_2023.pkl
df_ngs_rush_2016_2022.pkl


In [16]:
# Persist the merged 2005-2023 frame alongside the other pickles in the interactive directory.
df_merge_cleaned.to_pickle("../../interactive/df_all_2005_2023.pkl")