In [81]:
#Import Packages

import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import nfl_data_py as nfl
import time
import warnings
import cfbd
import datetime

#Ignoring Warning messages for cleaner presentation
warnings.filterwarnings("ignore")

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [82]:
#Specifying Year Range
years = range(2015,2025)
current_year = datetime.date.today().year

In [83]:
#Pull in CSVs from Personal GitHubs
epa_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/combined_EPA.csv")
blocking_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/blocking_stats.csv")
passing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/passing_stats.csv")
receiving_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/receiving_stats.csv")
rushing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/rushing_stats.csv")
defensive_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/defensive_stats.csv")

power_5_team  = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/power_5_teams.csv")

# Creating Player Profiles

In [84]:
#Creating College Career DFs

#Creating Passing DF
positions = ['ED', 'DI', 'CB', 'WR', 'LB', 'S', 'G', 'HB', 'TE', 'FB', 'T', 'C', 'QB']

filtered_passing_career_df = passing_df.fillna(0).sort_values(by='Season')

filtered_passing_career_df = filtered_passing_career_df.groupby(['player_id', 'player']).agg({'Season':'count', 'position':'last', 'player_game_count':'sum', 'team_name':'last',
        'player_game_count':'sum',  'aimed_passes':'sum', 'attempts':'sum', 'avg_depth_of_target':'mean', 
        'avg_time_to_throw':'mean', 'bats':'sum','big_time_throws':'sum', 'completions':'sum','declined_penalties':'sum', 
        'def_gen_pressures':'sum', 'dropbacks':'sum','drops':'sum', 'first_downs':'sum', 'grades_hands_fumble':'median',
        'grades_offense':'median', 'grades_pass':'median', 'hit_as_threw':'sum','interceptions':'sum', 
        'passing_snaps':'sum', 'penalties':'sum', 'pressure_to_sack_rate':'last','qb_rating':'last', 'sacks':'sum', 
        'scrambles':'sum', 'spikes':'sum','thrown_aways':'sum', 'touchdowns':'sum', 'turnover_worthy_plays':'sum','yards':'sum'})

filtered_passing_career_df = filtered_passing_career_df.reset_index()

#Creating Rushing DF

filtered_rushing_career_df = rushing_df.fillna(0).sort_values(by='Season')

filtered_rushing_career_df = filtered_rushing_career_df.groupby(['player_id', 'player']).agg({'Season':'count','position':'last', 'player_game_count':'sum', 'team_name':'last',
        'attempts':'sum','avoided_tackles':'sum', 'breakaway_attempts':'sum',
       'breakaway_yards':'sum', 'designed_yards':'sum', 'elu_recv_mtf':'last', 'elu_rush_mtf':'last',
       'elu_yco':'last', 'elusive_rating':'last', 'explosive':'sum', 'first_downs':'sum',
       'fumbles':'sum', 'gap_attempts':'sum', 'grades_run':'median', 'grades_offense_penalty':'median','grades_run_block':'median', 
       'longest':'max','run_plays':'sum', 'scramble_yards':'sum','total_touches':'sum', 'touchdowns':'sum', 'yards':'sum',
       'yards_after_contact':'sum', 'yco_attempt':'sum', 'ypa':'mean', 'zone_attempts':'sum'})

filtered_rushing_career_df = filtered_rushing_career_df.reset_index()

#Creating Receiving DF

filtered_receiving_career_df = receiving_df.fillna(0).sort_values(by='Season')

filtered_receiving_career_df = filtered_receiving_career_df.groupby(['player_id', 'player']).agg({'Season':'count', 'position':'last', 'player_game_count':'sum', 'team_name':'last',
        'avg_depth_of_target' : 'mean', 'avoided_tackles':'sum','contested_receptions':'sum',
       'contested_targets':'sum', 'declined_penalties':'sum', 'drops':'sum','first_downs':'sum', 'fumbles':'sum',
       'grades_hands_drop':'median','grades_hands_fumble':'median', 'grades_offense':'median','grades_pass_route':'median', 
       'inline_rate':'mean', 'inline_snaps':'sum', 'interceptions':'sum','longest':'max', 'pass_plays':'sum', 
       'receptions':'sum', 'route_rate' :'mean', 'routes':'sum', 'slot_rate':'mean', 'slot_snaps':'sum',
       'targeted_qb_rating':'mean', 'targets':'sum', 'touchdowns':'sum', 'wide_rate':'mean','wide_snaps':'sum',
       'yards':'sum', 'yards_after_catch':'sum'})

filtered_receiving_career_df = filtered_receiving_career_df.reset_index()

#Creating Blocking DF
filtered_blocking_career_df = blocking_df.fillna(0).sort_values(by='Season')

filtered_blocking_career_df = filtered_blocking_career_df.groupby(['player_id', 'player']).agg({'Season':'count','position':'last', 'player_game_count':'sum', 'team_name':'last',
        'declined_penalties':'sum', 'grades_offense':'median', 'grades_pass_block':'median',
       'grades_run_block':'median', 'hits_allowed':'sum', 'hurries_allowed':'sum','snap_counts_offense':'sum',
        'non_spike_pass_block':'sum', 'snap_counts_pass_block':'sum', 'penalties':'sum', 'pressures_allowed':'sum',
       'sacks_allowed':'sum', 'snap_counts_block':'sum', 'snap_counts_run_block':'sum', 'snap_counts_lt':'sum', 
        'snap_counts_lg':'sum', 'snap_counts_ce':'sum', 'snap_counts_rg':'sum', 'snap_counts_rt':'sum',  'snap_counts_te':'sum'})

filtered_blocking_career_df = filtered_blocking_career_df.reset_index()

#Creating Blocking DF
filtered_defensive_career_df = defensive_df.fillna(0).sort_values(by='Season')

filtered_defensive_career_df = filtered_defensive_career_df.groupby(['player_id', 'player']).agg({'Season':'count','position':'last', 'player_game_count':'sum', 'team_name':'last',
       'assists':'sum', 'batted_passes':'sum', 'declined_penalties':'sum', 'forced_fumbles':'sum',
       'fumble_recoveries':'sum', 'fumble_recovery_touchdowns':'sum',
       'grades_coverage_defense':'median', 'grades_defense':'median', 'grades_defense_penalty':'median',
       'grades_pass_rush_defense':'median', 'grades_run_defense':'median', 'grades_tackle':'median',
       'hits':'sum', 'hurries':'sum', 'interception_touchdowns':'sum', 'interceptions':'sum',
       'missed_tackle_rate':'mean', 'missed_tackles':'sum', 'pass_break_ups':'sum',
       'penalties':'sum','receptions':'sum', 'sacks':'sum', 'safeties':'sum',
       'stops':'sum', 'tackles':'sum', 'tackles_for_loss':'sum', 'targets':'sum', 'total_pressures':'sum',
       'touchdowns':'sum', 'yards':'sum', 'yards_after_catch':'sum'})

filtered_defensive_career_df = filtered_defensive_career_df.reset_index()

#Creating EPA DF
filtered_epa_career_df = epa_df.fillna(0).sort_values(by='Season')

filtered_epa_career_df = filtered_epa_career_df.groupby(['Id', 'Name']).agg({'Season':'count','Position':'last', 'Team': 'last',
        'CountablePlays':'sum', 'TotalPPA All':'sum', 'TotalPPA Pass':'sum','TotalPPA Rush':'sum', 
        'TotalPPA FirstDown':'sum', 'TotalPPA SecondDown':'sum','TotalPPA ThirdDown':'sum', 
        'TotalPPA StandardDowns':'sum','TotalPPA PassingDowns':'sum'})

filtered_epa_career_df = filtered_epa_career_df.reset_index()

#Renaming same name columns
filtered_passing_career_df = filtered_passing_career_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_passing', 'drops': 'drops_passing', 'first_downs': 'first_downs_passing',
        'interceptions':'interceptions_passing', 'touchdowns':'touchdowns_passing', 'yards':'yards_passing'})
filtered_rushing_career_df = filtered_rushing_career_df.rename(columns={'attempts' : 'attempts_rushing', 'first_downs':'first_downs_rushing',
        'touchdowns':'touchdowns_rushing', 'yards':'yards_rushing', 'longest': 'longest_rush'})
filtered_receiving_career_df = filtered_receiving_career_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_receiving', 'drops': 'drops_receiving', 'first_downs': 'first_downs_receiving',
        'interceptions':'interceptions_receiving', 'touchdowns':'touchdowns_receiving', 'yards':'yards_receiving',
        'targets':'targets_receiving'})
filtered_defensive_career_df = filtered_defensive_career_df.rename(columns={'fumble_recoveries':'fumble_recoveries_defensive',
        'interceptions': 'interceptions_defensive', 'receptions':'receptions_allowed', 'sacks':'sacks_defensive',
        'targets':'targets_allowed', 'touchdowns':'coverage_touchdowns_allowed', 'yards':'coverage_yards_allowed'})
filtered_epa_career_df = filtered_epa_career_df.rename(columns={'Id':'player_id', 'Name':'player', 'Position':'position'})

career_dfs = [filtered_passing_career_df, filtered_rushing_career_df, filtered_receiving_career_df, filtered_blocking_career_df, filtered_defensive_career_df]

In [85]:
#combining career dfs

def combine_dfs(df_list, on_columns):
    combined_df = pd.concat(df_list, axis=0)
    return combined_df

on_columns = ['player_id', 'player']

career_df = combine_dfs(career_dfs, on_columns)

career_df = career_df[career_df['position'].isin(positions)]
career_df = career_df.groupby(['player_id', 'player']).max()
career_df = career_df.fillna(0)

#renaming HB position to RB for consistency among dfs
career_df['position'] = career_df['position'].replace('HB', 'RB')
career_df['offensive/defensive_grade'] = career_df[['grades_offense','grades_defense']].max(axis=1)
#career_df.sort_values(by='offensive/defensive_grade', ascending=False)

In [86]:
#Adding Career Rates

#Passing Career Rates
career_df['yards_passing/att_career'] = career_df['yards_passing'] / career_df['attempts_passing']
career_df['completion_pct_career'] = career_df['completions'] / career_df['attempts_passing']
career_df['adj_completion_pct_career'] = (career_df['completions'] + career_df['drops_passing']) / career_df['aimed_passes']
career_df['touchdown_pct_career'] = career_df['touchdowns_passing'] / career_df['attempts_passing']
career_df['interception_pct_career'] = career_df['interceptions_passing'] / career_df['attempts_passing']
career_df['dangerous_play_pct_career'] = career_df['turnover_worthy_plays'] / career_df['attempts_passing']
career_df['money_throw_pct_career'] = career_df['big_time_throws'] / career_df['attempts_passing']
career_df['1st_downs/pass_att_career'] = career_df['first_downs_passing'] / career_df['attempts_passing']

#Rushing Career Rates
career_df['breakaway_runs/att_career'] = career_df['breakaway_attempts'] / career_df['attempts_rushing']
career_df['breakaway_runs/att_career'] = career_df['breakaway_attempts'] / career_df['attempts_rushing']
career_df['fumbles/att_career'] = career_df['fumbles'] / career_df['attempts_rushing']
career_df['touchdowns_rushing/att_career'] = career_df['touchdowns_rushing'] / career_df['attempts_rushing']
career_df['yards_rushing/att_career'] = career_df['yards_rushing'] / career_df['attempts_rushing']
career_df['1st_downs/pass_att_career'] = career_df['first_downs_rushing'] / career_df['attempts_rushing']

#Receiving Career Rates
career_df['catch_pct_career'] = career_df['receptions'] / career_df['targets_receiving']
career_df['contested_catch_pct_career'] = career_df['contested_receptions'] / career_df['contested_targets']
career_df['touchdowns_receiving/target_career'] = career_df['touchdowns_receiving'] / career_df['targets_receiving']
career_df['interceptions_receiving/target_career'] = career_df['interceptions_receiving'] / career_df['targets_receiving']

career_df['targets/route_ran_career'] = career_df['targets_receiving'] / career_df['routes']
career_df['1st_downs/route_ran_career'] = career_df['first_downs_receiving'] / career_df['routes']

career_df['drops/target_career'] = career_df['drops_receiving'] / career_df['targets_receiving']

career_df['yards/reception_career'] = career_df['yards_receiving'] / career_df['receptions']
career_df['yards/target_career'] = career_df['yards_receiving'] / career_df['targets_receiving']
career_df['yards/route_ran_career'] = career_df['yards_receiving'] / career_df['routes']

career_df['yards_after_catch/reception_career'] = career_df['yards_after_catch'] / career_df['receptions']
career_df['yards_after_catch/target_career'] = career_df['yards_after_catch'] / career_df['targets_receiving']
career_df['yards_after_catch/route_ran_career'] = career_df['yards_after_catch'] / career_df['routes']

career_df['wide_snaps_routes_pct_career'] = career_df['wide_snaps'] / career_df['routes']
career_df['slot_snaps_routes_pct_career'] = career_df['slot_snaps'] / career_df['routes']
career_df['slot_snaps_routes_pct_career'] = career_df['inline_snaps'] / career_df['routes']

#Blocking Career Rates
career_df['pass_blocking_efficency'] = career_df['pressures_allowed'] / career_df['non_spike_pass_block']
career_df['blocking_pct_per play'] = career_df['snap_counts_block'] / career_df['snap_counts_offense']

#General Offensive Career Rates
career_df['avoided_tackles/touches_career'] = career_df['avoided_tackles'] / (career_df['attempts_rushing'] + career_df['targets_receiving'])
career_df['fumbles/touches_career'] = career_df['fumbles'] / (career_df['attempts_rushing'] + career_df['targets_receiving'])

#Defensive Career Rates
career_df['allowed_catch_pct'] = career_df['receptions_allowed'] / career_df['targets_allowed']
career_df['yards_per_catch_allowed'] = career_df['coverage_yards_allowed'] / career_df['receptions_allowed']

career_df = career_df.fillna(0)
career_df = career_df.reset_index()
#career_df

In [87]:
#Creating player_key to match up players from dfs
filtered_epa_career_df['player_key'] = filtered_epa_career_df['player'] + "_" + filtered_epa_career_df['position']
career_df['player_key'] = career_df['player'] + "_" + career_df['position']

team_name_dict = dict(zip(power_5_team['epa_team_name'], power_5_team['pff_ team_name']))
power_5_dict = dict(zip(power_5_team['pff_ team_name'], power_5_team['P5?']))

filtered_epa_career_df['Team'] = filtered_epa_career_df['Team'].map(team_name_dict)
filtered_epa_career_df['power_5_team'] = filtered_epa_career_df['Team'].map(power_5_dict)
filtered_epa_career_df = filtered_epa_career_df.fillna(0)
filtered_epa_career_df['position'] = filtered_epa_career_df['position'].replace('HB', 'RB')
#filtered_epa_career_df[filtered_epa_career_df['player'] == 'Jay Ajayi']

In [88]:
#merging career stats df with career epa df
career_df = career_df.merge(filtered_epa_career_df, on='player_key', suffixes=['','_epa'], how='outer')
career_df = career_df[~career_df['player'].isna()]
career_df = career_df.drop_duplicates(subset='player_id')
#career_df

In [89]:
#creating EPA play averages

career_df['Total_EPA_avg'] = career_df['TotalPPA All'] / career_df['CountablePlays']
career_df['EPA_Pass_avg'] = career_df['TotalPPA Pass'] / career_df['CountablePlays']
career_df['EPA_Rush_avg'] = career_df['TotalPPA Rush'] / career_df['CountablePlays']
career_df['EPA_1st_down_avg'] = career_df['TotalPPA FirstDown'] / career_df['CountablePlays']
career_df['EPA_2nd_down_avg'] = career_df['TotalPPA SecondDown'] / career_df['CountablePlays']
career_df['EPA_3rd_down_avg'] = career_df['TotalPPA ThirdDown'] / career_df['CountablePlays']
career_df['EPA_StandardDowns_avg'] = career_df['TotalPPA StandardDowns'] / career_df['CountablePlays']
career_df['EPA_PassingDowns_avg'] = career_df['TotalPPA PassingDowns'] / career_df['CountablePlays']

career_df

Unnamed: 0,player_id,player,Season,position,player_game_count,team_name,aimed_passes,attempts_passing,avg_depth_of_target_passing,avg_time_to_throw,...,TotalPPA PassingDowns,power_5_team,Total_EPA_avg,EPA_Pass_avg,EPA_Rush_avg,EPA_1st_down_avg,EPA_2nd_down_avg,EPA_3rd_down_avg,EPA_StandardDowns_avg,EPA_PassingDowns_avg
0,9434.0,Jameis Winston,1.0,QB,13.0,FLORIDA ST,450.0,467.0,8.8,2.74,...,114.066,1.0,0.419510,0.397581,0.021929,0.117035,0.124444,0.189038,0.20015,0.219358
1,9435.0,Marcus Mariota,1.0,QB,15.0,OREGON,434.0,444.0,9.8,2.83,...,125.802,1.0,0.586410,0.431969,0.154441,0.232592,0.158228,0.179654,0.36876,0.217651
2,9436.0,Dante Fowler Jr.,1.0,ED,12.0,FLORIDA,0.0,0.0,0.0,0.00,...,,,,,,,,,,
3,9437.0,Amari Cooper,1.0,WR,14.0,ALABAMA,0.0,0.0,0.0,0.00,...,59.464,1.0,0.768914,0.777486,-0.008577,0.337229,0.116606,0.297486,0.42912,0.339794
4,9438.0,Brandon Scherff,1.0,T,13.0,IOWA,0.0,0.0,0.0,0.00,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37932,183052.0,Marlon McClendon,1.0,CB,1.0,GA STATE,0.0,0.0,0.0,0.00,...,,,,,,,,,,
37933,183053.0,Brooks Hickman,1.0,WR,1.0,GA STATE,0.0,0.0,0.0,0.00,...,,,,,,,,,,
37934,183054.0,Jace Larsen,1.0,WR,1.0,S ALABAMA,0.0,0.0,0.0,0.00,...,,,,,,,,,,
37935,183055.0,Jamal Meriweather,1.0,T,1.0,GEORGIA,0.0,0.0,0.0,0.00,...,,,,,,,,,,


In [90]:
#Creating final season DFs

#Creating Passing DF
filtered_passing_final_season_df = passing_df.fillna(0).sort_values(by='Season')

filtered_passing_final_season_df = filtered_passing_final_season_df.groupby(['player_id', 'player']).last()

filtered_passing_final_season_df = filtered_passing_final_season_df.reset_index()

#Creating Rushing DF

filtered_rushing_final_season_df = rushing_df.fillna(0).sort_values(by='Season')

filtered_rushing_final_season_df = filtered_rushing_final_season_df.groupby(['player_id', 'player']).last()

filtered_rushing_final_season_df = filtered_rushing_final_season_df.reset_index()

#Creating Receiving DF
filtered_receiving_final_season_df = receiving_df.fillna(0).sort_values(by='Season')

filtered_receiving_final_season_df = filtered_receiving_final_season_df.groupby(['player_id', 'player']).last()

filtered_receiving_final_season_df = filtered_receiving_final_season_df.reset_index()

#Creating Blocking DF
filtered_blocking_final_season_df = blocking_df.fillna(0).sort_values(by='Season')

filtered_blocking_final_season_df = filtered_blocking_final_season_df.groupby(['player_id', 'player']).last()

filtered_blocking_final_season_df = filtered_blocking_final_season_df.reset_index()

#Creating Blocking DF
filtered_defensive_final_season_df = defensive_df.fillna(0).sort_values(by='Season')

filtered_defensive_final_season_df = filtered_defensive_final_season_df.groupby(['player_id', 'player']).last()

filtered_defensive_final_season_df = filtered_defensive_final_season_df.reset_index()

#Creating EPA DF
filtered_epa_final_season_df = epa_df.fillna(0).sort_values(by='Season')

filtered_epa_final_season_df = filtered_epa_final_season_df.groupby(['Id', 'Name']).last()

filtered_epa_final_season_df = filtered_epa_final_season_df.reset_index()

#Renaming same name columns
filtered_passing_final_season_df = filtered_passing_final_season_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_passing', 'drops': 'drops_passing', 'first_downs': 'first_downs_passing',
        'interceptions':'interceptions_passing', 'touchdowns':'touchdowns_passing', 'yards':'yards_passing'})
filtered_rushing_final_season_df = filtered_rushing_final_season_df.rename(columns={'attempts' : 'attempts_rushing', 'first_downs':'first_downs_rushing',
        'touchdowns':'touchdowns_rushing', 'yards':'yards_rushing', 'longest': 'longest_rush'})
filtered_receiving_final_season_df = filtered_receiving_final_season_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_receiving', 'drops': 'drops_receiving', 'first_downs': 'first_downs_receiving',
        'interceptions':'interceptions_receiving', 'touchdowns':'touchdowns_receiving', 'yards':'yards_receiving',
        'targets':'targets_receiving'})
filtered_defensive_final_season_df = filtered_defensive_final_season_df.rename(columns={'fumble_recoveries':'fumble_recoveries_defensive',
        'interceptions': 'interceptions_defensive', 'receptions':'receptions_allowed', 'sacks':'sacks_defensive',
        'targets':'targets_allowed', 'touchdowns':'coverage_touchdowns_allowed', 'yards':'coverage_yards_allowed'})
filtered_epa_final_season_df = filtered_epa_final_season_df.rename(columns={'Id':'player_id', 'Name':'player', 'Position':'position'})

final_season_dfs = [filtered_passing_final_season_df, filtered_rushing_final_season_df, filtered_receiving_final_season_df,
      filtered_blocking_final_season_df, filtered_defensive_final_season_df]

In [91]:
#Creating final season stats DF

final_season_df = pd.DataFrame()

final_season_df = combine_dfs(final_season_dfs, on_columns)

final_season_df = final_season_df[final_season_df['position'].isin(positions)]
final_season_df = final_season_df.groupby(['player_id', 'player']).last()
final_season_df = final_season_df.fillna(0)
final_season_df['position'] = final_season_df['position'].replace('HB', 'RB')
final_season_df['offensive/defensive_grade'] = final_season_df[['grades_offense','grades_defense']].max(axis=1)

final_season_df = final_season_df.reset_index()
#final_season_df = final_season_df.drop('player_id',axis=1)
#final_season_df.sort_values(by='offensive/defensive_grade', ascending=False)

In [92]:
#creating final season EPA DF
filtered_epa_final_season_df['player_key'] = filtered_epa_final_season_df['player'] + "_" + filtered_epa_final_season_df['position']
final_season_df['player_key'] = final_season_df['player'] + "_" + final_season_df['position']

filtered_epa_final_season_df['Team'] = filtered_epa_final_season_df['Team'].map(team_name_dict)
filtered_epa_final_season_df = filtered_epa_final_season_df.fillna(0)
filtered_epa_final_season_df['position'] = filtered_epa_final_season_df['position'].replace('HB', 'RB')

#filtered_epa_final_season_df

In [93]:
#merging final seasons DFs

final_season_df = final_season_df.merge(filtered_epa_final_season_df, on='player_key', suffixes=['','_epa'], how='outer')
final_season_df = final_season_df.drop_duplicates(subset='player_id')
final_season_df

Unnamed: 0,player_id,player,Season,position,team_name,player_game_count,accuracy_percent,aimed_passes,attempts_passing,avg_depth_of_target_passing,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,TotalPPA SecondDown,TotalPPA ThirdDown,TotalPPA StandardDowns,TotalPPA PassingDowns
0,9434.0,Jameis Winston,2014.0,QB,FLORIDA ST,13.0,72.7,450.0,467.0,8.8,...,0.322,0.579,218.145,206.742,11.403,60.858,64.711,98.300,104.078,114.066
1,9435.0,Marcus Mariota,2014.0,QB,OREGON,15.0,76.7,434.0,444.0,9.8,...,0.549,0.662,338.945,249.678,89.267,134.438,91.456,103.840,213.143,125.802
2,9436.0,Dante Fowler Jr.,2016.0,ED,FLORIDA,12.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,9437.0,Amari Cooper,2014.0,WR,ALABAMA,14.0,0.0,0.0,0.0,0.0,...,0.636,1.043,134.560,136.060,-1.501,59.015,20.406,52.060,75.096,59.464
4,9438.0,Brandon Scherff,2014.0,T,IOWA,13.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37929,183053.0,Brooks Hickman,2023.0,WR,GA STATE,1.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
37930,183054.0,Jace Larsen,2023.0,WR,S ALABAMA,1.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
37931,183055.0,Jamal Meriweather,2023.0,T,GEORGIA,1.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
37932,183056.0,Jacoby Davis,2023.0,CB,KANSAS,1.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [94]:
#merging Final Season and Career dfs

player_df = career_df.merge(final_season_df, on='player_id', suffixes=['_career','_final_season'])
player_df = player_df.rename(columns={'position_career':'pos','player_id':'pff_id'})
player_df

Unnamed: 0,pff_id,player_career,Season_career,pos,player_game_count_career,team_name_career,aimed_passes_career,attempts_passing_career,avg_depth_of_target_passing_career,avg_time_to_throw_career,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_final_season,TotalPPA Pass_final_season,TotalPPA Rush_final_season,TotalPPA FirstDown_final_season,TotalPPA SecondDown_final_season,TotalPPA ThirdDown_final_season,TotalPPA StandardDowns_final_season,TotalPPA PassingDowns_final_season
0,9434.0,Jameis Winston,1.0,QB,13.0,FLORIDA ST,450.0,467.0,8.8,2.74,...,0.322,0.579,218.145,206.742,11.403,60.858,64.711,98.30,104.078,114.066
1,9435.0,Marcus Mariota,1.0,QB,15.0,OREGON,434.0,444.0,9.8,2.83,...,0.549,0.662,338.945,249.678,89.267,134.438,91.456,103.84,213.143,125.802
2,9436.0,Dante Fowler Jr.,1.0,ED,12.0,FLORIDA,0.0,0.0,0.0,0.00,...,,,,,,,,,,
3,9437.0,Amari Cooper,1.0,WR,14.0,ALABAMA,0.0,0.0,0.0,0.00,...,0.636,1.043,134.560,136.060,-1.501,59.015,20.406,52.06,75.096,59.464
4,9438.0,Brandon Scherff,1.0,T,13.0,IOWA,0.0,0.0,0.0,0.00,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37819,183052.0,Marlon McClendon,1.0,CB,1.0,GA STATE,0.0,0.0,0.0,0.00,...,,,,,,,,,,
37820,183053.0,Brooks Hickman,1.0,WR,1.0,GA STATE,0.0,0.0,0.0,0.00,...,,,,,,,,,,
37821,183054.0,Jace Larsen,1.0,WR,1.0,S ALABAMA,0.0,0.0,0.0,0.00,...,,,,,,,,,,
37822,183055.0,Jamal Meriweather,1.0,T,1.0,GEORGIA,0.0,0.0,0.0,0.00,...,,,,,,,,,,


In [95]:
#importing combine & draft dfs from API
combine_df  = nfl.import_combine_data(years)
draft_df = nfl.import_draft_picks(years)
id_df = nfl.import_ids()

draft_df = draft_df[['season', 'round', 'pick', 'team', 'gsis_id', 'pfr_player_id',
       'cfb_player_id', 'pfr_player_name', 'position', 'category','college', 'age']]

id_df = id_df[['gsis_id', 'pff_id', 'pfr_id','cfbref_id', 'name', 'merge_name', 'position', 'team', 'birthdate', 'age',
       'draft_year', 'draft_round', 'draft_pick', 'draft_ovr', 'height', 'weight']]

combine_df = combine_df[['season', 'draft_year', 'pfr_id', 'cfb_id', 'player_name', 'pos', 'school', 'forty',
       'bench', 'vertical', 'broad_jump', 'cone', 'shuttle']]

In [96]:
#Data mungering combine df

combine_df  = nfl.import_combine_data(years)
combine_df = combine_df[~combine_df['ht'].isna()]

#editing height from Foot-Inches to only Inches
def extract_height(height_str):
    feet, inches = height_str.split('-')
    return int(feet) * 12 + int(inches)

combine_df['ht'] = combine_df['ht'].apply(extract_height) 

combine_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
4902,2015,2015.0,Detroit Lions,2.0,54.0,AbduAm00,ameer-abdullah-1,Ameer Abdullah,RB,Nebraska,69,205.0,4.60,24.0,42.5,130.0,6.79,3.95
4903,2015,2015.0,Philadelphia Eagles,1.0,20.0,AghoNe00,nelson-agholor-1,Nelson Agholor,WR,USC,72,198.0,4.42,12.0,,,,
4904,2015,2015.0,Miami Dolphins,5.0,149.0,AjayJa00,jay-ajayi-1,Jay Ajayi,RB,Boise State,72,221.0,4.57,19.0,39.0,121.0,7.10,4.10
4905,2015,2015.0,Tampa Bay Buccaneers,4.0,124.0,AlexKw00,kwon-alexander-1,Kwon Alexander,OLB,LSU,73,227.0,4.55,24.0,36.0,121.0,7.14,4.20
4906,2015,2015.0,Cincinnati Bengals,7.0,238.0,AlfoMa00,mario-alford-1,Mario Alford,WR,West Virginia,68,180.0,4.43,13.0,34.0,121.0,6.64,4.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8315,2024,,,,,,roman-wilson-1,Roman Wilson,WR,Michigan,71,185.0,4.39,12.0,,,,
8316,2024,,,,,,mekhi-wingo-1,Mekhi Wingo,DT,LSU,72,284.0,4.85,25.0,31.5,109.0,,
8317,2024,,,,,,xavier-worthy-1,Xavier Worthy,WR,Texas,71,165.0,4.21,,41.0,131.0,,
8318,2024,,,,,,jaylen-wright-1,Jaylen Wright,RB,Tennessee,71,210.0,4.38,,38.0,134.0,,


In [97]:
#assigning draft picks to players

combine_current_year_df = combine_df[combine_df['season'] == current_year]
cfb_current_year_id = combine_current_year_df['cfb_id']

draft_current_year_df = draft_df[(draft_df['cfb_player_id'].isin(cfb_current_year_id)) & (draft_df['season'] == current_year)]

#draft_current_year_df.head(32)

In [98]:
#combining combine df with draft picks

draft_dict = dict(zip(draft_current_year_df['cfb_player_id'], draft_current_year_df['pick']))
draft_round_dict = dict(zip(draft_current_year_df['cfb_player_id'], draft_current_year_df['round']))

combine_df['draft_ovr'] = combine_df['draft_ovr'].fillna(combine_df['cfb_id'].map(draft_dict))
combine_df['draft_round'] = combine_df['draft_round'].fillna(combine_df['cfb_id'].map(draft_round_dict))

combine_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
4902,2015,2015.0,Detroit Lions,2.0,54.0,AbduAm00,ameer-abdullah-1,Ameer Abdullah,RB,Nebraska,69,205.0,4.60,24.0,42.5,130.0,6.79,3.95
4903,2015,2015.0,Philadelphia Eagles,1.0,20.0,AghoNe00,nelson-agholor-1,Nelson Agholor,WR,USC,72,198.0,4.42,12.0,,,,
4904,2015,2015.0,Miami Dolphins,5.0,149.0,AjayJa00,jay-ajayi-1,Jay Ajayi,RB,Boise State,72,221.0,4.57,19.0,39.0,121.0,7.10,4.10
4905,2015,2015.0,Tampa Bay Buccaneers,4.0,124.0,AlexKw00,kwon-alexander-1,Kwon Alexander,OLB,LSU,73,227.0,4.55,24.0,36.0,121.0,7.14,4.20
4906,2015,2015.0,Cincinnati Bengals,7.0,238.0,AlfoMa00,mario-alford-1,Mario Alford,WR,West Virginia,68,180.0,4.43,13.0,34.0,121.0,6.64,4.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8315,2024,,,3.0,84.0,,roman-wilson-1,Roman Wilson,WR,Michigan,71,185.0,4.39,12.0,,,,
8316,2024,,,6.0,189.0,,mekhi-wingo-1,Mekhi Wingo,DT,LSU,72,284.0,4.85,25.0,31.5,109.0,,
8317,2024,,,1.0,28.0,,xavier-worthy-1,Xavier Worthy,WR,Texas,71,165.0,4.21,,41.0,131.0,,
8318,2024,,,4.0,120.0,,jaylen-wright-1,Jaylen Wright,RB,Tennessee,71,210.0,4.38,,38.0,134.0,,


In [99]:
#Assigning undrafted players pick 263 & round 8

column_order = ['season', 'cfb_id', 'player_name', 'pos', 'school', 'ht', 
           'wt', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle','draft_ovr', 'draft_round']

combine_df = combine_df[column_order]

combine_df['draft_ovr'] = combine_df['draft_ovr'].fillna(263)
combine_df['draft_round'] = combine_df['draft_round'].fillna(8)

#combine_df.sort_values(by=['draft_ovr','season']).head(20)

In [100]:
#Removing duplicated keys

duplicated_keys = ['Kevin White_2015', 'Jordan Thomas_2018', 'Byron Young_2023']

combine_df['player_key'] = combine_df['player_name'] + "_" + combine_df['season'].astype(str)
combine_df = combine_df[~combine_df['player_key'].isin(duplicated_keys)]
#combine_df

In [101]:
#redefining player keys

player_df['Season_final_season'] = player_df['Season_final_season'].astype(int)
player_df['player_key'] = player_df['player_final_season'] + "_" + (player_df['Season_final_season']+1).astype(str)
#player_df

In [102]:
#Data Mungering the df for consistency

prospect_df = combine_df.merge(player_df, on='player_key',how='left')
prospect_df['pos_y'] = prospect_df['pos_y'].fillna(prospect_df['pos_x'])
prospect_df['pos_y'].unique()

pos_map = {'RB':'RB', 'WR':'WR', 'OLB':'ED', 'S':'S', 'DE':'ED', 'TE':'TE', 'ILB':'LB', 'QB':'QB', 'DT':'DI', 'T':'T',
'OG':'G', 'FB':'FB', 'G':'G', 'CB':'CB', 'OT':'T', 'C':'C', 'DL':'DL', 'LB':'LB', 'EDGE':'ED','DB':'DB', 'DI':'DI', 
           'ED':'ED', 'OL':'OL', 'SAF':'S'}

pos_to_drop = ['P', 'K', 'LS', 'DL', 'OL', 'DB']

prospect_df = prospect_df[~prospect_df['pos_y'].isin(pos_to_drop)]
prospect_df['pos_y'] = prospect_df['pos_y'].map(pos_map)
prospect_df = prospect_df[~prospect_df['pos_y'].isna()]
prospect_df['pos_x'] = prospect_df['pos_y']
prospect_df = prospect_df.drop('pos_y', axis=1)
prospect_df = prospect_df.rename(columns={'pos_x' : 'pos'})

prospect_df[(prospect_df['pos']=='WR') & (prospect_df['season']==current_year)].head(10)

Unnamed: 0,season,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_final_season,TotalPPA Pass_final_season,TotalPPA Rush_final_season,TotalPPA FirstDown_final_season,TotalPPA SecondDown_final_season,TotalPPA ThirdDown_final_season,TotalPPA StandardDowns_final_season,TotalPPA PassingDowns_final_season
3096,2024,javon-baker-1,Javon Baker,WR,Central Florida,73,202.0,4.54,,37.0,...,,,,,,,,,,
3118,2024,jermaine-burton-1,Jermaine Burton,WR,Alabama,72,196.0,4.45,,38.5,...,0.751,1.702,65.277,64.33,0.946,12.889,23.399,30.616,21.031,44.245
3128,2024,,Jalen Coker,WR,Holy Cross,73,208.0,4.57,,42.5,...,,,,,,,,,,
3131,2024,keon-coleman-1,Keon Coleman,WR,Florida St.,75,213.0,4.61,,38.0,...,0.466,0.381,35.441,36.443,-1.002,19.697,10.707,8.681,25.156,10.284
3135,2024,malachi-corley-1,Malachi Corley,WR,Western Kentucky,71,215.0,,,,...,0.778,0.897,78.881,79.797,-0.916,19.051,30.947,23.858,47.484,31.397
3137,2024,jacob-cowing-1,Jacob Cowing,WR,Arizona,68,168.0,4.38,,36.0,...,0.788,0.725,55.754,55.181,0.574,15.293,3.769,33.776,35.46,20.294
3167,2024,,Ryan Flournoy,WR,Southeast Missouri St.,73,202.0,4.44,19.0,39.5,...,,,,,,,,,,
3170,2024,troy-franklin-2,Troy Franklin,WR,Oregon,74,176.0,4.41,,39.0,...,0.964,1.842,119.7,119.7,0.0,30.577,50.318,37.211,60.75,58.95
3179,2024,anthony-gould-1,Anthony Gould,WR,Oregon St.,68,174.0,4.39,,39.5,...,0.631,1.559,51.361,52.464,-1.103,14.938,16.841,17.675,20.185,31.176
3184,2024,lideatrick-griffin-1,Lideatrick Griffin,WR,Mississippi St.,70,181.0,4.43,,35.5,...,0.274,1.023,41.812,42.276,-0.464,3.816,25.249,12.653,13.171,28.641


In [103]:
#Finding median for combine drills to fill omitted drills

combine_drills = ['forty','bench','vertical','broad_jump','cone','shuttle']

combine_median_df = prospect_df.groupby('pos')[combine_drills].median()
#combine_median_df = combine_median_df.reset_index()
combine_median_df

Unnamed: 0_level_0,forty,bench,vertical,broad_jump,cone,shuttle
pos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
C,5.225,25.0,28.75,105.0,7.63,4.655
CB,4.48,14.0,36.0,124.0,6.95,4.195
DI,5.07,26.0,29.0,106.0,7.69,4.68
ED,4.73,23.0,33.0,118.0,7.2,4.38
FB,4.75,23.5,33.0,115.0,7.235,4.34
G,5.24,26.0,27.5,104.0,7.84,4.8
LB,4.66,20.0,33.5,119.0,7.13,4.33
QB,4.8,16.0,31.0,114.0,7.13,4.36
RB,4.56,19.0,34.0,120.0,7.08,4.31
S,4.55,17.0,35.5,122.0,7.0,4.24


In [104]:
#Adding median to drill if player omitted the drill

test_df = prospect_df.merge(combine_median_df, how='left', on='pos', suffixes=['','_y'])

for drill in combine_drills:
    test_df[drill].fillna(test_df[drill + '_y'], inplace=True)
    
test_df[['season', 'player_name', 'pos', 'school', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle', 
         'forty_y', 'bench_y', 'vertical_y', 'broad_jump_y', 'cone_y', 'shuttle_y']]

Unnamed: 0,season,player_name,pos,school,forty,bench,vertical,broad_jump,cone,shuttle,forty_y,bench_y,vertical_y,broad_jump_y,cone_y,shuttle_y
0,2015,Ameer Abdullah,RB,Nebraska,4.60,24.0,42.5,130.0,6.79,3.95,4.560,19.0,34.0,120.0,7.08,4.31
1,2015,Nelson Agholor,WR,USC,4.42,12.0,35.5,123.0,7.00,4.28,4.505,14.0,35.5,123.0,7.00,4.28
2,2015,Jay Ajayi,RB,Boise State,4.57,19.0,39.0,121.0,7.10,4.10,4.560,19.0,34.0,120.0,7.08,4.31
3,2015,Kwon Alexander,ED,LSU,4.55,24.0,36.0,121.0,7.14,4.20,4.730,23.0,33.0,118.0,7.20,4.38
4,2015,Mario Alford,WR,West Virginia,4.43,13.0,34.0,121.0,6.64,4.07,4.505,14.0,35.5,123.0,7.00,4.28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3194,2024,Roman Wilson,WR,Michigan,4.39,12.0,35.5,123.0,7.00,4.28,4.505,14.0,35.5,123.0,7.00,4.28
3195,2024,Mekhi Wingo,DI,LSU,4.85,25.0,31.5,109.0,7.69,4.68,5.070,26.0,29.0,106.0,7.69,4.68
3196,2024,Xavier Worthy,WR,Texas,4.21,14.0,41.0,131.0,7.00,4.28,4.505,14.0,35.5,123.0,7.00,4.28
3197,2024,Jaylen Wright,RB,Tennessee,4.38,19.0,38.0,134.0,7.08,4.31,4.560,19.0,34.0,120.0,7.08,4.31


In [105]:
#Adding positional averages if NaN in a combine drill

prospect_df = prospect_df.merge(combine_median_df, how='left', on='pos', suffixes=['','_y'])

for drill in combine_drills:
    prospect_df[drill].fillna(prospect_df[drill + '_y'], inplace=True)
    
for drill in combine_drills:
    prospect_df = prospect_df.drop(drill+'_y', axis=1)
    
prospect_df

Unnamed: 0,season,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_final_season,TotalPPA Pass_final_season,TotalPPA Rush_final_season,TotalPPA FirstDown_final_season,TotalPPA SecondDown_final_season,TotalPPA ThirdDown_final_season,TotalPPA StandardDowns_final_season,TotalPPA PassingDowns_final_season
0,2015,ameer-abdullah-1,Ameer Abdullah,RB,Nebraska,69,205.0,4.60,24.0,42.5,...,0.226,0.527,83.942,17.301,66.641,39.322,19.745,24.964,52.844,31.098
1,2015,nelson-agholor-1,Nelson Agholor,WR,USC,72,198.0,4.42,12.0,35.5,...,,,,,,,,,,
2,2015,jay-ajayi-1,Jay Ajayi,RB,Boise State,72,221.0,4.57,19.0,39.0,...,0.316,0.253,120.971,37.805,83.166,51.364,28.396,40.262,102.981,17.990
3,2015,kwon-alexander-1,Kwon Alexander,ED,LSU,73,227.0,4.55,24.0,36.0,...,,,,,,,,,,
4,2015,mario-alford-1,Mario Alford,WR,West Virginia,68,180.0,4.43,13.0,34.0,...,0.376,0.593,52.070,52.866,-0.796,13.567,15.007,21.390,24.784,27.286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3194,2024,roman-wilson-1,Roman Wilson,WR,Michigan,71,185.0,4.39,12.0,35.5,...,1.277,1.707,77.975,77.570,0.406,26.932,17.982,32.313,47.257,30.718
3195,2024,mekhi-wingo-1,Mekhi Wingo,DI,LSU,72,284.0,4.85,25.0,31.5,...,,,,,,,,,,
3196,2024,xavier-worthy-1,Xavier Worthy,WR,Texas,71,165.0,4.21,14.0,41.0,...,0.539,0.797,54.652,54.277,0.375,27.559,27.743,1.895,32.344,22.308
3197,2024,jaylen-wright-1,Jaylen Wright,RB,Tennessee,71,210.0,4.38,19.0,38.0,...,0.195,0.481,40.407,8.046,32.361,12.984,12.669,18.541,24.529,15.878


In [106]:
#replacing invalid values with 0
prospect_df = prospect_df.replace([np.inf, -np.inf], np.nan)
prospect_df = prospect_df.fillna(0)

var_stats = list(prospect_df.select_dtypes(include=['float64']).columns)
prospect_df = prospect_df.reset_index()

#Zach Wilson got duplicated and removing the duplicated line
prospect_df = prospect_df[prospect_df['index'] != 2251]
    
#prospect_df[(prospect_df['pos']=='QB') & (prospect_df['season']==current_year)]

In [107]:
#Creating a list of positions

pos_list = list(prospect_df['pos'].unique())

draft_proj_stats = var_stats
draft_proj_stats.remove('draft_ovr')

# Projecting Rookie Draft Spots

In [108]:
#Creating Draft Perdictions

temp_df = pd.DataFrame()
draft_proj_df = pd.DataFrame()

for pos in pos_list:
    temp_df = prospect_df[prospect_df['pos'] == pos]
    
    X = temp_df[draft_proj_stats]
    Y = temp_df['draft_ovr']

    reg_model = LinearRegression()
    reg_model.fit(X,Y)
    y_pred_regression = reg_model.predict(X)
    temp_df['y_pred_regression'] = y_pred_regression
    
    rand_model = RandomForestRegressor(n_estimators=(temp_df.shape[0]), random_state=42)
    rand_model.fit(X,Y)
    y_pred_rand = rand_model.predict(X)
    temp_df['y_pred_random_forrest'] = y_pred_rand

    svr_model = SVR(kernel='rbf', C=1.0)
    svr_model.fit(X, Y)
    svr_predictions = svr_model.predict(X)
    temp_df['y_pred_svr'] = svr_predictions

    gbr_model = GradientBoostingRegressor(n_estimators=(temp_df.shape[0]), learning_rate=0.1)
    gbr_model.fit(X, Y)
    gbr_predictions = gbr_model.predict(X)
    temp_df['y_pred_gbf'] = gbr_predictions

    draft_proj_df = pd.concat([draft_proj_df, temp_df])

In [109]:
print_df = draft_proj_df.merge(prospect_df[['cfb_id', 'draft_ovr']], on='cfb_id', suffixes=['','_y'])
print_df = print_df[['season','player_name','pos','school','draft_ovr','y_pred_regression','y_pred_random_forrest','y_pred_svr','y_pred_gbf']]
print_df[(print_df['season']==2024) & (print_df['draft_ovr']<=32)].sort_values(by='draft_ovr')

Unnamed: 0,season,player_name,pos,school,draft_ovr,y_pred_regression,y_pred_random_forrest,y_pred_svr,y_pred_gbf
70102,2024,Caleb Williams,QB,USC,1.0,1.0,3.491018,164.170632,0.965843
70089,2024,Jayden Daniels,QB,LSU,2.0,2.0,6.730539,164.177508,1.929287
70093,2024,Drake Maye,QB,North Carolina,3.0,3.0,4.431138,164.170841,3.633064
68880,2024,Marvin Harrison,WR,Ohio St.,4.0,13.565898,7.327273,204.860836,11.350211
70538,2024,Joe Alt,T,Notre Dame,5.0,7.993164,8.350649,141.190574,4.859694
68889,2024,Malik Nabers,WR,LSU,6.0,4.917945,10.214141,205.121461,5.983482
70560,2024,JC Latham,T,Alabama,7.0,7.993742,9.152597,141.202421,7.943654
70091,2024,Michael Penix Jr.,QB,Washington,8.0,8.0,9.359281,164.292969,7.916309
68890,2024,Rome Odunze,WR,Washington,9.0,14.400914,12.319192,204.860836,10.76205
70094,2024,J.J. McCarthy,QB,Michigan,10.0,10.0,9.065868,164.166936,10.107876


# Creating Rookie Projections

In [110]:
#Pulling rookie stats

rookie_stats_df = nfl.import_seasonal_data(years[:-1])

rookie_stats_df = rookie_stats_df.sort_values(by='season')
rookie_stats_df = rookie_stats_df.groupby(by='player_id').first()
rookie_stats_df = rookie_stats_df.reset_index()

rookie_stats_df

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,yac_sh,wopr_y,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,dom,w8dom,yptmpa,ppr_sh
0,00-0007091,2015,REG,156,256,1690.0,9,5.0,16.0,101.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.159657
1,00-0010346,2015,REG,198,331,2249.0,9,17.0,16.0,95.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.113044
2,00-0018227,2015,REG,0,0,0.0,0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,-0.003743
3,00-0019596,2015,REG,402,624,4770.0,36,7.0,38.0,225.0,...,0.011866,0.003519,0.007481,0.0,0.004367,0.003774,0.003741,0.005985,0.057234,0.210173
4,00-0020245,2015,REG,40,66,371.0,2,1.0,10.0,53.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.074269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1823,00-0039150,2023,REG,315,527,2877.0,11,10.0,62.0,477.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.154486
1824,00-0039152,2023,REG,149,255,1808.0,8,4.0,28.0,185.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.159821
1825,00-0039163,2023,REG,319,499,4108.0,23,5.0,38.0,331.0,...,0.000562,0.002783,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.206779
1826,00-0039164,2023,REG,50,84,577.0,3,1.0,7.0,29.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.213689


In [111]:
#Creating ids for players

combine_df  = nfl.import_combine_data(years)
combine_df[['season', 'draft_team', 'pfr_id', 'cfb_id', 'player_name']]

id_df = nfl.import_ids()
id_df = id_df[~id_df['pfr_id'].isna()]
id_df[['gsis_id', 'pff_id', 'pfr_id', 'cfbref_id', 'name', 'merge_name', 'position']]

Unnamed: 0,gsis_id,pff_id,pfr_id,cfbref_id,name,merge_name,position
265,00-0038400,,McKeTa01,tanner-mckee-1,Tanner McKee,tanner mckee,QB
266,00-0039150,,YounBr01,bryce-young-1,Bryce Young,bryce young,QB
267,00-0039152,,LeviWi00,will-levis-1,Will Levis,will levis,QB
268,00-0039163,,StroCJ00,cj-stroud-1,C.J. Stroud,cj stroud,QB
269,00-0038550,,HookHe00,hendon-hooker-1,Hendon Hooker,hendon hooker,QB
...,...,...,...,...,...,...,...
10171,,,andermor01,morten-andersen-1,Morten Andersen,morten andersen,PK
10172,,,andergar02,gary-anderson-3,Gary Anderson,gary anderson,PK
10174,,,HoraMi20,mike-horan-1,Mike Horan,mike horan,PN
10183,,,WilsWa00,,Wade Wilson,wade wilson,QB


In [112]:
#Merging IDs
rookie_stats_df = rookie_stats_df.merge(id_df[['gsis_id', 'pff_id', 'pfr_id', 'cfbref_id', 'name', 'merge_name']], left_on= 'player_id',right_on='gsis_id')

In [113]:
#selecting columns for rookie stats table

column_selection = ['player_id', 'name', 'season', 'games', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions']

rookie_stats_df = rookie_stats_df.groupby(['player_id', 'name']).first()
rookie_stats_df = rookie_stats_df.reset_index()

rookie_stats_df.columns

Index(['player_id', 'name', 'season', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr_x', 'special_teams_tds', 'fantasy_points', 'fantasy_points_ppr',
       'games', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ry_sh', 'rtd_sh',
       'rfd_sh', 'rtdfd_sh', 

In [114]:
#more merging

rookie_proj_df = prospect_df.merge(rookie_stats_df, left_on = 'cfb_id', right_on='cfbref_id', how='outer', suffixes=['','_y'])
rookie_proj_df = rookie_proj_df.dropna(subset='player_name').fillna(0)

rookie_proj_df

Unnamed: 0,index,season,cfb_id,player_name,pos,school,ht,wt,forty,bench,...,rtdfd_sh,dom,w8dom,yptmpa,ppr_sh,gsis_id,pff_id_y,pfr_id,cfbref_id,merge_name
0,0.0,2015.0,ameer-abdullah-1,Ameer Abdullah,RB,Nebraska,69.0,205.0,4.60,24.0,...,0.044610,0.035653,0.038864,0.289557,0.077570,00-0032104,9487.0,AbduAm00,ameer-abdullah-1,ameer abdullah
1,1.0,2015.0,nelson-agholor-1,Nelson Agholor,WR,USC,72.0,198.0,4.42,12.0,...,0.065934,0.063546,0.073101,0.562624,0.046309,00-0031549,9453.0,AghoNe00,nelson-agholor-1,nelson agholor
2,2.0,2015.0,jay-ajayi-1,Jay Ajayi,RB,Boise State,72.0,221.0,4.57,19.0,...,0.043103,0.019659,0.031455,0.277778,0.057933,00-0031590,9582.0,AjayJa00,jay-ajayi-1,jay ajayi
3,3.0,2015.0,kwon-alexander-1,Kwon Alexander,ED,LSU,73.0,227.0,4.55,24.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0,0,0
4,4.0,2015.0,mario-alford-1,Mario Alford,WR,West Virginia,68.0,180.0,4.43,13.0,...,0.090909,0.031381,0.050209,0.681818,0.026080,00-0031962,9671.0,AlfoMa00,mario-alford-1,mario alford
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3193,3194.0,2024.0,roman-wilson-1,Roman Wilson,WR,Michigan,71.0,185.0,4.39,12.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0,0,0
3194,3195.0,2024.0,mekhi-wingo-1,Mekhi Wingo,DI,LSU,72.0,284.0,4.85,25.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0,0,0
3195,3196.0,2024.0,xavier-worthy-1,Xavier Worthy,WR,Texas,71.0,165.0,4.21,14.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0,0,0
3196,3197.0,2024.0,jaylen-wright-1,Jaylen Wright,RB,Tennessee,71.0,210.0,4.38,19.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0,0,0


In [115]:
#stats to perdict in a list

stats_to_pred = ['completions','attempts','passing_yards','passing_tds','interceptions','sacks','sack_yards','sack_fumbles','sack_fumbles_lost',
'passing_2pt_conversions','carries','rushing_yards','rushing_tds','rushing_fumbles','rushing_fumbles_lost',
'rushing_first_downs','rushing_epa','rushing_2pt_conversions','receptions','targets_y','receiving_yards','receiving_tds',
'receiving_fumbles','receiving_fumbles_lost','receiving_air_yards','receiving_first_downs',
'receiving_epa','receiving_2pt_conversions','target_share','games','tgt_sh','ay_sh','ry_sh','rtd_sh','rfd_sh',
'dom','yptmpa']

In [116]:
#the X values in a list for the analysis
rookie_df_x = ['ht', 'wt', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle', 'draft_ovr', 'draft_round', 
               'Season_career', 'player_game_count_career', 'aimed_passes_career', 'attempts_passing_career', 
               'avg_depth_of_target_passing_career', 'avg_time_to_throw_career', 'bats_career', 'big_time_throws_career',
               'completions_career', 'declined_penalties_career', 'def_gen_pressures_career', 'dropbacks_career',
               'drops_passing_career', 'first_downs_passing_career', 'grades_hands_fumble_career', 'grades_offense_career',
               'grades_pass_career', 'hit_as_threw_career', 'interceptions_passing_career', 'passing_snaps_career',
               'penalties_career', 'pressure_to_sack_rate_career', 'qb_rating_career', 'sacks_career', 'scrambles_career',
               'spikes_career', 'thrown_aways_career', 'touchdowns_passing_career', 'turnover_worthy_plays_career',
               'yards_passing_career', 'attempts_rushing_career', 'avoided_tackles_career', 'breakaway_attempts_career',
               'breakaway_yards_career', 'designed_yards_career', 'elu_recv_mtf_career', 'elu_rush_mtf_career',
               'elu_yco_career', 'elusive_rating_career', 'explosive_career', 'first_downs_rushing_career', 'fumbles_career',
               'gap_attempts_career', 'grades_run_career', 'grades_offense_penalty_career', 'grades_run_block_career',
               'longest_rush_career', 'run_plays_career', 'scramble_yards_career', 'total_touches_career',
               'touchdowns_rushing_career', 'yards_rushing_career', 'yards_after_contact_career', 'yco_attempt_career',
               'ypa_career', 'zone_attempts_career', 'avg_depth_of_target_receiving_career', 'contested_receptions_career',
               'contested_targets_career', 'drops_receiving_career', 'first_downs_receiving_career', 'grades_hands_drop_career',
               'grades_pass_route_career', 'inline_rate_career', 'inline_snaps_career', 'interceptions_receiving_career',
               'longest_career', 'pass_plays_career', 'receptions_career', 'route_rate_career', 'routes_career',
               'slot_rate_career', 'slot_snaps_career', 'targeted_qb_rating_career', 'targets_receiving_career',
               'touchdowns_receiving_career', 'wide_rate_career', 'wide_snaps_career', 'yards_receiving_career',
               'yards_after_catch_career', 'grades_pass_block_career', 'hits_allowed_career', 'hurries_allowed_career',
               'snap_counts_offense_career', 'non_spike_pass_block_career', 'snap_counts_pass_block_career', 
               'pressures_allowed_career', 'sacks_allowed_career', 'snap_counts_block_career', 'snap_counts_run_block_career',
               'snap_counts_lt_career', 'snap_counts_lg_career', 'snap_counts_ce_career', 'snap_counts_rg_career',
               'snap_counts_rt_career', 'snap_counts_te_career', 'assists_career', 'batted_passes_career',
               'forced_fumbles_career', 'fumble_recoveries_defensive_career', 'fumble_recovery_touchdowns_career',
               'grades_coverage_defense_career', 'grades_defense_career', 'grades_defense_penalty_career',
               'grades_pass_rush_defense_career', 'grades_run_defense_career', 'grades_tackle_career', 'hits_career',
               'hurries_career', 'interception_touchdowns_career', 'interceptions_defensive_career',
               'missed_tackle_rate_career', 'missed_tackles_career', 'pass_break_ups_career', 'receptions_allowed_career',
               'sacks_defensive_career', 'safeties_career', 'stops_career', 'tackles_career', 'tackles_for_loss_career',
               'targets_allowed_career', 'total_pressures_career', 'coverage_touchdowns_allowed_career', 
               'coverage_yards_allowed_career', 'offensive/defensive_grade_career', 'yards_passing/att_career', 
               'completion_pct_career', 'adj_completion_pct_career', 'touchdown_pct_career', 'interception_pct_career',
               'dangerous_play_pct_career', 'money_throw_pct_career', '1st_downs/pass_att_career', 'breakaway_runs/att_career',
               'fumbles/att_career', 'touchdowns_rushing/att_career', 'yards_rushing/att_career', 'catch_pct_career',
               'contested_catch_pct_career', 'touchdowns_receiving/target_career', 'interceptions_receiving/target_career',
               'targets/route_ran_career', '1st_downs/route_ran_career', 'drops/target_career', 'yards/reception_career',
               'yards/target_career', 'yards/route_ran_career', 'yards_after_catch/reception_career', 
               'yards_after_catch/target_career', 'yards_after_catch/route_ran_career', 'wide_snaps_routes_pct_career',
               'slot_snaps_routes_pct_career', 'pass_blocking_efficency', 'blocking_pct_per play', 
               'avoided_tackles/touches_career', 'fumbles/touches_career', 'allowed_catch_pct', 'yards_per_catch_allowed',
               'CountablePlays_career', 'TotalPPA All_career', 'TotalPPA Pass_career', 'TotalPPA Rush_career',
               'TotalPPA FirstDown_career', 'TotalPPA SecondDown_career', 'TotalPPA ThirdDown_career',
               'TotalPPA StandardDowns_career', 'TotalPPA PassingDowns_career', 'power_5_team', 'Total_EPA_avg',
               'EPA_Pass_avg', 'EPA_Rush_avg', 'EPA_1st_down_avg', 'EPA_2nd_down_avg', 'EPA_3rd_down_avg',
               'EPA_StandardDowns_avg', 'EPA_PassingDowns_avg', 'player_game_count_final_season', 'accuracy_percent',
               'aimed_passes_final_season', 'attempts_passing_final_season', 'avg_depth_of_target_passing_final_season',
               'avg_time_to_throw_final_season', 'bats_final_season', 'big_time_throws_final_season', 'btt_rate',
               'completion_percent', 'completions_final_season', 'declined_penalties_final_season',
               'def_gen_pressures_final_season', 'drop_rate', 'dropbacks_final_season', 'drops_passing_final_season',
               'first_downs_passing_final_season', 'grades_hands_fumble_final_season', 'grades_offense_final_season',
               'grades_pass_final_season', 'grades_run_final_season', 'hit_as_threw_final_season',
               'interceptions_passing_final_season', 'passing_snaps_final_season', 'penalties_final_season',
               'pressure_to_sack_rate_final_season', 'qb_rating_final_season', 'sack_percent', 'sacks_final_season',
               'scrambles_final_season', 'spikes_final_season', 'thrown_aways_final_season', 'touchdowns_passing_final_season',
               'turnover_worthy_plays_final_season', 'twp_rate', 'yards_passing_final_season', 'ypa_final_season',
               'attempts_rushing_final_season', 'avoided_tackles_final_season', 'breakaway_attempts_final_season',
               'breakaway_percent', 'breakaway_yards_final_season', 'designed_yards_final_season', 'drops',
               'elu_recv_mtf_final_season', 'elu_rush_mtf_final_season', 'elu_yco_final_season', 'elusive_rating_final_season',
               'explosive_final_season', 'first_downs_rushing_final_season', 'fumbles_final_season',
               'gap_attempts_final_season', 'grades_offense_penalty_final_season', 'grades_pass_block_final_season',
               'grades_pass_route_final_season', 'grades_run_block_final_season', 'longest_rush_final_season', 'rec_yards',
               'receptions_final_season', 'routes_final_season', 'run_plays_final_season', 'scramble_yards_final_season',
               'targets', 'total_touches_final_season', 'touchdowns_rushing_final_season', 'yards_rushing_final_season',
               'yards_after_contact_final_season', 'yco_attempt_final_season', 'yprr', 'zone_attempts_final_season',
               'avg_depth_of_target_receiving_final_season', 'caught_percent', 'contested_catch_rate',
               'contested_receptions_final_season', 'contested_targets_final_season', 'drops_receiving_final_season',
               'first_downs_receiving_final_season', 'grades_hands_drop_final_season', 'inline_rate_final_season',
               'inline_snaps_final_season', 'interceptions_receiving_final_season', 'longest_final_season',
               'pass_block_rate', 'pass_blocks', 'pass_plays_final_season', 'route_rate_final_season',
               'slot_rate_final_season', 'slot_snaps_final_season', 'targeted_qb_rating_final_season',
               'targets_receiving_final_season', 'touchdowns_receiving_final_season', 'wide_rate_final_season',
               'wide_snaps_final_season', 'yards_receiving_final_season', 'yards_after_catch_final_season',
               'yards_after_catch_per_reception', 'yards_per_reception', 'block_percent', 'hits_allowed_final_season',
               'hurries_allowed_final_season', 'non_spike_pass_block_final_season', 'non_spike_pass_block_percentage',
               'pass_block_percent', 'pbe', 'pressures_allowed_final_season', 'sacks_allowed_final_season',
               'snap_counts_block_final_season', 'snap_counts_ce_final_season', 'snap_counts_lg_final_season',
               'snap_counts_lt_final_season', 'snap_counts_offense_final_season', 'snap_counts_pass_block_final_season',
               'snap_counts_pass_play', 'snap_counts_rg_final_season', 'snap_counts_rt_final_season',
               'snap_counts_run_block_final_season', 'snap_counts_te_final_season', 'assists_final_season',
               'batted_passes_final_season', 'catch_rate', 'forced_fumbles_final_season',
               'fumble_recoveries_defensive_final_season', 'fumble_recovery_touchdowns_final_season', 
               'grades_coverage_defense_final_season', 'grades_defense_final_season', 'grades_defense_penalty_final_season',
               'grades_pass_rush_defense_final_season', 'grades_run_defense_final_season', 'grades_tackle_final_season',
               'hits_final_season', 'hurries_final_season', 'interception_touchdowns_final_season',
               'interceptions_defensive_final_season', 'missed_tackle_rate_final_season', 'missed_tackles_final_season',
               'pass_break_ups_final_season', 'qb_rating_against', 'receptions_allowed_final_season',
               'sacks_defensive_final_season', 'safeties_final_season', 'snap_counts_box', 'snap_counts_corner',
               'snap_counts_coverage', 'snap_counts_defense', 'snap_counts_dl', 'snap_counts_dl_a_gap', 'snap_counts_dl_b_gap',
               'snap_counts_dl_outside_t', 'snap_counts_dl_over_t', 'snap_counts_fs', 'snap_counts_offball',
               'snap_counts_pass_rush', 'snap_counts_run_defense', 'snap_counts_slot', 'stops_final_season',
               'tackles_final_season', 'tackles_for_loss_final_season', 'targets_allowed_final_season',
               'total_pressures_final_season', 'coverage_touchdowns_allowed_final_season',
               'coverage_yards_allowed_final_season', 'offensive/defensive_grade_final_season',
               'player_id_epa_final_season', 'CountablePlays_final_season', 'AveragePPA All', 'AveragePPA Pass',
               'AveragePPA Rush', 'AveragePPA FirstDown', 'AveragePPA SecondDown', 'AveragePPA ThirdDown',
               'AveragePPA StandardDowns', 'AveragePPA PassingDowns', 'TotalPPA All_final_season', 'TotalPPA Pass_final_season',
               'TotalPPA Rush_final_season', 'TotalPPA FirstDown_final_season', 'TotalPPA SecondDown_final_season',
               'TotalPPA ThirdDown_final_season', 'TotalPPA StandardDowns_final_season', 'TotalPPA PassingDowns_final_season']

In [117]:
#Creating projections using GDR

df_export = pd.DataFrame() 

for pos in pos_list:
    temp_df = rookie_proj_df[(rookie_proj_df['pos'] == pos) & (rookie_proj_df['season'] != current_year)]
    temp_rookie_df = rookie_proj_df[rookie_proj_df['pos'] == pos]
    for stat in stats_to_pred:
        X = temp_df[rookie_df_x]
        Y = temp_df[stat]

        gbr_model = GradientBoostingRegressor(n_estimators=(temp_df.shape[0]), learning_rate=0.1)
        gbr_model.fit(X, Y)
        gbr_predictions = gbr_model.predict(temp_rookie_df[rookie_df_x])
        temp_rookie_df[stat + '_y_pred_gbf'] = gbr_predictions
        
    #temp_df = temp_df[temp_df['season'] == current_year]
    temp_rookie_df[temp_rookie_df['season'] == current_year]
    df_export = pd.concat([df_export, temp_rookie_df])

In [118]:
#Unfiltered CSV export if you are wanting unfiltered data

#df_export.to_csv('unfiltered_rookie_proj.csv')

In [119]:
#Creating columns list to create a cleaner df to extract
columns = [col for col in df_export.columns if '_y_pred_gbf' in col]

print(columns)

['completions_y_pred_gbf', 'attempts_y_pred_gbf', 'passing_yards_y_pred_gbf', 'passing_tds_y_pred_gbf', 'interceptions_y_pred_gbf', 'sacks_y_pred_gbf', 'sack_yards_y_pred_gbf', 'sack_fumbles_y_pred_gbf', 'sack_fumbles_lost_y_pred_gbf', 'passing_2pt_conversions_y_pred_gbf', 'carries_y_pred_gbf', 'rushing_yards_y_pred_gbf', 'rushing_tds_y_pred_gbf', 'rushing_fumbles_y_pred_gbf', 'rushing_fumbles_lost_y_pred_gbf', 'rushing_first_downs_y_pred_gbf', 'rushing_epa_y_pred_gbf', 'rushing_2pt_conversions_y_pred_gbf', 'receptions_y_pred_gbf', 'targets_y_y_pred_gbf', 'receiving_yards_y_pred_gbf', 'receiving_tds_y_pred_gbf', 'receiving_fumbles_y_pred_gbf', 'receiving_fumbles_lost_y_pred_gbf', 'receiving_air_yards_y_pred_gbf', 'receiving_first_downs_y_pred_gbf', 'receiving_epa_y_pred_gbf', 'receiving_2pt_conversions_y_pred_gbf', 'target_share_y_pred_gbf', 'games_y_pred_gbf', 'tgt_sh_y_pred_gbf', 'ay_sh_y_pred_gbf', 'ry_sh_y_pred_gbf', 'rtd_sh_y_pred_gbf', 'rfd_sh_y_pred_gbf', 'dom_y_pred_gbf', 'yptm

In [120]:
ff_pts = {'passing_yards_y_pred_gbf':.04, 'passing_tds_y_pred_gbf':4, 'interceptions_y_pred_gbf':-2, 'sack_fumbles_y_pred_gbf':-1,
         'sack_fumbles_lost_y_pred_gbf':-1, 'passing_2pt_conversions_y_pred_gbf':2, 'rushing_yards_y_pred_gbf':.1, 
         'rushing_tds_y_pred_gbf':6, 'rushing_fumbles_y_pred_gbf':-1, 'rushing_fumbles_lost_y_pred_gbf':-1, 
         'rushing_first_downs_y_pred_gbf': .5,'rushing_2pt_conversions_y_pred_gbf':2, 'receptions_y_pred_gbf':.5,
         'receiving_yards_y_pred_gbf': .1, 'receiving_tds_y_pred_gbf':6, 'receiving_fumbles_y_pred_gbf':-1, 
         'receiving_fumbles_lost_y_pred_gbf':-1, 'receiving_first_downs_y_pred_gbf': .5,'receiving_2pt_conversions_y_pred_gbf':2}

In [121]:
#Cleaning df

columns = ['index', 'season', 'cfb_id', 'player_name', 'pos', 'school', 'completions_y_pred_gbf', 'attempts_y_pred_gbf', 'passing_yards_y_pred_gbf', 'passing_tds_y_pred_gbf', 'interceptions_y_pred_gbf', 'sacks_y_pred_gbf', 'sack_yards_y_pred_gbf', 'sack_fumbles_y_pred_gbf', 'sack_fumbles_lost_y_pred_gbf', 'passing_2pt_conversions_y_pred_gbf', 'carries_y_pred_gbf', 'rushing_yards_y_pred_gbf', 'rushing_tds_y_pred_gbf', 'rushing_fumbles_y_pred_gbf', 'rushing_fumbles_lost_y_pred_gbf', 'rushing_first_downs_y_pred_gbf', 'rushing_epa_y_pred_gbf', 'rushing_2pt_conversions_y_pred_gbf', 'receptions_y_pred_gbf', 'targets_y_y_pred_gbf', 'receiving_yards_y_pred_gbf', 'receiving_tds_y_pred_gbf', 'receiving_fumbles_y_pred_gbf', 'receiving_fumbles_lost_y_pred_gbf', 'receiving_air_yards_y_pred_gbf', 'receiving_first_downs_y_pred_gbf', 'receiving_epa_y_pred_gbf', 'receiving_2pt_conversions_y_pred_gbf', 'games_y_pred_gbf', 'tgt_sh_y_pred_gbf', 'ry_sh_y_pred_gbf', 'rtd_sh_y_pred_gbf', 'rfd_sh_y_pred_gbf', 'dom_y_pred_gbf']

df_export = df_export[columns]
df_export = df_export.round(decimals=2)

df_export['scrimage_yards_pred'] = df_export['rushing_yards_y_pred_gbf'] + df_export['receiving_yards_y_pred_gbf']
df_export['touch_pred'] = df_export['carries_y_pred_gbf'] + df_export['receptions_y_pred_gbf']

df_export['proj_ff_pts'] = df_export[ff_pts.keys()].mul(list(ff_pts.values()), axis=1).sum(axis=1)
df_export['proj_ff_pts_per_game'] = df_export['proj_ff_pts']/df_export['games_y_pred_gbf']

df_export = df_export.replace([np.inf, -np.inf], np.nan)
df_export = df_export.fillna(0)

Unnamed: 0,index,season,cfb_id,player_name,pos,school,completions_y_pred_gbf,attempts_y_pred_gbf,passing_yards_y_pred_gbf,passing_tds_y_pred_gbf,...,games_y_pred_gbf,tgt_sh_y_pred_gbf,ry_sh_y_pred_gbf,rtd_sh_y_pred_gbf,rfd_sh_y_pred_gbf,dom_y_pred_gbf,scrimage_yards_pred,touch_pred,proj_ff_pts,proj_ff_pts_per_game
3044,3032.0,2024.0,michael-penix-jr-1,Michael Penix Jr.,QB,Washington,159.26,273.79,2005.39,22.52,...,7.24,0.0,0.0,0.0,0.0,0.0,113.0,34.67,173.4106,23.95174
3098,3089.0,2024.0,bo-nix-1,Bo Nix,QB,Oregon,213.75,310.15,2565.75,22.39,...,10.95,0.0,0.0,0.0,0.0,0.0,237.87,63.17,211.697,19.333059
2997,2979.0,2024.0,isaac-guerendo-1,Isaac Guerendo,RB,Louisville,0.0,0.0,0.0,0.0,...,4.66,0.06,0.01,0.07,0.04,0.04,398.56,81.51,80.751,17.328541
3185,3186.0,2024.0,caleb-williams-3,Caleb Williams,QB,USC,289.7,473.36,3151.48,14.52,...,12.7,0.0,0.0,0.0,0.0,0.0,358.55,66.08,216.5642,17.052299
3072,3062.0,2024.0,jj-mccarthy-1,J.J. McCarthy,QB,Michigan,271.62,346.31,2562.43,18.06,...,11.4,-0.0,0.0,0.0,0.0,0.0,178.22,29.25,187.9592,16.487649
2966,2935.0,2024.0,jayden-daniels-1,Jayden Daniels,QB,LSU,289.1,413.43,2539.14,17.83,...,13.71,0.0,0.0,0.0,0.0,0.0,403.87,78.75,215.7076,15.733596
3014,3001.0,2024.0,markeise-irving-1,Bucky Irving,RB,Oregon,0.0,0.0,0.0,0.0,...,10.71,0.09,0.09,0.09,0.08,0.08,848.99,169.04,155.394,14.509244
3070,3060.0,2024.0,drake-maye-1,Drake Maye,QB,North Carolina,265.62,443.23,2765.14,14.61,...,14.0,0.0,0.0,0.0,0.0,0.0,313.51,53.45,198.0916,14.1494
3127,3122.0,2024.0,keilan-robinson-1,Keilan Robinson,RB,Texas,0.0,0.0,0.0,0.0,...,4.12,0.05,0.04,0.08,0.03,0.06,311.68,73.03,53.643,13.020146
3004,2989.0,2024.0,marvin-harrison-jr-1,Marvin Harrison,WR,Ohio St.,0.0,0.0,-0.0,0.0,...,14.69,0.25,0.29,0.26,0.29,0.27,880.01,68.53,181.901,12.382641


In [124]:
#used this cell to filter the df for curiosity

df_export[(df_export['season'] == 2024) & (df_export['games_y_pred_gbf'] >= 4)].sort_values(by='proj_ff_pts_per_game', ascending=False).head(10)

Unnamed: 0,index,season,cfb_id,player_name,pos,school,completions_y_pred_gbf,attempts_y_pred_gbf,passing_yards_y_pred_gbf,passing_tds_y_pred_gbf,...,games_y_pred_gbf,tgt_sh_y_pred_gbf,ry_sh_y_pred_gbf,rtd_sh_y_pred_gbf,rfd_sh_y_pred_gbf,dom_y_pred_gbf,scrimage_yards_pred,touch_pred,proj_ff_pts,proj_ff_pts_per_game
3044,3032.0,2024.0,michael-penix-jr-1,Michael Penix Jr.,QB,Washington,159.26,273.79,2005.39,22.52,...,7.24,0.0,0.0,0.0,0.0,0.0,113.0,34.67,173.4106,23.95174
3098,3089.0,2024.0,bo-nix-1,Bo Nix,QB,Oregon,213.75,310.15,2565.75,22.39,...,10.95,0.0,0.0,0.0,0.0,0.0,237.87,63.17,211.697,19.333059
2997,2979.0,2024.0,isaac-guerendo-1,Isaac Guerendo,RB,Louisville,0.0,0.0,0.0,0.0,...,4.66,0.06,0.01,0.07,0.04,0.04,398.56,81.51,80.751,17.328541
3185,3186.0,2024.0,caleb-williams-3,Caleb Williams,QB,USC,289.7,473.36,3151.48,14.52,...,12.7,0.0,0.0,0.0,0.0,0.0,358.55,66.08,216.5642,17.052299
3072,3062.0,2024.0,jj-mccarthy-1,J.J. McCarthy,QB,Michigan,271.62,346.31,2562.43,18.06,...,11.4,-0.0,0.0,0.0,0.0,0.0,178.22,29.25,187.9592,16.487649
2966,2935.0,2024.0,jayden-daniels-1,Jayden Daniels,QB,LSU,289.1,413.43,2539.14,17.83,...,13.71,0.0,0.0,0.0,0.0,0.0,403.87,78.75,215.7076,15.733596
3014,3001.0,2024.0,markeise-irving-1,Bucky Irving,RB,Oregon,0.0,0.0,0.0,0.0,...,10.71,0.09,0.09,0.09,0.08,0.08,848.99,169.04,155.394,14.509244
3070,3060.0,2024.0,drake-maye-1,Drake Maye,QB,North Carolina,265.62,443.23,2765.14,14.61,...,14.0,0.0,0.0,0.0,0.0,0.0,313.51,53.45,198.0916,14.1494
3127,3122.0,2024.0,keilan-robinson-1,Keilan Robinson,RB,Texas,0.0,0.0,0.0,0.0,...,4.12,0.05,0.04,0.08,0.03,0.06,311.68,73.03,53.643,13.020146
3004,2989.0,2024.0,marvin-harrison-jr-1,Marvin Harrison,WR,Ohio St.,0.0,0.0,-0.0,0.0,...,14.69,0.25,0.29,0.26,0.29,0.27,880.01,68.53,181.901,12.382641


In [123]:
#Exporting the rookie perdictions into CSV

#df_export.to_csv('filtered_rookie_proj.csv')