In [126]:
#Import Packages

import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import nfl_data_py as nfl
import time
import warnings
import cfbd
warnings.filterwarnings("ignore")

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [127]:
#Specifying Year Range
years = range(2014,2025)

In [128]:
#Pull in CSVs from Personal GitHubs
epa_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/combined_EPA.csv")
blocking_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/blocking_stats.csv")
passing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/passing_stats.csv")
receiving_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/receiving_stats.csv")
rushing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/rushing_stats.csv")
defensive_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/defensive_stats.csv")

power_5_team  = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/power_5_teams.csv")

# Creating Player Profiles

In [129]:
#Creating Passing DF
positions = ['ED', 'DI', 'CB', 'WR', 'LB', 'S', 'G', 'HB', 'TE', 'FB', 'T', 'C', 'QB']

filtered_passing_career_df = passing_df.fillna(0).sort_values(by='Season')

filtered_passing_career_df = filtered_passing_career_df.groupby(['player_id', 'player']).agg({'Season':'count', 'position':'last', 'player_game_count':'sum', 'team_name':'last',
        'player_game_count':'sum',  'aimed_passes':'sum', 'attempts':'sum', 'avg_depth_of_target':'mean', 
        'avg_time_to_throw':'mean', 'bats':'sum','big_time_throws':'sum', 'completions':'sum','declined_penalties':'sum', 
        'def_gen_pressures':'sum', 'dropbacks':'sum','drops':'sum', 'first_downs':'sum', 'grades_hands_fumble':'median',
        'grades_offense':'median', 'grades_pass':'median', 'hit_as_threw':'sum','interceptions':'sum', 
        'passing_snaps':'sum', 'penalties':'sum', 'pressure_to_sack_rate':'last','qb_rating':'last', 'sacks':'sum', 
        'scrambles':'sum', 'spikes':'sum','thrown_aways':'sum', 'touchdowns':'sum', 'turnover_worthy_plays':'sum','yards':'sum'})

filtered_passing_career_df = filtered_passing_career_df.reset_index()

#Creating Rushing DF

filtered_rushing_career_df = rushing_df.fillna(0).sort_values(by='Season')

filtered_rushing_career_df = filtered_rushing_career_df.groupby(['player_id', 'player']).agg({'Season':'count','position':'last', 'player_game_count':'sum', 'team_name':'last',
        'attempts':'sum','avoided_tackles':'sum', 'breakaway_attempts':'sum',
       'breakaway_yards':'sum', 'designed_yards':'sum', 'elu_recv_mtf':'last', 'elu_rush_mtf':'last',
       'elu_yco':'last', 'elusive_rating':'last', 'explosive':'sum', 'first_downs':'sum',
       'fumbles':'sum', 'gap_attempts':'sum', 'grades_run':'median', 'grades_offense_penalty':'median','grades_run_block':'median', 
       'longest':'max','run_plays':'sum', 'scramble_yards':'sum','total_touches':'sum', 'touchdowns':'sum', 'yards':'sum',
       'yards_after_contact':'sum', 'yco_attempt':'sum', 'ypa':'mean', 'zone_attempts':'sum'})

filtered_rushing_career_df = filtered_rushing_career_df.reset_index()

#Creating Receiving DF
filtered_receiving_career_df = receiving_df.fillna(0).sort_values(by='Season')

filtered_receiving_career_df = filtered_receiving_career_df.groupby(['player_id', 'player']).agg({'Season':'count', 'position':'last', 'player_game_count':'sum', 'team_name':'last',
        'avg_depth_of_target' : 'mean', 'avoided_tackles':'sum','contested_receptions':'sum',
       'contested_targets':'sum', 'declined_penalties':'sum', 'drops':'sum','first_downs':'sum', 'fumbles':'sum',
       'grades_hands_drop':'median','grades_hands_fumble':'median', 'grades_offense':'median','grades_pass_route':'median', 
       'inline_rate':'mean', 'inline_snaps':'sum', 'interceptions':'sum','longest':'max', 'pass_plays':'sum', 
       'receptions':'sum', 'route_rate' :'mean', 'routes':'sum', 'slot_rate':'mean', 'slot_snaps':'sum',
       'targeted_qb_rating':'mean', 'targets':'sum', 'touchdowns':'sum', 'wide_rate':'mean','wide_snaps':'sum',
       'yards':'sum', 'yards_after_catch':'sum'})

filtered_receiving_career_df = filtered_receiving_career_df.reset_index()

#Creating Blocking DF
filtered_blocking_career_df = blocking_df.fillna(0).sort_values(by='Season')

filtered_blocking_career_df = filtered_blocking_career_df.groupby(['player_id', 'player']).agg({'Season':'count','position':'last', 'player_game_count':'sum', 'team_name':'last',
        'declined_penalties':'sum', 'grades_offense':'median', 'grades_pass_block':'median',
       'grades_run_block':'median', 'hits_allowed':'sum', 'hurries_allowed':'sum','snap_counts_offense':'sum',
        'non_spike_pass_block':'sum', 'snap_counts_pass_block':'sum', 'penalties':'sum', 'pressures_allowed':'sum',
       'sacks_allowed':'sum', 'snap_counts_block':'sum', 'snap_counts_run_block':'sum', 'snap_counts_lt':'sum', 
        'snap_counts_lg':'sum', 'snap_counts_ce':'sum', 'snap_counts_rg':'sum', 'snap_counts_rt':'sum',  'snap_counts_te':'sum'})

filtered_blocking_career_df = filtered_blocking_career_df.reset_index()

#Creating Blocking DF
filtered_defensive_career_df = defensive_df.fillna(0).sort_values(by='Season')

filtered_defensive_career_df = filtered_defensive_career_df.groupby(['player_id', 'player']).agg({'Season':'count','position':'last', 'player_game_count':'sum', 'team_name':'last',
       'assists':'sum', 'batted_passes':'sum', 'declined_penalties':'sum', 'forced_fumbles':'sum',
       'fumble_recoveries':'sum', 'fumble_recovery_touchdowns':'sum',
       'grades_coverage_defense':'median', 'grades_defense':'median', 'grades_defense_penalty':'median',
       'grades_pass_rush_defense':'median', 'grades_run_defense':'median', 'grades_tackle':'median',
       'hits':'sum', 'hurries':'sum', 'interception_touchdowns':'sum', 'interceptions':'sum',
       'missed_tackle_rate':'mean', 'missed_tackles':'sum', 'pass_break_ups':'sum',
       'penalties':'sum','receptions':'sum', 'sacks':'sum', 'safeties':'sum',
       'stops':'sum', 'tackles':'sum', 'tackles_for_loss':'sum', 'targets':'sum', 'total_pressures':'sum',
       'touchdowns':'sum', 'yards':'sum', 'yards_after_catch':'sum'})

filtered_defensive_career_df = filtered_defensive_career_df.reset_index()

#Creating EPA DF
filtered_epa_career_df = epa_df.fillna(0).sort_values(by='Season')

filtered_epa_career_df = filtered_epa_career_df.groupby(['Id', 'Name']).agg({'Season':'count','Position':'last', 'Team': 'last',
        'CountablePlays':'sum', 'TotalPPA All':'sum', 'TotalPPA Pass':'sum','TotalPPA Rush':'sum', 
        'TotalPPA FirstDown':'sum', 'TotalPPA SecondDown':'sum','TotalPPA ThirdDown':'sum', 
        'TotalPPA StandardDowns':'sum','TotalPPA PassingDowns':'sum'})

filtered_epa_career_df = filtered_epa_career_df.reset_index()

#Renaming same name columns
filtered_passing_career_df = filtered_passing_career_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_passing', 'drops': 'drops_passing', 'first_downs': 'first_downs_passing',
        'interceptions':'interceptions_passing', 'touchdowns':'touchdowns_passing', 'yards':'yards_passing'})
filtered_rushing_career_df = filtered_rushing_career_df.rename(columns={'attempts' : 'attempts_rushing', 'first_downs':'first_downs_rushing',
        'touchdowns':'touchdowns_rushing', 'yards':'yards_rushing', 'longest': 'longest_rush'})
filtered_receiving_career_df = filtered_receiving_career_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_receiving', 'drops': 'drops_receiving', 'first_downs': 'first_downs_receiving',
        'interceptions':'interceptions_receiving', 'touchdowns':'touchdowns_receiving', 'yards':'yards_receiving',
        'targets':'targets_receiving'})
filtered_defensive_career_df = filtered_defensive_career_df.rename(columns={'fumble_recoveries':'fumble_recoveries_defensive',
        'interceptions': 'interceptions_defensive', 'receptions':'receptions_allowed', 'sacks':'sacks_defensive',
        'targets':'targets_allowed', 'touchdowns':'coverage_touchdowns_allowed', 'yards':'coverage_yards_allowed'})
filtered_epa_career_df = filtered_epa_career_df.rename(columns={'Id':'player_id', 'Name':'player', 'Position':'position'})

career_dfs = [filtered_passing_career_df, filtered_rushing_career_df, filtered_receiving_career_df, filtered_blocking_career_df, filtered_defensive_career_df]

In [130]:
def combine_dfs(df_list, on_columns):
    combined_df = pd.concat(df_list, axis=0)
    return combined_df

on_columns = ['player_id', 'player']

career_df = combine_dfs(career_dfs, on_columns)

career_df = career_df[career_df['position'].isin(positions)]
career_df = career_df.groupby(['player_id', 'player']).max()
career_df = career_df.fillna(0)
career_df['offensive/defensive_grade'] = career_df[['grades_offense','grades_defense']].max(axis=1)
career_df.sort_values(by='offensive/defensive_grade', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Season,position,player_game_count,team_name,aimed_passes,attempts_passing,avg_depth_of_target_passing,avg_time_to_throw,bats,big_time_throws,...,sacks_defensive,safeties,stops,tackles,tackles_for_loss,targets_allowed,total_pressures,coverage_touchdowns_allowed,coverage_yards_allowed,offensive/defensive_grade
player_id,player,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
13334,Vincent Hall,1,ED,2,HOUSTON,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,97.4
81706,Max Yarbrough,1,G,2,LA LAFAYET,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.1
175385,Marcellius Pulliam,1,LB,2,MIAMI FL,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,95.1
28022,Joe Burrow,4,QB,37,OHIO STATE,895.0,943.0,8.025,2.5725,14.0,66.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.0
10019,Greg Mancz,1,C,11,TOLEDO,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145058,Cayden Saunders,1,HB,1,OHIO STATE,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16120,Jacob Martinez,1,HB,1,UTEP,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26280,Tyler Henderson,1,HB,1,WAKE,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
156865,Tae Meadows,1,HB,1,TROY,0.0,0.0,0.000,0.0000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [131]:
#Adding Career Rates

#'AveragePPA All', 'AveragePPA Pass','AveragePPA Rush', 'AveragePPA FirstDown', 'AveragePPA SecondDown','AveragePPA ThirdDown', 'AveragePPA StandardDowns', 'AveragePPA PassingDowns'

#Passing Career Rates
career_df['yards_passing/att_career'] = career_df['yards_passing'] / career_df['attempts_passing']
career_df['completion_pct_career'] = career_df['completions'] / career_df['attempts_passing']
career_df['adj_completion_pct_career'] = (career_df['completions'] + career_df['drops_passing']) / career_df['aimed_passes']
career_df['touchdown_pct_career'] = career_df['touchdowns_passing'] / career_df['attempts_passing']
career_df['interception_pct_career'] = career_df['interceptions_passing'] / career_df['attempts_passing']
career_df['dangerous_play_pct_career'] = career_df['turnover_worthy_plays'] / career_df['attempts_passing']
career_df['money_throw_pct_career'] = career_df['big_time_throws'] / career_df['attempts_passing']

#Rushing Career Rates
career_df['breakaway_runs/att_career'] = career_df['breakaway_attempts'] / career_df['attempts_rushing']
career_df['breakaway_runs/att_career'] = career_df['breakaway_attempts'] / career_df['attempts_rushing']
career_df['fumbles/att_career'] = career_df['fumbles'] / career_df['attempts_rushing']
career_df['touchdowns_rushing/att_career'] = career_df['touchdowns_rushing'] / career_df['attempts_rushing']
career_df['yards_rushing/att_career'] = career_df['yards_rushing'] / career_df['attempts_rushing']

#Receiving Career Rates
career_df['catch_pct_career'] = career_df['receptions'] / career_df['targets_receiving']
career_df['contested_catch_pct_career'] = career_df['contested_receptions'] / career_df['contested_targets']
career_df['touchdowns_receiving/target_career'] = career_df['touchdowns_receiving'] / career_df['targets_receiving']
career_df['interceptions_receiving/target_career'] = career_df['interceptions_receiving'] / career_df['targets_receiving']

career_df['targets/route_ran_career'] = career_df['targets_receiving'] / career_df['routes']

career_df['drops/target_career'] = career_df['drops_receiving'] / career_df['targets_receiving']

career_df['yards/reception_career'] = career_df['yards_receiving'] / career_df['receptions']
career_df['yards/target_career'] = career_df['yards_receiving'] / career_df['targets_receiving']
career_df['yards/route_ran_career'] = career_df['yards_receiving'] / career_df['routes']

career_df['yards_after_catch/reception_career'] = career_df['yards_after_catch'] / career_df['receptions']
career_df['yards_after_catch/target_career'] = career_df['yards_after_catch'] / career_df['targets_receiving']
career_df['yards_after_catch/route_ran_career'] = career_df['yards_after_catch'] / career_df['routes']

career_df['wide_snaps_routes_pct_career'] = career_df['wide_snaps'] / career_df['routes']
career_df['slot_snaps_routes_pct_career'] = career_df['slot_snaps'] / career_df['routes']
career_df['slot_snaps_routes_pct_career'] = career_df['inline_snaps'] / career_df['routes']

#Blocking Career Rates
career_df['pass_blocking_efficency'] = career_df['pressures_allowed'] / career_df['non_spike_pass_block']
career_df['blocking_pct_per play'] = career_df['snap_counts_block'] / career_df['snap_counts_offense']

#General Offensive Career Rates
career_df['avoided_tackles/touches_career'] = career_df['avoided_tackles'] / (career_df['attempts_rushing'] + career_df['targets_receiving'])
career_df['fumbles/touches_career'] = career_df['fumbles'] / (career_df['attempts_rushing'] + career_df['targets_receiving'])

#Defensive Career Rates
career_df['allowed_catch_pct'] = career_df['receptions_allowed'] / career_df['targets_allowed']
career_df['yards_per_catch_allowed'] = career_df['coverage_yards_allowed'] / career_df['receptions_allowed']

career_df = career_df.fillna(0)
career_df = career_df.reset_index()
career_df

Unnamed: 0,player_id,player,Season,position,player_game_count,team_name,aimed_passes,attempts_passing,avg_depth_of_target_passing,avg_time_to_throw,...,yards_after_catch/target_career,yards_after_catch/route_ran_career,wide_snaps_routes_pct_career,slot_snaps_routes_pct_career,pass_blocking_efficency,blocking_pct_per play,avoided_tackles/touches_career,fumbles/touches_career,allowed_catch_pct,yards_per_catch_allowed
0,9434,Jameis Winston,1,QB,13,FLORIDA ST,450.0,467.0,8.8,2.74,...,0.000000,0.000000,0.000000,0.0,0.000000,0.381006,0.333333,0.375000,0.000000,0.0
1,9435,Marcus Mariota,1,QB,15,OREGON,434.0,444.0,9.8,2.83,...,10.000000,10.000000,0.000000,0.0,0.000000,0.415870,0.211268,0.112676,0.000000,0.0
2,9436,Dante Fowler Jr.,1,ED,12,FLORIDA,0.0,0.0,0.0,0.00,...,inf,inf,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.666667,1.0
3,9437,Amari Cooper,1,WR,14,ALABAMA,0.0,0.0,0.0,0.00,...,5.045977,2.027714,0.819861,0.0,0.000000,0.447304,0.145251,0.000000,0.000000,0.0
4,9438,Brandon Scherff,1,T,13,IOWA,0.0,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.0,0.049689,1.000000,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37819,183052,Marlon McClendon,1,CB,1,GA STATE,0.0,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
37820,183053,Brooks Hickman,1,WR,1,GA STATE,0.0,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.0,0.000000,1.000000,0.000000,0.000000,0.000000,0.0
37821,183054,Jace Larsen,1,WR,1,S ALABAMA,0.0,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.0,0.000000,1.000000,0.000000,0.000000,0.000000,0.0
37822,183055,Jamal Meriweather,1,T,1,GEORGIA,0.0,0.0,0.0,0.00,...,0.000000,0.000000,0.000000,0.0,0.000000,1.000000,0.000000,0.000000,0.000000,0.0


In [132]:
filtered_epa_career_df['player_key'] = filtered_epa_career_df['player'] + "_" + filtered_epa_career_df['position']
career_df['player_key'] = career_df['player'] + "_" + career_df['position']

team_name_dict = dict(zip(power_5_team['epa_team_name'], power_5_team['pff_ team_name']))
power_5_dict = dict(zip(power_5_team['pff_ team_name'], power_5_team['P5?']))

filtered_epa_career_df['Team'] = filtered_epa_career_df['Team'].map(team_name_dict)
filtered_epa_career_df['power_5_team'] = filtered_epa_career_df['Team'].map(power_5_dict)
filtered_epa_career_df = filtered_epa_career_df.fillna(0)
filtered_epa_career_df

Unnamed: 0,player_id,player,Season,position,Team,CountablePlays,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,TotalPPA SecondDown,TotalPPA ThirdDown,TotalPPA StandardDowns,TotalPPA PassingDowns,player_key,power_5_team
0,100009,Johnny Jackson,1,WR,ARIZONA,62,41.813,41.813,0.000,9.542,16.772,22.345,13.214,28.600,Johnny Jackson_WR,1.0
1,102597,Will Rogers,2,QB,MISS STATE,902,212.821,209.129,3.692,36.180,73.329,108.438,81.998,130.823,Will Rogers_QB,1.0
2,107494,Trey Sanders,4,RB,TCU,175,17.284,3.987,13.297,2.464,-4.324,17.137,13.579,3.704,Trey Sanders_RB,1.0
3,109907,Jon Lee,1,RB,AIR FORCE,9,1.120,0.044,1.076,0.444,1.040,-0.363,-0.770,1.890,Jon Lee_RB,0.0
4,136429,Cedric Patterson III,2,WR,RICE,48,38.081,36.642,1.439,19.821,6.436,11.824,18.589,19.492,Cedric Patterson III_WR,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10481,5161147,Marquis Montgomery,1,WR,CAL,1,0.711,0.711,0.000,0.711,0.000,0.000,0.711,0.000,Marquis Montgomery_WR,1.0
10482,5162646,Marvin Sims,1,WR,W KENTUCKY,1,1.297,1.297,0.000,0.000,0.000,0.000,1.297,0.000,Marvin Sims_WR,0.0
10483,5162845,Jacqez Barksdale,1,RB,BUFFALO,48,5.417,-2.307,7.724,-0.636,4.516,1.537,1.224,4.194,Jacqez Barksdale_RB,0.0
10484,5163021,Jacob Godfrey,1,WR,NEW MEXICO,1,0.180,0.180,0.000,0.180,0.000,0.000,0.180,0.000,Jacob Godfrey_WR,0.0


In [133]:
career_df = career_df.merge(filtered_epa_career_df, on='player_key', suffixes=['','_epa'])
career_df

Unnamed: 0,player_id,player,Season,position,player_game_count,team_name,aimed_passes,attempts_passing,avg_depth_of_target_passing,avg_time_to_throw,...,CountablePlays,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,TotalPPA SecondDown,TotalPPA ThirdDown,TotalPPA StandardDowns,TotalPPA PassingDowns,power_5_team
0,9434,Jameis Winston,1,QB,13,FLORIDA ST,450.0,467.0,8.8,2.74,...,520,218.145,206.742,11.403,60.858,64.711,98.300,104.078,114.066,1.0
1,9435,Marcus Mariota,1,QB,15,OREGON,434.0,444.0,9.8,2.83,...,578,338.945,249.678,89.267,134.438,91.456,103.840,213.143,125.802,1.0
2,9437,Amari Cooper,1,WR,14,ALABAMA,0.0,0.0,0.0,0.00,...,175,134.560,136.060,-1.501,59.015,20.406,52.060,75.096,59.464,1.0
3,9440,Kevin White,1,WR,13,W VIRGINIA,0.0,0.0,0.0,0.00,...,157,80.148,80.148,0.000,24.238,27.532,37.282,32.963,47.185,1.0
4,9447,DeVante Parker,1,WR,6,LOUISVILLE,0.0,0.0,0.0,0.00,...,67,60.399,60.399,0.000,28.299,14.167,17.932,37.819,22.580,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7135,176638,Ja'Kobi Lane,1,WR,6,USC,0.0,0.0,0.0,0.00,...,7,10.980,10.980,0.000,2.889,4.674,3.416,4.323,6.657,1.0
7136,176642,Duce Robinson,1,WR,13,USC,0.0,0.0,0.0,0.00,...,19,24.561,24.561,0.000,5.203,14.576,4.782,7.300,17.261,1.0
7137,176648,Dallen Bentley,1,TE,6,UTAH,0.0,0.0,0.0,0.00,...,1,0.072,0.072,0.000,0.000,0.072,0.000,0.000,0.072,1.0
7138,181169,Blake Murphy,1,QB,4,LA MONROE,95.0,105.0,11.3,2.92,...,120,20.196,17.924,2.272,5.781,0.753,13.906,1.595,18.601,0.0


In [134]:
career_df['Total_EPA_avg'] = career_df['TotalPPA All'] / career_df['CountablePlays']
career_df['EPA_Pass_avg'] = career_df['TotalPPA Pass'] / career_df['CountablePlays']
career_df['EPA_Rush_avg'] = career_df['TotalPPA Rush'] / career_df['CountablePlays']
career_df['EPA_1st_down_avg'] = career_df['TotalPPA FirstDown'] / career_df['CountablePlays']
career_df['EPA_2nd_down_avg'] = career_df['TotalPPA SecondDown'] / career_df['CountablePlays']
career_df['EPA_3rd_down_avg'] = career_df['TotalPPA ThirdDown'] / career_df['CountablePlays']
career_df['EPA_StandardDowns_avg'] = career_df['TotalPPA StandardDowns'] / career_df['CountablePlays']
career_df['EPA_PassingDowns_avg'] = career_df['TotalPPA PassingDowns'] / career_df['CountablePlays']

career_df

Unnamed: 0,player_id,player,Season,position,player_game_count,team_name,aimed_passes,attempts_passing,avg_depth_of_target_passing,avg_time_to_throw,...,TotalPPA PassingDowns,power_5_team,Total_EPA_avg,EPA_Pass_avg,EPA_Rush_avg,EPA_1st_down_avg,EPA_2nd_down_avg,EPA_3rd_down_avg,EPA_StandardDowns_avg,EPA_PassingDowns_avg
0,9434,Jameis Winston,1,QB,13,FLORIDA ST,450.0,467.0,8.8,2.74,...,114.066,1.0,0.419510,0.397581,0.021929,0.117035,0.124444,0.189038,0.200150,0.219358
1,9435,Marcus Mariota,1,QB,15,OREGON,434.0,444.0,9.8,2.83,...,125.802,1.0,0.586410,0.431969,0.154441,0.232592,0.158228,0.179654,0.368760,0.217651
2,9437,Amari Cooper,1,WR,14,ALABAMA,0.0,0.0,0.0,0.00,...,59.464,1.0,0.768914,0.777486,-0.008577,0.337229,0.116606,0.297486,0.429120,0.339794
3,9440,Kevin White,1,WR,13,W VIRGINIA,0.0,0.0,0.0,0.00,...,47.185,1.0,0.510497,0.510497,0.000000,0.154382,0.175363,0.237465,0.209955,0.300541
4,9447,DeVante Parker,1,WR,6,LOUISVILLE,0.0,0.0,0.0,0.00,...,22.580,1.0,0.901478,0.901478,0.000000,0.422373,0.211448,0.267642,0.564463,0.337015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7135,176638,Ja'Kobi Lane,1,WR,6,USC,0.0,0.0,0.0,0.00,...,6.657,1.0,1.568571,1.568571,0.000000,0.412714,0.667714,0.488000,0.617571,0.951000
7136,176642,Duce Robinson,1,WR,13,USC,0.0,0.0,0.0,0.00,...,17.261,1.0,1.292684,1.292684,0.000000,0.273842,0.767158,0.251684,0.384211,0.908474
7137,176648,Dallen Bentley,1,TE,6,UTAH,0.0,0.0,0.0,0.00,...,0.072,1.0,0.072000,0.072000,0.000000,0.000000,0.072000,0.000000,0.000000,0.072000
7138,181169,Blake Murphy,1,QB,4,LA MONROE,95.0,105.0,11.3,2.92,...,18.601,0.0,0.168300,0.149367,0.018933,0.048175,0.006275,0.115883,0.013292,0.155008


In [135]:
#Creating final season DFs

#Creating Passing DF
filtered_passing_final_season_df = passing_df.fillna(0).sort_values(by='Season')

filtered_passing_final_season_df = filtered_passing_final_season_df.groupby(['player_id', 'player']).last()

filtered_passing_final_season_df = filtered_passing_final_season_df.reset_index()

#Creating Rushing DF

filtered_rushing_final_season_df = rushing_df.fillna(0).sort_values(by='Season')

filtered_rushing_final_season_df = filtered_rushing_final_season_df.groupby(['player_id', 'player']).last()

filtered_rushing_final_season_df = filtered_rushing_final_season_df.reset_index()

#Creating Receiving DF
filtered_receiving_final_season_df = receiving_df.fillna(0).sort_values(by='Season')

filtered_receiving_final_season_df = filtered_receiving_final_season_df.groupby(['player_id', 'player']).last()

filtered_receiving_final_season_df = filtered_receiving_final_season_df.reset_index()

#Creating Blocking DF
filtered_blocking_final_season_df = blocking_df.fillna(0).sort_values(by='Season')

filtered_blocking_final_season_df = filtered_blocking_final_season_df.groupby(['player_id', 'player']).last()

filtered_blocking_final_season_df = filtered_blocking_final_season_df.reset_index()

#Creating Blocking DF
filtered_defensive_final_season_df = defensive_df.fillna(0).sort_values(by='Season')

filtered_defensive_final_season_df = filtered_defensive_final_season_df.groupby(['player_id', 'player']).last()

filtered_defensive_final_season_df = filtered_defensive_final_season_df.reset_index()

#Creating EPA DF
filtered_epa_final_season_df = epa_df.fillna(0).sort_values(by='Season')

filtered_epa_final_season_df = filtered_epa_final_season_df.groupby(['Id', 'Name']).last()

filtered_epa_final_season_df = filtered_epa_final_season_df.reset_index()

#Renaming same name columns
filtered_passing_final_season_df = filtered_passing_final_season_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_passing', 'drops': 'drops_passing', 'first_downs': 'first_downs_passing',
        'interceptions':'interceptions_passing', 'touchdowns':'touchdowns_passing', 'yards':'yards_passing'})
filtered_rushing_final_season_df = filtered_rushing_final_season_df.rename(columns={'attempts' : 'attempts_rushing', 'first_downs':'first_downs_rushing',
        'touchdowns':'touchdowns_rushing', 'yards':'yards_rushing', 'longest': 'longest_rush'})
filtered_receiving_final_season_df = filtered_receiving_final_season_df.rename(columns={'attempts' : 'attempts_passing', 
        'avg_depth_of_target':'avg_depth_of_target_receiving', 'drops': 'drops_receiving', 'first_downs': 'first_downs_receiving',
        'interceptions':'interceptions_receiving', 'touchdowns':'touchdowns_receiving', 'yards':'yards_receiving',
        'targets':'targets_receiving'})
filtered_defensive_final_season_df = filtered_defensive_final_season_df.rename(columns={'fumble_recoveries':'fumble_recoveries_defensive',
        'interceptions': 'interceptions_defensive', 'receptions':'receptions_allowed', 'sacks':'sacks_defensive',
        'targets':'targets_allowed', 'touchdowns':'coverage_touchdowns_allowed', 'yards':'coverage_yards_allowed'})
filtered_epa_final_season_df = filtered_epa_final_season_df.rename(columns={'Id':'player_id', 'Name':'player', 'Position':'position'})

final_season_dfs = [filtered_passing_final_season_df, filtered_rushing_final_season_df, filtered_receiving_final_season_df,
      filtered_blocking_final_season_df, filtered_defensive_final_season_df]

In [136]:
final_season_df = pd.DataFrame()

final_season_df = combine_dfs(final_season_dfs, on_columns)

final_season_df = final_season_df[final_season_df['position'].isin(positions)]
final_season_df = final_season_df.groupby(['player_id', 'player']).last()
final_season_df = final_season_df.fillna(0)
final_season_df['offensive/defensive_grade'] = final_season_df[['grades_offense','grades_defense']].max(axis=1)

final_season_df = final_season_df.reset_index()
#final_season_df = final_season_df.drop('player_id',axis=1)
final_season_df.sort_values(by='offensive/defensive_grade', ascending=False)

Unnamed: 0,player_id,player,Season,position,team_name,player_game_count,accuracy_percent,aimed_passes,attempts_passing,avg_depth_of_target_passing,...,snap_counts_run_defense,snap_counts_slot,stops,tackles,tackles_for_loss,targets_allowed,total_pressures,coverage_touchdowns_allowed,coverage_yards_allowed,offensive/defensive_grade
2198,13334,Vincent Hall,2016,ED,HOUSTON,2,0.0,0.0,0.0,0.0,...,8.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,97.4
26673,100551,Laiatu Latu,2023,ED,UCLA,12,0.0,0.0,0.0,0.0,...,210.0,0.0,33.0,26.0,5.0,5.0,62.0,0.0,19.0,96.3
12803,44216,Quinnen Williams,2019,DI,ALABAMA,15,0.0,0.0,0.0,0.0,...,301.0,0.0,52.0,53.0,13.0,0.0,56.0,0.0,0.0,96.0
24023,83964,Kyle Pitts,2020,TE,FLORIDA,8,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0
971,11072,Darrell Greene,2015,G,S DIEGO ST,7,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30386,130376,Tucker Melton,2021,QB,BOWL GREEN,4,33.3,9.0,10.0,6.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.8
18113,55444,Goran Jovanovic,2020,C,WAKE,2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.3
30983,141710,Bronson Barron,2023,QB,W KENTUCKY,1,50.0,2.0,2.0,3.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.1
19505,58132,Kyle Blaskovich,2020,LB,SMU,1,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,11.0,20.1


In [137]:
filtered_epa_final_season_df['player_key'] = filtered_epa_final_season_df['player'] + "_" + filtered_epa_final_season_df['position']
final_season_df['player_key'] = final_season_df['player'] + "_" + final_season_df['position']

filtered_epa_final_season_df['Team'] = filtered_epa_final_season_df['Team'].map(team_name_dict)
filtered_epa_final_season_df = filtered_epa_final_season_df.fillna(0)
filtered_epa_final_season_df

Unnamed: 0,player_id,player,Season,position,Team,Conference,CountablePlays,AveragePPA All,AveragePPA Pass,AveragePPA Rush,...,AveragePPA PassingDowns,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,TotalPPA SecondDown,TotalPPA ThirdDown,TotalPPA StandardDowns,TotalPPA PassingDowns,player_key
0,100009,Johnny Jackson,2015,WR,ARIZONA,Pac-12,62,0.674,0.674,0.000,...,0.953,41.813,41.813,0.000,9.542,16.772,22.345,13.214,28.600,Johnny Jackson_WR
1,102597,Will Rogers,2023,QB,MISS STATE,SEC,260,0.254,0.248,0.349,...,0.478,66.060,60.473,5.587,-15.777,43.812,39.511,10.589,55.471,Will Rogers_QB
2,107494,Trey Sanders,2023,RB,TCU,Big 12,51,0.127,-0.115,0.160,...,-0.189,6.499,-0.691,7.190,-0.318,-2.179,6.167,8.385,-1.886,Trey Sanders_RB
3,109907,Jon Lee,2014,RB,AIR FORCE,Mountain West,9,0.124,0.015,0.179,...,0.630,1.120,0.044,1.076,0.444,1.040,-0.363,-0.770,1.890,Jon Lee_RB
4,136429,Cedric Patterson III,2022,WR,RICE,American Athletic,3,-0.142,-0.192,-0.043,...,-0.049,-0.427,-0.385,-0.043,-0.330,0.000,-0.097,-0.330,-0.097,Cedric Patterson III_WR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10481,5161147,Marquis Montgomery,2023,WR,CAL,Pac-12,1,0.711,0.711,0.000,...,0.000,0.711,0.711,0.000,0.711,0.000,0.000,0.711,0.000,Marquis Montgomery_WR
10482,5162646,Marvin Sims,2023,WR,W KENTUCKY,Conference USA,1,1.297,1.297,0.000,...,0.000,1.297,1.297,0.000,0.000,0.000,0.000,1.297,0.000,Marvin Sims_WR
10483,5162845,Jacqez Barksdale,2023,RB,BUFFALO,Mid-American,48,0.113,-0.577,0.176,...,0.419,5.417,-2.307,7.724,-0.636,4.516,1.537,1.224,4.194,Jacqez Barksdale_RB
10484,5163021,Jacob Godfrey,2023,WR,NEW MEXICO,Mountain West,1,0.180,0.180,0.000,...,0.000,0.180,0.180,0.000,0.180,0.000,0.000,0.180,0.000,Jacob Godfrey_WR


In [138]:
final_season_df = final_season_df.merge(filtered_epa_final_season_df, on='player_key', suffixes=['','_epa'])
final_season_df

Unnamed: 0,player_id,player,Season,position,team_name,player_game_count,accuracy_percent,aimed_passes,attempts_passing,avg_depth_of_target_passing,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,TotalPPA SecondDown,TotalPPA ThirdDown,TotalPPA StandardDowns,TotalPPA PassingDowns
0,9434,Jameis Winston,2014,QB,FLORIDA ST,13,72.7,450.0,467.0,8.8,...,0.322,0.579,218.145,206.742,11.403,60.858,64.711,98.300,104.078,114.066
1,9435,Marcus Mariota,2014,QB,OREGON,15,76.7,434.0,444.0,9.8,...,0.549,0.662,338.945,249.678,89.267,134.438,91.456,103.840,213.143,125.802
2,9437,Amari Cooper,2014,WR,ALABAMA,14,0.0,0.0,0.0,0.0,...,0.636,1.043,134.560,136.060,-1.501,59.015,20.406,52.060,75.096,59.464
3,9440,Kevin White,2014,WR,W VIRGINIA,13,0.0,0.0,0.0,0.0,...,0.300,1.004,80.148,80.148,0.000,24.238,27.532,37.282,32.963,47.185
4,9447,DeVante Parker,2014,WR,LOUISVILLE,6,0.0,0.0,0.0,0.0,...,1.022,0.753,60.399,60.399,0.000,28.299,14.167,17.932,37.819,22.580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7025,176638,Ja'Kobi Lane,2023,WR,USC,6,0.0,0.0,0.0,0.0,...,0.865,3.328,10.980,10.980,0.000,2.889,4.674,3.416,4.323,6.657
7026,176642,Duce Robinson,2023,WR,USC,13,0.0,0.0,0.0,0.0,...,1.043,1.438,24.561,24.561,0.000,5.203,14.576,4.782,7.300,17.261
7027,176648,Dallen Bentley,2023,TE,UTAH,6,0.0,0.0,0.0,0.0,...,0.000,0.072,0.072,0.072,0.000,0.000,0.072,0.000,0.000,0.072
7028,181169,Blake Murphy,2023,QB,LA MONROE,4,62.1,95.0,105.0,11.3,...,0.025,0.332,20.196,17.924,2.272,5.781,0.753,13.906,1.595,18.601


In [139]:
player_df = career_df.merge(final_season_df, on='player_id', suffixes=['_career','_final_season'])
player_df

Unnamed: 0,player_id,player_career,Season_career,position_career,player_game_count_career,team_name_career,aimed_passes_career,attempts_passing_career,avg_depth_of_target_passing_career,avg_time_to_throw_career,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_final_season,TotalPPA Pass_final_season,TotalPPA Rush_final_season,TotalPPA FirstDown_final_season,TotalPPA SecondDown_final_season,TotalPPA ThirdDown_final_season,TotalPPA StandardDowns_final_season,TotalPPA PassingDowns_final_season
0,9434,Jameis Winston,1,QB,13,FLORIDA ST,450.0,467.0,8.8,2.74,...,0.322,0.579,218.145,206.742,11.403,60.858,64.711,98.300,104.078,114.066
1,9435,Marcus Mariota,1,QB,15,OREGON,434.0,444.0,9.8,2.83,...,0.549,0.662,338.945,249.678,89.267,134.438,91.456,103.840,213.143,125.802
2,9437,Amari Cooper,1,WR,14,ALABAMA,0.0,0.0,0.0,0.00,...,0.636,1.043,134.560,136.060,-1.501,59.015,20.406,52.060,75.096,59.464
3,9440,Kevin White,1,WR,13,W VIRGINIA,0.0,0.0,0.0,0.00,...,0.300,1.004,80.148,80.148,0.000,24.238,27.532,37.282,32.963,47.185
4,9447,DeVante Parker,1,WR,6,LOUISVILLE,0.0,0.0,0.0,0.00,...,1.022,0.753,60.399,60.399,0.000,28.299,14.167,17.932,37.819,22.580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7188,176638,Ja'Kobi Lane,1,WR,6,USC,0.0,0.0,0.0,0.00,...,0.865,3.328,10.980,10.980,0.000,2.889,4.674,3.416,4.323,6.657
7189,176642,Duce Robinson,1,WR,13,USC,0.0,0.0,0.0,0.00,...,1.043,1.438,24.561,24.561,0.000,5.203,14.576,4.782,7.300,17.261
7190,176648,Dallen Bentley,1,TE,6,UTAH,0.0,0.0,0.0,0.00,...,0.000,0.072,0.072,0.072,0.000,0.000,0.072,0.000,0.000,0.072
7191,181169,Blake Murphy,1,QB,4,LA MONROE,95.0,105.0,11.3,2.92,...,0.025,0.332,20.196,17.924,2.272,5.781,0.753,13.906,1.595,18.601


In [157]:
#importing combine & draft dfs from API
combine_df  = nfl.import_combine_data(years)
draft_df = nfl.import_draft_picks(years)
id_df = nfl.import_ids()

draft_df = draft_df[['season', 'round', 'pick', 'team', 'gsis_id', 'pfr_player_id',
       'cfb_player_id', 'pfr_player_name', 'position', 'category','college', 'age']]

id_df = id_df[['gsis_id', 'pff_id', 'pfr_id','cfbref_id', 'name', 'merge_name', 'position', 'team', 'birthdate', 'age',
       'draft_year', 'draft_round', 'draft_pick', 'draft_ovr', 'height', 'weight']]

combine_df = combine_df[['season', 'draft_year', 'pfr_id', 'cfb_id', 'player_name', 'pos', 'school', 'forty',
       'bench', 'vertical', 'broad_jump', 'cone', 'shuttle']]

In [158]:
#combine_df = combine_df[~combine_df['cfb_id'].isna()]
combine_df = combine_df.merge(id_df[['cfbref_id','pff_id','gsis_id', 'height', 'weight']], left_on='cfb_id', right_on='cfbref_id')
combine_df = combine_df.drop('cfbref_id', axis=1)

combine_df

Unnamed: 0,season,draft_year,pfr_id,cfb_id,player_name,pos,school,forty,bench,vertical,broad_jump,cone,shuttle,pff_id,gsis_id,height,weight
0,2014,2014.0,AbbrJa00,jared-abbrederis-1,Jared Abbrederis,WR,Wisconsin,4.50,4.0,30.5,117.0,6.80,4.08,8811.0,00-0031021,73.0,195.0
1,2014,2014.0,AdamDa01,davante-adams-1,Davante Adams,WR,Fresno State,4.56,14.0,39.5,123.0,6.82,4.30,8688.0,00-0031381,73.0,215.0
2,2014,2014.0,AlexMa00,maurice-alexander-1,Mo Alexander,S,Utah State,4.50,,38.0,123.0,7.05,4.51,8745.0,00-0031393,74.0,220.0
3,2014,2014.0,AlleRi00,ricardo-allen-1,Ricardo Allen,CB,Purdue,4.61,13.0,35.5,117.0,,4.15,8782.0,00-0031181,69.0,186.0
4,2014,2014.0,AmarJa00,jace-amaro-1,Jace Amaro,TE,Texas Tech,4.74,28.0,33.0,118.0,7.42,4.30,8684.0,00-0031042,77.0,265.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2167090,2023,2023.0,WoodCo01,colby-wooden-1,Colby Wooden,DE,Auburn,4.79,23.0,,115.0,,4.52,,00-0038387,76.0,273.0
2167091,2023,2023.0,YounBr01,bryce-young-1,Bryce Young,QB,Alabama,,,,,,,,00-0039150,70.0,204.0
2167092,2023,2023.0,YounBy00,byron-young-1,Byron Young,DT,Alabama,,24.0,26.0,108.0,7.68,,,00-0038978,75.0,292.0
2167093,2023,2023.0,YounBy00,byron-young-1,Byron Young,DT,Alabama,,24.0,26.0,108.0,7.68,,,00-0039137,74.0,250.0


In [159]:
draft_dict = dict(zip(draft_df['cfb_player_id'], draft_df['pick']))
draft_round_dict = dict(zip(draft_df['cfb_player_id'], draft_df['round']))

combine_df['draft_ovr_pick'] = combine_df['cfb_id'].map(draft_dict)
combine_df['draft_round_pick'] = combine_df['cfb_id'].map(draft_round_dict)

column_order = ['season', 'draft_year', 'pfr_id', 'cfb_id', 'pff_id', 'gsis_id', 'player_name', 'pos', 'school', 'height', 
           'weight', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle','draft_ovr_pick', 'draft_round_pick']

combine_df = combine_df[column_order]

#Assigning undrafted players pick 263 & round 8
combine_df['draft_ovr_pick'] = combine_df['draft_ovr_pick'].fillna(263)
combine_df['draft_round_pick'] = combine_df['draft_round_pick'].fillna(8)

combine_df

Unnamed: 0,season,draft_year,pfr_id,cfb_id,pff_id,gsis_id,player_name,pos,school,height,weight,forty,bench,vertical,broad_jump,cone,shuttle,draft_ovr_pick,draft_round_pick
0,2014,2014.0,AbbrJa00,jared-abbrederis-1,8811.0,00-0031021,Jared Abbrederis,WR,Wisconsin,73.0,195.0,4.50,4.0,30.5,117.0,6.80,4.08,176,5
1,2014,2014.0,AdamDa01,davante-adams-1,8688.0,00-0031381,Davante Adams,WR,Fresno State,73.0,215.0,4.56,14.0,39.5,123.0,6.82,4.30,53,2
2,2014,2014.0,AlexMa00,maurice-alexander-1,8745.0,00-0031393,Mo Alexander,S,Utah State,74.0,220.0,4.50,,38.0,123.0,7.05,4.51,110,4
3,2014,2014.0,AlleRi00,ricardo-allen-1,8782.0,00-0031181,Ricardo Allen,CB,Purdue,69.0,186.0,4.61,13.0,35.5,117.0,,4.15,147,5
4,2014,2014.0,AmarJa00,jace-amaro-1,8684.0,00-0031042,Jace Amaro,TE,Texas Tech,77.0,265.0,4.74,28.0,33.0,118.0,7.42,4.30,49,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2167090,2023,2023.0,WoodCo01,colby-wooden-1,,00-0038387,Colby Wooden,DE,Auburn,76.0,273.0,4.79,23.0,,115.0,,4.52,116,4
2167091,2023,2023.0,YounBr01,bryce-young-1,,00-0039150,Bryce Young,QB,Alabama,70.0,204.0,,,,,,,1,1
2167092,2023,2023.0,YounBy00,byron-young-1,,00-0038978,Byron Young,DT,Alabama,75.0,292.0,,24.0,26.0,108.0,7.68,,70,3
2167093,2023,2023.0,YounBy00,byron-young-1,,00-0039137,Byron Young,DT,Alabama,74.0,250.0,,24.0,26.0,108.0,7.68,,70,3


In [None]:
qb_combine_df = combine_df[combine_df['pos'] == 'QB']

#removing brady davis from df to create simplicity within coding
qb_combine_df = qb_combine_df[qb_combine_df['cfb_id'] != 'brady-davis-2']

#editing height from Foot-Inches to only Inches
def extract_height(height_str):
    feet, inches = height_str.split('-')
    return int(feet) * 12 + int(inches)

qb_combine_df['ht'] = qb_combine_df['ht'].apply(extract_height) 

#Update player_name to player for merging simplicity
qb_combine_df = qb_combine_df.rename(columns={'player_name': 'player'})

#qb_combine_df.sort_values(by='ht')

In [None]:
#Merging College Stats with Combine Measurements
prospect_df = qb_combine_df.merge(qb_df, on='player')

#Data Mungering
prospect_df = prospect_df[prospect_df['season'] != 2024] #as of 5/9/24 2024 data was blank. Will add later to make predictions
prospect_df['draft_ovr'] = prospect_df['draft_ovr'].fillna(256) #pick 256 will represent undrafted
prospect_df['draft_round'] = prospect_df['draft_round'].fillna(8) #round 8 will represent undrafted
prospect_df['draft_team'] = prospect_df['draft_team'].fillna('Undrafted')
prospect_df['draft_year'] = prospect_df['season']
#prospect_df

In [None]:
#columns to drop from the combined df
cols_to_remove = ['player_id','position_career','team_name_career','position_final_season', 'franchise_id', 'player_key', 
                  'Id', 'Name','Position_career', 'Season_career_last_season', 'Team_career', 'Season_last_season', 
                  'Position_last_season', 'Team_last_season', 'Conference']

prospect_df = prospect_df.drop(cols_to_remove, axis=1)
#prospect_df

In [None]:
#getting a list of attributes we will use for our model
var_stats = prospect_df.columns
var_stats = var_stats.drop(['draft_team','draft_round','draft_ovr','pfr_id','cfb_id','player','pos','school','draft_year',
                            'team_name_final_season'])

#filling in the empty combine stats with the median for that event
combine_stats = ['wt', 'forty','bench', 'vertical', 'broad_jump', 'cone', 'shuttle']

for stat in combine_stats:
    prospect_df[stat] = prospect_df[stat].fillna(prospect_df[stat].median())

#prospect_df

In [None]:
warnings.filterwarnings("ignore")

X = prospect_df[var_stats]
Y = prospect_df['draft_ovr']

reg_model = LinearRegression()
reg_model.fit(X,Y)
y_pred_regression = reg_model.predict(X)
prospect_df['y_pred_regression'] = y_pred_regression

rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
rand_model.fit(X,Y)
y_pred_rand = rand_model.predict(X)
prospect_df['y_pred_random_forrest'] = y_pred_rand

svr_model = SVR(kernel='rbf', C=1.0)
svr_model.fit(X, Y)
svr_predictions = svr_model.predict(X)
prospect_df['y_pred_svr'] = svr_predictions

gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
gbr_model.fit(X, Y)
gbr_predictions = gbr_model.predict(X)
prospect_df['y_pred_gbf'] = gbr_predictions

'''print("\nMean Absolute Error for ", prospect_df['pos'].iloc[0], " (Closest to 0, the better.)")
print("Linear Regression: ", mean_absolute_error(prospect_df['draft_ovr'], y_pred_regression))
print("Random Forrest: ", mean_absolute_error(prospect_df['draft_ovr'], y_pred_rand))
print("SVR Model: ", mean_absolute_error(prospect_df['draft_ovr'], svr_predictions))
print("GBR Model: ", mean_absolute_error(prospect_df['draft_ovr'], gbr_predictions))'''

In [None]:
'''coefficients = reg_model.coef_
intercept = reg_model.intercept_

# Sort features and coefficients together by absolute coefficient value (descending order)
sorted_features_and_coefs = sorted(zip(var_stats, coefficients), key=lambda x: abs(x[1]), reverse=True)

# Print the formula with sorted features and coefficients
formula = "y = "
for feature, coef in sorted_features_and_coefs:
    if coef > 0:
        formula += f" +{abs(coef):.4f} {feature} \n"  # Add '+' for positive coefficients
    else:
        formula += f" -{abs(coef):.4f} {feature} \n"  # Add '-' for negative coefficients

formula += f" + {intercept:.4f}"  # Add intercept

print(formula)'''

In [None]:
#prospect_df[['player', 'school', 'draft_year', 'draft_ovr', 'y_pred_regression','y_pred_gbf']].sort_values('y_pred_gbf').head(25)

In [None]:
#Merging College Stats with Combine Measurements
rookie_prospect_df = qb_combine_df.merge(qb_df, on='player')

#Data Mungering
for stat in combine_stats:
    rookie_prospect_df[stat] = rookie_prospect_df[stat].fillna(rookie_prospect_df[stat].median())
    
rookie_prospect_df = rookie_prospect_df[rookie_prospect_df['season'] == 2024]
    
X = rookie_prospect_df[var_stats]
y_pred_regression = reg_model.predict(X)
rookie_prospect_df['y_pred_regression'] = y_pred_regression

gbr_predictions = gbr_model.predict(X)
rookie_prospect_df['y_pred_gbf'] = gbr_predictions

df_print = rookie_prospect_df[['player', 'school', 'draft_year', 'draft_ovr', 'y_pred_regression', 'y_pred_gbf']].sort_values(by='y_pred_gbf')
df_print['gbf_proj_round'] = (df_print['y_pred_gbf']//32) + 1
df_print

# Creating Rookie Projections

In [None]:
rookie_stats_df = nfl.import_seasonal_data(years[:-1])

rookie_stats_df = rookie_stats_df[['player_id', 'season', 'games', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions']]

rookie_stats_df = rookie_stats_df.sort_values(by='season')
rookie_stats_df = rookie_stats_df.groupby(by='player_id').first()
rookie_stats_df = rookie_stats_df.reset_index()

rookie_stats_df

In [None]:
id_df = nfl.import_ids()
print(id_df.columns)
id_df = id_df[['pfr_id','gsis_id','name']]
#id_df

In [None]:
proj_df = prospect_df.merge(id_df, on='pfr_id')
proj_df = proj_df.merge(rookie_stats_df, left_on='gsis_id', right_on='player_id')

proj_df.sort_values(by='passing_yards', ascending=False)

In [None]:
duplicates = proj_df['pfr_id'].duplicated()
proj_df = proj_df[~duplicates]

proj_df = proj_df.rename(columns={'season_x':'season'})

proj_df.sort_values(by='passing_yards', ascending=False)

In [None]:
#getting a list of attributes we will use for our model
var_stats = prospect_df.columns
var_stats = var_stats.drop(['draft_team','draft_round','pfr_id','cfb_id','player','pos','school','draft_year',
                            'team_name_final_season','y_pred_random_forrest', 'y_pred_svr'])

In [None]:
perdicted_stats = ['games', 'passing_yards','passing_tds', 'interceptions', 'passing_2pt_conversions', 'sack_fumbles_lost',
 'rushing_yards','rushing_tds','rushing_2pt_conversions', 'rushing_fumbles_lost', 'rushing_first_downs']

gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
reg_model = LinearRegression()
rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
svr_model = SVR(kernel='rbf', C=1.0)

for stat in perdicted_stats:
    X = proj_df[var_stats]
    Y = proj_df[stat]

    reg_model.fit(X,Y)
    y_pred_regression = reg_model.predict(X)
    proj_df[stat + '_y_pred_regression'] = y_pred_regression

    rand_model.fit(X,Y)
    y_pred_rand = rand_model.predict(X)
    proj_df[stat + '_y_pred_random_forrest'] = y_pred_rand

    svr_model.fit(X, Y)
    svr_predictions = svr_model.predict(X)
    proj_df[stat + '_y_pred_svr'] = svr_predictions
    
    gbr_model.fit(X, Y)
    gbr_predictions = gbr_model.predict(X)
    proj_df[stat + '_y_pred_gbf'] = gbr_predictions

    '''print("\nMean Absolute Error for ", stat, " (Closest to 0, the better.)")
    print("GBR Model: ", mean_absolute_error(proj_df[stat], gbr_predictions))    
    print("Linear Regression: ", mean_absolute_error(proj_df[stat], y_pred_regression))
    print("Random Forrest: ", mean_absolute_error(proj_df[stat], y_pred_rand))
    print("SVR Model: ", mean_absolute_error(proj_df[stat], svr_predictions))'''

In [None]:
ff_scoring = {'passing_yards':.04,'passing_tds':4, 'interceptions':-2, 'passing_2pt_conversions':2, 'sack_fumbles_lost':-2,
 'rushing_yards':.1,'rushing_tds':6,'rushing_2pt_conversions':2, 'rushing_fumbles_lost':-2, 'rushing_first_downs':.5}

def ff_scoring_method(df, text_mod):
    proj_df['ff_scoring'+ text_mod] = 0
    for key, value in ff_scoring.items():
        df['ff_scoring' + text_mod] = (df[key+text_mod] * value) + df['ff_scoring' + text_mod]

    df['ff_scoring/g' + text_mod] = df['ff_scoring' + text_mod] / df['games' + text_mod]

ff_scoring_method(proj_df, '')
proj_df[['season','draft_team', 'player', 'games', 'ff_scoring', 'ff_scoring/g']].sort_values(by='ff_scoring/g', ascending=False).head(10)

In [None]:
#proj_df[['season', 'draft_team', 'player', 'games', 'games_y_pred_gbf', 'games_y_pred_random_forrest', 'passing_yards', 'passing_yards_y_pred_gbf', 'passing_yards_y_pred_random_forrest']].sort_values(by='passing_yards_y_pred_random_forrest', ascending=False).head(10)

In [None]:
draft_df = draft_df.rename(columns={'cfb_player_id':'cfb_id','pick':'draft_ovr'})

rookie_proj_df = rookie_prospect_df.merge(draft_df[['cfb_id', 'draft_ovr']], on='cfb_id')
rookie_proj_df['draft_ovr_x'] = rookie_proj_df['draft_ovr_y']
rookie_proj_df = rookie_proj_df.drop(columns=['draft_ovr_y'])
rookie_proj_df = rookie_proj_df.rename(columns={'draft_ovr_x':'draft_ovr'})

#rookie_proj_df

In [None]:
gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
reg_model = LinearRegression()
rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
svr_model = SVR(kernel='rbf', C=1.0)

for stat in perdicted_stats:
    X = proj_df[var_stats]
    Y = proj_df[stat]

    reg_model.fit(X,Y)
    rand_model.fit(X,Y)
    svr_model.fit(X,Y)
    gbr_model.fit(X,Y)

    X = rookie_proj_df[var_stats]
    y_pred_regression = reg_model.predict(X)
    rookie_proj_df[stat + '_y_pred_regression'] = y_pred_regression
        
    y_pred_rand = rand_model.predict(X)
    rookie_proj_df[stat + '_y_pred_random_forrest'] = y_pred_rand

    svr_predictions = svr_model.predict(X)
    rookie_proj_df[stat + '_y_pred_svr'] = svr_predictions
    
    gbr_predictions = gbr_model.predict(X)
    rookie_proj_df[stat + '_y_pred_gbf'] = gbr_predictions

    '''print("\nMean Absolute Error for ", stat, " (Closest to 0, the better.)")
    print("GBR Model: ", mean_absolute_error(rookie_proj_df[stat+ '_y_pred_regression'], gbr_predictions))    
    print("Linear Regression: ", mean_absolute_error(rookie_proj_df[stat + '_y_pred_random_forrest'], y_pred_regression))
    print("Random Forrest: ", mean_absolute_error(rookie_proj_df[stat + '_y_pred_svr'], y_pred_rand))
    print("SVR Model: ", mean_absolute_error(rookie_proj_df[stat+ '_y_pred_gbf'], svr_predictions))'''
        
rookie_proj_df

# Perdicting Passing Yards Per Game & Fantasy Points Per Game

In [None]:
#Perdicting Passing Yards Per Game
predicted_stats = ['player']
model_list = ['_y_pred_regression','_y_pred_random_forrest', '_y_pred_gbf','_y_pred_svr']
filtered_stats = ['passing_yards', 'games']

for stat in filtered_stats:
    for model in model_list:
        mod_col = stat + model
        predicted_stats.append(mod_col)

for i in model_list:
    rookie_proj_df['pass_yards/g' + i] = rookie_proj_df[filtered_stats[0]+i] / rookie_proj_df[filtered_stats[1]+i]
    mod_col = 'pass_yards/g' + i
    predicted_stats.append(mod_col)
        
rookie_proj_df[predicted_stats].sort_values(by='pass_yards/g_y_pred_gbf', ascending=False)

In [None]:
#Perdicting Fantasy Pts Per Game
for model in model_list:
    rookie_proj_df['ff_scoring' + model] = 0
    for key, value in ff_scoring.items():
        rookie_proj_df['ff_scoring' + model] = (rookie_proj_df[key+model] * value) + rookie_proj_df['ff_scoring' + model]

for model in model_list:
    rookie_proj_df['ff_scoring/g' + model] = rookie_proj_df['ff_scoring' + model] / rookie_proj_df['games' + model]
    rookie_proj_df['ff_scoring/g' + model + '_rank'] = rookie_proj_df['ff_scoring/g' + model].rank(ascending=False)

model_rank_list = []
for model in model_list:
    mod_col = 'ff_scoring/g' + model + '_rank'
    model_rank_list.append(mod_col)
    
rookie_proj_df['ff_scoring/g_ovr_rank'] = rookie_proj_df[model_rank_list].mean(axis=1)
            
rookie_proj_df = rookie_proj_df.sort_values(by=['ff_scoring/g_ovr_rank','draft_ovr'], ascending=True)
    
sorted_col = ['player', 'draft_ovr','ff_scoring/g_y_pred_regression','ff_scoring/g_y_pred_random_forrest',
              'ff_scoring/g_y_pred_gbf','ff_scoring/g_y_pred_svr','ff_scoring/g_y_pred_regression_rank',
              'ff_scoring/g_y_pred_random_forrest_rank','ff_scoring/g_y_pred_gbf_rank','ff_scoring/g_y_pred_svr_rank',
              'ff_scoring/g_ovr_rank']

rookie_proj_df[sorted_col]