In [133]:
#Import Packages

import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import nfl_data_py as nfl
import time
import warnings
import cfbd

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [134]:
#Specifying Year Range
years = range(2014,2025)

In [135]:
#Pull in CSVs from Personal GitHubs
epa_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/Combined_EPA.csv")
blocking_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/blocking_stats.csv")
passing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/passing_stats.csv")
receiving_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/receiving_stats.csv")
rushing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/rushing_stats.csv")

power_5_team  = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/power_5_teams.csv")

# QB draft projections

In [136]:
#Creating Passing DF

qb_passing_df = passing_df[passing_df['position']=='QB'].fillna(0).sort_values(by='Season')

qb_passing_df = qb_passing_df.groupby(['player_id', 'player']).agg({'Season':'count', 'position':'last', 'team_name':'last',
       'player_game_count':'sum',  'aimed_passes':'sum', 'attempts':'sum', 'avg_depth_of_target':'mean', 'avg_time_to_throw':'mean', 'bats':'sum',
       'big_time_throws':'sum', 'completions':'sum','declined_penalties':'sum', 'def_gen_pressures':'sum', 'dropbacks':'sum',
       'drops':'sum', 'first_downs':'sum', 'grades_hands_fumble':'max','grades_offense':'max', 'grades_pass':'max', 'grades_run':'max', 'hit_as_threw':'sum',
       'interceptions':'sum', 'passing_snaps':'sum', 'penalties':'sum', 'pressure_to_sack_rate':'last',
       'qb_rating':'last', 'sacks':'sum', 'scrambles':'sum', 'spikes':'sum',
       'thrown_aways':'sum', 'touchdowns':'sum', 'turnover_worthy_plays':'sum','yards':'sum'})

qb_passing_df = qb_passing_df.reset_index()

#Creating Rushing DF

qb_rushing_df = rushing_df[rushing_df['position']=='QB'].fillna(0)

qb_rushing_df = qb_rushing_df.groupby(['player_id', 'player']).agg({'attempts':'sum',
       'avoided_tackles':'sum', 'breakaway_attempts':'sum',
       'breakaway_yards':'sum', 'designed_yards':'sum', 'elu_recv_mtf':'last', 'elu_rush_mtf':'last',
       'elu_yco':'last', 'elusive_rating':'last', 'explosive':'sum', 'first_downs':'sum',
       'fumbles':'sum', 'gap_attempts':'sum', 'grades_offense_penalty':'last',
       'grades_pass_block':'last', 'grades_pass_route':'last', 'grades_run_block':'last', 'longest':'max',
       'run_plays':'sum', 'scramble_yards':'sum','total_touches':'sum', 'touchdowns':'sum', 'yards':'sum',
       'yards_after_contact':'sum', 'yco_attempt':'sum', 'ypa':'mean', 'yprr':'mean',
       'zone_attempts':'sum'})

qb_rushing_df = qb_rushing_df.reset_index()

#Merging DFs into one
qb_career_df = pd.merge(qb_passing_df, qb_rushing_df, on=['player_id', 'player'], suffixes=('_passing','_rushing'))
qb_career_df = qb_career_df.fillna(0)

qb_career_df['yards_passing/G_career'] = qb_career_df['yards_passing'] / qb_career_df['player_game_count']
qb_career_df['yards_passing/att_career'] = qb_career_df['yards_passing'] / qb_career_df['attempts_passing']
qb_career_df['completion_pct_career'] = qb_career_df['completions'] / qb_career_df['attempts_passing']
qb_career_df['adj_completion_pct_career'] = (qb_career_df['completions'] + qb_career_df['drops']) / qb_career_df['aimed_passes']
qb_career_df['touchdown_pct_career'] = qb_career_df['touchdowns_passing'] / qb_career_df['attempts_passing']
qb_career_df['interception_pct_career'] = qb_career_df['interceptions'] / qb_career_df['attempts_passing']
qb_career_df['dangerous_play_pct_career'] = qb_career_df['turnover_worthy_plays'] / qb_career_df['attempts_passing']
qb_career_df['interception_pct_career'] = qb_career_df['interceptions'] / qb_career_df['attempts_passing']
qb_career_df['money_throw_pct_career'] = qb_career_df['big_time_throws'] / qb_career_df['attempts_passing']

qb_career_df.sort_values(by='player_game_count', ascending=False).head(15)

#qb_career_df.columns

Unnamed: 0,player_id,player,Season,position,team_name,player_game_count,aimed_passes,attempts_passing,avg_depth_of_target,avg_time_to_throw,...,yprr,zone_attempts,yards_passing/G_career,yards_passing/att_career,completion_pct_career,adj_completion_pct_career,touchdown_pct_career,interception_pct_career,dangerous_play_pct_career,money_throw_pct_career
1052,97790,Bo Nix,5,QB,OREGON,61,1826,1945,8.24,2.676,...,3.6,96,250.590164,7.859126,0.659126,0.761774,0.058098,0.013368,0.029306,0.045244
897,77618,Sam Hartman,6,QB,NOTRE DAME,60,1787,1907,11.6,2.92,...,0.0,42,261.166667,8.217095,0.594651,0.688304,0.070267,0.025695,0.039329,0.058731
888,75886,Chevan Cordeiro,6,QB,S JOSE ST,60,1529,1631,10.733333,2.891667,...,3.166667,42,202.7,7.456775,0.595953,0.70242,0.053955,0.01962,0.033722,0.059473
761,60363,Chase Brice,5,QB,APP STATE,58,1163,1231,10.4,2.374,...,0.0,41,162.982759,7.679123,0.597076,0.690456,0.059301,0.029245,0.045491,0.062551
785,60428,Levi Lewis,5,QB,LA LAFAYET,57,1085,1185,9.5,2.996,...,0.0,71,162.175439,7.800844,0.610127,0.731797,0.062447,0.01519,0.037975,0.048101
500,40291,Jalen Hurts,4,QB,OKLAHOMA,56,954,1050,9.5,3.11,...,3.0825,151,168.982143,9.012381,0.649524,0.765199,0.07619,0.019048,0.034286,0.049524
1059,97993,Jayden Daniels,5,QB,LSU,55,1372,1448,9.44,2.812,...,1.0,123,231.618182,8.797652,0.658149,0.755102,0.061464,0.013812,0.022099,0.047652
918,77651,Holton Ahlers,5,QB,E CAROLINA,55,1744,1869,9.26,2.666,...,4.15,99,252.109091,7.418941,0.602996,0.696674,0.051364,0.019797,0.037988,0.046549
575,46401,Jake Browning,4,QB,WASHINGTON,53,1388,1490,9.5,2.9375,...,0.5,13,231.698113,8.241611,0.642282,0.73487,0.063087,0.022819,0.046309,0.051678
638,46501,Clayton Thorson,4,QB,NWESTERN,53,1562,1702,8.525,2.7775,...,3.0,69,202.396226,6.302585,0.582844,0.703585,0.03584,0.026439,0.051116,0.03349


In [137]:
qb_passing_df = passing_df[passing_df['position']=='QB'].fillna(0).sort_values(by='Season')
qb_rushing_df = rushing_df[rushing_df['position']=='QB'].fillna(0).sort_values(by='Season')

qb_passing_df = qb_passing_df.groupby(['player_id', 'player']).last()
qb_rushing_df = qb_rushing_df.groupby(['player_id', 'player']).last()

qb_passing_df = qb_passing_df.reset_index()
qb_rushing_df = qb_rushing_df.reset_index()

qb_rushing_df = qb_rushing_df.drop(['Season', 'position', 'team_name', 'player_game_count','declined_penalties',
                                   'drops', 'franchise_id', 'grades_hands_fumble', 'grades_offense', 'grades_pass', 
                                    'grades_run', 'scrambles', 'rec_yards','receptions', 'routes', 'yprr'], axis=1)

qb_final_season_df = pd.merge(qb_passing_df, qb_rushing_df, on=['player_id', 'player'], suffixes=('_passing','_rushing'))

qb_final_season_df = qb_final_season_df.merge(power_5_team, how='left', on='team_name')

qb_final_season_df

Unnamed: 0,player_id,player,Season,position,team_name,player_game_count,accuracy_percent,aimed_passes,attempts_passing,avg_depth_of_target,...,scramble_yards,targets,total_touches,touchdowns_rushing,yards_rushing,yards_after_contact,yco_attempt,ypa_rushing,zone_attempts,power_5_team?
0,9434,Jameis Winston,2014,QB,FLORIDA ST,13,72.7,450,467,8.8,...,159,0,24,3,204,152.0,3.23,4.3,0,1
1,9435,Marcus Mariota,2014,QB,OREGON,15,76.7,434,444,9.8,...,325,1,71,15,942,339.0,3.17,8.8,1,1
2,9508,Garrett Grayson,2014,QB,COLO STATE,13,75.4,395,423,10.1,...,146,1,15,0,125,14.0,0.36,3.2,0,0
3,9522,Sean Mannion,2014,QB,OREGON ST,12,71.3,436,456,8.9,...,37,0,16,1,10,2.0,0.09,0.4,0,0
4,9536,Bryce Petty,2014,QB,BAYLOR,12,72.9,410,430,12.0,...,99,0,48,6,250,89.0,1.35,3.8,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1498,175940,Marcel Reed,2023,QB,TEXAS A&M,3,65.7,35,37,12.9,...,52,0,3,1,60,14.0,1.27,5.5,1,1
1499,176226,Hayden Timosciek,2023,QB,BOWL GREEN,1,60.0,10,10,6.5,...,2,0,2,0,0,7.0,1.75,0.0,0,0
1500,176367,D'Wanye Winfield,2023,QB,LA LAFAYET,1,0.0,0,1,0.0,...,0,0,4,0,33,14.0,3.50,8.3,4,0
1501,176573,Gavin Kuld,2023,QB,COLORADO,1,33.3,3,3,4.3,...,1,1,2,0,-2,9.0,3.00,-0.7,0,1


In [138]:
qb_stats_df = pd.merge(qb_career_df, qb_final_season_df, on=['player_id', 'player'], suffixes=('_career','_final_season'))
qb_stats_df['player_key'] = qb_stats_df['player'] +"_"+ qb_stats_df['position_career']

qb_stats_df

Unnamed: 0,player_id,player,Season_career,position_career,team_name_career,player_game_count_career,aimed_passes_career,attempts_passing_career,avg_depth_of_target_career,avg_time_to_throw_career,...,targets,total_touches_final_season,touchdowns_rushing_final_season,yards_rushing_final_season,yards_after_contact_final_season,yco_attempt_final_season,ypa_rushing,zone_attempts_final_season,power_5_team?,player_key
0,9434,Jameis Winston,1,QB,FLORIDA ST,13,450,467,8.8,2.74,...,0,24,3,204,152.0,3.23,4.3,0,1,Jameis Winston_QB
1,9435,Marcus Mariota,1,QB,OREGON,15,434,444,9.8,2.83,...,1,71,15,942,339.0,3.17,8.8,1,1,Marcus Mariota_QB
2,9508,Garrett Grayson,1,QB,COLO STATE,13,395,423,10.1,2.71,...,1,15,0,125,14.0,0.36,3.2,0,0,Garrett Grayson_QB
3,9522,Sean Mannion,1,QB,OREGON ST,12,436,456,8.9,2.76,...,0,16,1,10,2.0,0.09,0.4,0,0,Sean Mannion_QB
4,9536,Bryce Petty,1,QB,BAYLOR,12,410,430,12.0,2.38,...,0,48,6,250,89.0,1.35,3.8,0,1,Bryce Petty_QB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1498,175940,Marcel Reed,1,QB,TEXAS A&M,3,35,37,12.9,2.93,...,0,3,1,60,14.0,1.27,5.5,1,1,Marcel Reed_QB
1499,176226,Hayden Timosciek,1,QB,BOWL GREEN,1,10,10,6.5,2.45,...,0,2,0,0,7.0,1.75,0.0,0,0,Hayden Timosciek_QB
1500,176367,D'Wanye Winfield,1,QB,LA LAFAYET,1,0,1,0.0,5.10,...,0,4,0,33,14.0,3.50,8.3,4,0,D'Wanye Winfield_QB
1501,176573,Gavin Kuld,1,QB,COLORADO,1,3,3,4.3,3.32,...,1,2,0,-2,9.0,3.00,-0.7,0,1,Gavin Kuld_QB


In [139]:
#Adding Career EPA Numbers
epa_career_df = epa_df

#Bo Nix has 2 different plaer_ID correcting it here
epa_career_df.loc[epa_career_df['Id'] == 4567218, 'Id'] = 4426338

#Continued to add Career EPA Numbers
epa_career_df = epa_career_df.groupby(['Id', 'Name']).agg({'Position':'last', 'Season':'count', 'Team':'last', 'CountablePlays':'sum',
                                                                       'TotalPPA All':'sum', 'TotalPPA Pass':'sum',
                                                                       'TotalPPA Rush':'sum', 'TotalPPA FirstDown':'sum',
                                                                       'TotalPPA SecondDown':'sum','TotalPPA ThirdDown':'sum',
                                                                       'TotalPPA StandardDowns':'sum',
                                                                       'TotalPPA PassingDowns':'sum'})

column_list = ['TotalPPA All', 'TotalPPA Pass','TotalPPA Rush', 'TotalPPA FirstDown', 'TotalPPA SecondDown',
               'TotalPPA ThirdDown', 'TotalPPA StandardDowns','TotalPPA PassingDowns']

for col in column_list:
    average = epa_career_df[col] / epa_career_df['Season']
    epa_career_df[f"{col}_Season_avg"] = average
    
for col in column_list:
    average = epa_career_df[col] / epa_career_df['CountablePlays']
    epa_career_df[f"{col}_play_avg"] = average
    
epa_career_df = epa_career_df.reset_index()

epa_career_df.sort_values(by='TotalPPA All_Season_avg', ascending=False).head(10)

Unnamed: 0,Id,Name,Position,Season,Team,CountablePlays,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,...,TotalPPA StandardDowns_Season_avg,TotalPPA PassingDowns_Season_avg,TotalPPA All_play_avg,TotalPPA Pass_play_avg,TotalPPA Rush_play_avg,TotalPPA FirstDown_play_avg,TotalPPA SecondDown_play_avg,TotalPPA ThirdDown_play_avg,TotalPPA StandardDowns_play_avg,TotalPPA PassingDowns_play_avg
56,511459,Marcus Mariota,QB,1,Oregon,560,328.197,243.902,84.295,131.159,...,201.008,127.189,0.586066,0.435539,0.150527,0.234212,0.155539,0.186343,0.358943,0.227123
191,531316,Ryan Higgins,QB,1,Louisiana Tech,577,285.822,263.329,22.494,92.197,...,160.745,125.077,0.495359,0.456376,0.038984,0.159787,0.182549,0.154752,0.278588,0.216771
433,550373,Baker Mayfield,QB,3,Oklahoma,1419,820.991,737.414,83.577,247.273,...,156.393,117.271,0.57857,0.519672,0.058899,0.174259,0.22687,0.168548,0.330641,0.24793
42,504866,Brandon Doughty,QB,2,Western Kentucky,1068,534.204,532.437,1.768,182.125,...,149.2955,117.8065,0.500191,0.498537,0.001655,0.170529,0.130681,0.199272,0.27958,0.220611
784,3915511,Joe Burrow,QB,2,LSU,1073,525.125,450.47,74.655,230.34,...,154.707,107.8555,0.489399,0.419823,0.069576,0.214669,0.127656,0.143977,0.288363,0.201035
1028,4040616,Dwayne Haskins,QB,1,Ohio State,588,262.122,259.446,2.676,73.786,...,118.614,143.508,0.445786,0.441235,0.004551,0.125486,0.134153,0.175095,0.201724,0.244061
1238,4250360,Bailey Zappe,QB,1,Western Kentucky,558,258.147,255.04,3.107,81.78,...,113.791,144.357,0.462629,0.457061,0.005568,0.146559,0.193351,0.116787,0.203927,0.258704
1660,4431452,Drake Maye,QB,2,North Carolina,1176,500.32,384.954,115.365,109.738,...,119.493,130.6665,0.425442,0.327342,0.098099,0.093315,0.134134,0.174004,0.203219,0.222222
303,545238,Mitch Trubisky,QB,1,North Carolina,523,236.507,211.421,25.086,40.322,...,92.843,143.664,0.452212,0.404247,0.047966,0.077098,0.159438,0.215631,0.17752,0.274692
1636,4430841,Carson Beck,QB,1,Georgia,453,233.85,208.826,25.024,58.135,...,100.948,132.902,0.516225,0.460985,0.055241,0.128333,0.16581,0.22,0.222843,0.293382


In [140]:
epa_last_season_df = epa_df

epa_last_season_df = epa_last_season_df.groupby(['Id', 'Name']).last()
epa_last_season_df = epa_last_season_df.reset_index()
    
epa_last_season_df.sort_values(by='AveragePPA All', ascending=False)

Unnamed: 0,Id,Name,Season,Position,Team,Conference,CountablePlays,AveragePPA All,AveragePPA Pass,AveragePPA Rush,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All,TotalPPA Pass,TotalPPA Rush,TotalPPA FirstDown,TotalPPA SecondDown,TotalPPA ThirdDown,TotalPPA StandardDowns,TotalPPA PassingDowns
1640,4430878,Jaxon Smith-Njigba,2021,WR,Ohio State,Big Ten,100,1.158,1.158,,...,1.161,1.152,115.808,115.808,,47.454,32.300,28.633,78.940,36.868
739,3892889,Dede Westbrook,2016,WR,Oklahoma,Big 12,112,1.120,1.163,0.688,...,1.019,1.353,125.481,118.599,6.882,32.761,55.996,29.023,79.473,46.009
1403,4362628,Ja'Marr Chase,2019,WR,LSU,SEC,115,1.118,1.118,,...,0.979,1.478,128.568,128.568,,79.092,27.276,26.349,81.283,47.285
1284,4262921,Justin Jefferson,2019,WR,LSU,SEC,120,1.065,1.065,,...,0.843,1.526,127.791,127.791,,43.865,44.363,39.563,68.261,59.530
1150,4241478,DeVonta Smith,2020,WR,Alabama,SEC,133,1.062,1.087,-0.041,...,0.752,1.809,141.225,141.347,-0.122,50.576,60.079,32.535,70.679,70.546
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,531458,Martez Walker,2015,RB,Central Michigan,Mid-American,116,-0.144,-0.158,-0.140,...,-0.162,-0.086,-16.702,-3.960,-12.742,-11.296,-4.809,-0.596,-14.387,-2.315
850,3921652,Juwan Washington,2019,RB,San Diego State,Mountain West,175,-0.145,0.250,-0.205,...,-0.186,-0.004,-25.453,5.755,-31.209,-13.749,-6.830,-0.442,-25.285,-0.168
465,551918,Lee McNeill,2015,QB,Charlotte,American Athletic,198,-0.151,-0.123,-0.525,...,-0.168,-0.135,-29.891,-22.546,-7.345,-6.627,-9.123,-1.968,-15.990,-13.900
801,3916369,Emmanuel Esukpa,2018,RB,Rice,American Athletic,118,-0.153,-0.271,-0.151,...,-0.227,0.138,-17.996,-0.271,-17.725,-12.249,-3.664,-0.029,-21.306,3.310


In [141]:
epa_combined = pd.merge(epa_career_df, epa_last_season_df, on=['Id', 'Name'], suffixes=('_career','_last_season'))
epa_combined['player_key'] = epa_combined['Name'] + "_" + epa_combined['Position_last_season']
epa_combined

Unnamed: 0,Id,Name,Position_career,Season_career,Team_career,CountablePlays_career,TotalPPA All_career,TotalPPA Pass_career,TotalPPA Rush_career,TotalPPA FirstDown_career,...,AveragePPA PassingDowns,TotalPPA All_last_season,TotalPPA Pass_last_season,TotalPPA Rush_last_season,TotalPPA FirstDown_last_season,TotalPPA SecondDown_last_season,TotalPPA ThirdDown_last_season,TotalPPA StandardDowns_last_season,TotalPPA PassingDowns_last_season,player_key
0,102597,Will Rogers,QB,2,Mississippi State,823,189.209,186.001,3.207,30.545,...,0.472,63.981,57.961,6.020,-13.384,39.334,36.857,13.001,50.980,Will Rogers_QB
1,246044,Tyler Johnson,WR,2,Minnesota,221,184.509,182.168,2.341,53.416,...,1.253,100.674,98.333,2.341,27.260,53.544,23.887,48.058,52.615,Tyler Johnson_WR
2,381494,Christian Stewart,QB,1,BYU,410,124.076,102.243,21.833,9.459,...,0.402,124.076,102.243,21.833,9.459,46.485,60.699,54.553,69.524,Christian Stewart_QB
3,480746,Bryce Petty,QB,1,Baylor,496,164.458,154.135,10.323,57.695,...,0.399,164.458,154.135,10.323,57.695,68.513,36.135,98.582,65.876,Bryce Petty_QB
4,480846,C.J. Brown,QB,1,Maryland,456,37.636,7.241,30.395,-8.579,...,0.190,37.636,7.241,30.395,-8.579,-4.689,46.501,2.218,35.418,C.J. Brown_QB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,5150611,Jake Retzlaff,QB,1,BYU,122,2.491,-2.125,4.616,-25.692,...,0.313,2.491,-2.125,4.616,-25.692,4.521,16.249,-10.966,13.457,Jake Retzlaff_QB
1996,5151317,Robert Henry,RB,1,UT San Antonio,136,23.696,6.260,17.436,-1.802,...,0.303,23.696,6.260,17.436,-1.802,10.729,13.666,15.506,8.189,Robert Henry_RB
1997,5151618,Kirk Francis,QB,1,Tulsa,130,50.083,52.635,-2.552,12.702,...,0.516,50.083,52.635,-2.552,12.702,7.070,31.970,15.492,34.591,Kirk Francis_QB
1998,5151948,Joey Aguilar,QB,1,Appalachian State,506,233.142,206.784,26.358,53.645,...,0.758,233.142,206.784,26.358,53.645,55.998,94.937,91.408,141.734,Joey Aguilar_QB


In [142]:
qb_df = pd.merge(qb_stats_df, epa_combined, on=['player_key'], suffixes=('_career','_last_season'))
qb_df

Unnamed: 0,player_id,player,Season_career_career,position_career,team_name_career,player_game_count_career,aimed_passes_career,attempts_passing_career,avg_depth_of_target_career,avg_time_to_throw_career,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_last_season,TotalPPA Pass_last_season,TotalPPA Rush_last_season,TotalPPA FirstDown_last_season,TotalPPA SecondDown_last_season,TotalPPA ThirdDown_last_season,TotalPPA StandardDowns_last_season,TotalPPA PassingDowns_last_season
0,9434,Jameis Winston,1,QB,FLORIDA ST,13,450,467,8.8,2.74,...,0.314,0.562,204.921,193.518,11.403,60.167,58.180,92.300,99.334,105.587
1,9435,Marcus Mariota,1,QB,OREGON,15,434,444,9.8,2.83,...,0.535,0.691,328.197,243.902,84.295,131.159,87.102,104.352,201.008,127.189
2,9508,Garrett Grayson,1,QB,COLO STATE,13,395,423,10.1,2.71,...,0.351,0.573,191.450,190.264,1.187,46.817,48.079,91.420,83.128,108.323
3,9522,Sean Mannion,1,QB,OREGON ST,12,436,456,8.9,2.76,...,0.032,0.310,64.151,71.764,-7.612,17.645,14.385,41.326,8.733,55.418
4,9536,Bryce Petty,1,QB,BAYLOR,12,410,430,12.0,2.38,...,0.298,0.399,164.458,154.135,10.323,57.695,68.513,36.135,98.582,65.876
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
785,173296,Kirk Francis,1,QB,TULSA,4,117,121,14.6,3.01,...,0.246,0.516,50.083,52.635,-2.552,12.702,7.070,31.970,15.492,34.591
786,173983,Dante Moore,1,QB,UCLA,9,185,214,10.3,2.72,...,0.183,0.339,55.563,48.017,7.546,20.395,4.078,24.055,23.019,32.544
787,174761,McCae Hillstead,1,QB,UTAH ST,8,151,159,10.4,2.76,...,0.298,0.304,30.061,26.950,3.111,11.024,1.176,12.275,14.882,15.179
788,174978,Joey Aguilar,1,QB,APP STATE,14,441,464,10.8,2.51,...,0.287,0.758,233.142,206.784,26.358,53.645,55.998,94.937,91.408,141.734


In [143]:
#importing combine & draft dfs from API
combine_df  = nfl.import_combine_data(years)
draft_df = nfl.import_draft_picks(years)

In [144]:
qb_combine_df = combine_df[combine_df['pos'] == 'QB']

#removing brady davis from df to create simplicity within coding
qb_combine_df = qb_combine_df[qb_combine_df['cfb_id'] != 'brady-davis-2']

#editing height from Foot-Inches to only Inches
def extract_height(height_str):
    feet, inches = height_str.split('-')
    return int(feet) * 12 + int(inches)

qb_combine_df['ht'] = qb_combine_df['ht'].apply(extract_height) 

#Update player_name to player for merging simplicity
qb_combine_df = qb_combine_df.rename(columns={'player_name': 'player'})

qb_combine_df.sort_values(by='ht')

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
7516,2022,,,,,KingDE02,deriq-king-1,D'Eriq King,QB,Miami,69,196.0,,,,,,
7995,2023,2023.0,Carolina Panthers,1.0,1.0,YounBr01,bryce-young-1,Bryce Young,QB,Alabama,70,204.0,,,,,,
6444,2019,2019.0,Arizona Cardinals,1.0,1.0,MurrKy00,kyler-murray-1,Kyler Murray,QB,Oklahoma,70,207.0,,,,,,
5980,2018,,,,,,quinton-flowers-1,Quinton Flowers,QB,South Florida,70,214.0,4.63,,,112.0,6.81,4.57
5175,2015,,,,,SimsBl01,blake-sims-2,Blake Sims,QB,Alabama,71,218.0,4.57,,30.5,115.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4867,2014,2014.0,Arizona Cardinals,4.0,120.0,ThomLo00,logan-thomas-1,Logan Thomas,QB,Virginia Tech,78,248.0,4.61,,35.5,118.0,7.05,4.18
6690,2020,2020.0,Los Angeles Chargers,1.0,6.0,HerbJu00,justin-herbert-1,Justin Herbert,QB,Oregon,78,236.0,4.68,,35.5,123.0,7.06,4.46
5415,2016,2016.0,Denver Broncos,1.0,26.0,LyncPa00,paxton-lynch-1,Paxton Lynch,QB,Memphis,79,244.0,4.86,,36.0,118.0,7.14,4.26
6381,2019,,,,,JackTy01,tyree-jackson-1,Tyree Jackson,QB,Buffalo,79,249.0,4.59,,34.5,120.0,7.09,4.28


In [145]:
#Merging College Stats with Combine Measurements
prospect_df = qb_combine_df.merge(qb_df, on='player')

#Data Mungering
prospect_df = prospect_df[prospect_df['season'] != 2024] #as of 5/9/24 2024 data was blank. Will add later to make predictions
prospect_df['draft_ovr'] = prospect_df['draft_ovr'].fillna(256) #pick 256 will represent undrafted
prospect_df['draft_round'] = prospect_df['draft_round'].fillna(8) #round 8 will represent undrafted
prospect_df['draft_team'] = prospect_df['draft_team'].fillna('Undrafted')
prospect_df['draft_year'] = prospect_df['season']
prospect_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_last_season,TotalPPA Pass_last_season,TotalPPA Rush_last_season,TotalPPA FirstDown_last_season,TotalPPA SecondDown_last_season,TotalPPA ThirdDown_last_season,TotalPPA StandardDowns_last_season,TotalPPA PassingDowns_last_season
0,2015,2015,Undrafted,8.0,256.0,BoonAn00,anthony-boone-1,Anthony Boone,QB,Duke,...,0.062,0.255,71.989,53.743,18.246,3.804,10.231,58.490,18.413,53.576
1,2015,2015,Undrafted,8.0,256.0,BridBr00,brandon-bridge-1,Brandon Bridge,QB,South Alabama,...,0.007,0.330,53.410,37.205,16.205,-0.190,31.017,24.416,1.545,51.865
2,2015,2015,Undrafted,8.0,256.0,CardSh00,shane-carden-1,Shane Carden,QB,East Carolina,...,0.206,0.502,215.469,201.271,14.198,35.485,90.032,92.508,94.583,120.886
3,2015,2015,Undrafted,8.0,256.0,FajaCo00,cody-fajardo-1,Cody Fajardo,QB,Nevada,...,0.175,0.370,134.506,78.578,55.928,6.392,34.864,91.708,53.775,80.731
4,2015,2015,New Orleans Saints,3.0,75.0,GrayGa00,garrett-grayson-1,Garrett Grayson,QB,Colorado State,...,0.351,0.573,191.450,190.264,1.187,46.817,48.079,91.420,83.128,108.323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,2023,2023,Indianapolis Colts,1.0,4.0,RichAn03,anthony-richardson-2,Anthony Richardson,QB,Florida,...,0.258,0.579,165.958,99.343,66.614,9.656,76.207,63.780,62.891,103.067
127,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,0.428,0.599,195.922,189.971,5.952,64.655,42.245,84.105,115.047,80.876
128,2023,2023,Cleveland Browns,5.0,140.0,ThomDo02,dorian-thompson-robinson-1,Dorian Thompson-Robinson,QB,UCLA,...,0.361,0.602,217.221,151.899,65.322,81.775,46.858,89.071,119.016,98.206
129,2023,2023,Arizona Cardinals,5.0,139.0,TuneCl00,clayton-tune-1,Clayton Tune,QB,Houston,...,0.353,0.744,286.314,235.480,50.834,74.935,82.748,96.867,134.540,151.774


In [146]:
#columns to drop from the combined df
cols_to_remove = ['player_id','position_career','team_name_career','position_final_season', 'franchise_id', 'player_key', 
                  'Id', 'Name','Position_career', 'Season_career_last_season', 'Team_career', 'Season_last_season', 
                  'Position_last_season', 'Team_last_season', 'Conference']

prospect_df = prospect_df.drop(cols_to_remove, axis=1)
prospect_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_last_season,TotalPPA Pass_last_season,TotalPPA Rush_last_season,TotalPPA FirstDown_last_season,TotalPPA SecondDown_last_season,TotalPPA ThirdDown_last_season,TotalPPA StandardDowns_last_season,TotalPPA PassingDowns_last_season
0,2015,2015,Undrafted,8.0,256.0,BoonAn00,anthony-boone-1,Anthony Boone,QB,Duke,...,0.062,0.255,71.989,53.743,18.246,3.804,10.231,58.490,18.413,53.576
1,2015,2015,Undrafted,8.0,256.0,BridBr00,brandon-bridge-1,Brandon Bridge,QB,South Alabama,...,0.007,0.330,53.410,37.205,16.205,-0.190,31.017,24.416,1.545,51.865
2,2015,2015,Undrafted,8.0,256.0,CardSh00,shane-carden-1,Shane Carden,QB,East Carolina,...,0.206,0.502,215.469,201.271,14.198,35.485,90.032,92.508,94.583,120.886
3,2015,2015,Undrafted,8.0,256.0,FajaCo00,cody-fajardo-1,Cody Fajardo,QB,Nevada,...,0.175,0.370,134.506,78.578,55.928,6.392,34.864,91.708,53.775,80.731
4,2015,2015,New Orleans Saints,3.0,75.0,GrayGa00,garrett-grayson-1,Garrett Grayson,QB,Colorado State,...,0.351,0.573,191.450,190.264,1.187,46.817,48.079,91.420,83.128,108.323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,2023,2023,Indianapolis Colts,1.0,4.0,RichAn03,anthony-richardson-2,Anthony Richardson,QB,Florida,...,0.258,0.579,165.958,99.343,66.614,9.656,76.207,63.780,62.891,103.067
127,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,0.428,0.599,195.922,189.971,5.952,64.655,42.245,84.105,115.047,80.876
128,2023,2023,Cleveland Browns,5.0,140.0,ThomDo02,dorian-thompson-robinson-1,Dorian Thompson-Robinson,QB,UCLA,...,0.361,0.602,217.221,151.899,65.322,81.775,46.858,89.071,119.016,98.206
129,2023,2023,Arizona Cardinals,5.0,139.0,TuneCl00,clayton-tune-1,Clayton Tune,QB,Houston,...,0.353,0.744,286.314,235.480,50.834,74.935,82.748,96.867,134.540,151.774


In [147]:
#getting a list of attributes we will use for our model
var_stats = prospect_df.columns
var_stats = var_stats.drop(['draft_team','draft_round','draft_ovr','pfr_id','cfb_id','player','pos','school','draft_year',
                            'team_name_final_season'])

#filling in the empty combine stats with the median for that event
combine_stats = ['wt', 'forty','bench', 'vertical', 'broad_jump', 'cone', 'shuttle']

for stat in combine_stats:
    prospect_df[stat] = prospect_df[stat].fillna(prospect_df[stat].median())

prospect_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,AveragePPA StandardDowns,AveragePPA PassingDowns,TotalPPA All_last_season,TotalPPA Pass_last_season,TotalPPA Rush_last_season,TotalPPA FirstDown_last_season,TotalPPA SecondDown_last_season,TotalPPA ThirdDown_last_season,TotalPPA StandardDowns_last_season,TotalPPA PassingDowns_last_season
0,2015,2015,Undrafted,8.0,256.0,BoonAn00,anthony-boone-1,Anthony Boone,QB,Duke,...,0.062,0.255,71.989,53.743,18.246,3.804,10.231,58.490,18.413,53.576
1,2015,2015,Undrafted,8.0,256.0,BridBr00,brandon-bridge-1,Brandon Bridge,QB,South Alabama,...,0.007,0.330,53.410,37.205,16.205,-0.190,31.017,24.416,1.545,51.865
2,2015,2015,Undrafted,8.0,256.0,CardSh00,shane-carden-1,Shane Carden,QB,East Carolina,...,0.206,0.502,215.469,201.271,14.198,35.485,90.032,92.508,94.583,120.886
3,2015,2015,Undrafted,8.0,256.0,FajaCo00,cody-fajardo-1,Cody Fajardo,QB,Nevada,...,0.175,0.370,134.506,78.578,55.928,6.392,34.864,91.708,53.775,80.731
4,2015,2015,New Orleans Saints,3.0,75.0,GrayGa00,garrett-grayson-1,Garrett Grayson,QB,Colorado State,...,0.351,0.573,191.450,190.264,1.187,46.817,48.079,91.420,83.128,108.323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,2023,2023,Indianapolis Colts,1.0,4.0,RichAn03,anthony-richardson-2,Anthony Richardson,QB,Florida,...,0.258,0.579,165.958,99.343,66.614,9.656,76.207,63.780,62.891,103.067
127,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,0.428,0.599,195.922,189.971,5.952,64.655,42.245,84.105,115.047,80.876
128,2023,2023,Cleveland Browns,5.0,140.0,ThomDo02,dorian-thompson-robinson-1,Dorian Thompson-Robinson,QB,UCLA,...,0.361,0.602,217.221,151.899,65.322,81.775,46.858,89.071,119.016,98.206
129,2023,2023,Arizona Cardinals,5.0,139.0,TuneCl00,clayton-tune-1,Clayton Tune,QB,Houston,...,0.353,0.744,286.314,235.480,50.834,74.935,82.748,96.867,134.540,151.774


In [148]:
warnings.filterwarnings("ignore")

X = prospect_df[var_stats]
Y = prospect_df['draft_ovr']

reg_model = LinearRegression()
reg_model.fit(X,Y)
y_pred_regression = reg_model.predict(X)
prospect_df['y_pred_regression'] = y_pred_regression

rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
rand_model.fit(X,Y)
y_pred_rand = rand_model.predict(X)
prospect_df['y_pred_random_forrest'] = y_pred_rand

svr_model = SVR(kernel='rbf', C=1.0)
svr_model.fit(X, Y)
svr_predictions = svr_model.predict(X)
prospect_df['y_pred_svr'] = svr_predictions

gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
gbr_model.fit(X, Y)
gbr_predictions = gbr_model.predict(X)
prospect_df['y_pred_gbf'] = gbr_predictions

print("\nMean Absolute Error for ", prospect_df['pos'].iloc[0], " (Closest to 0, the better.)")
print("Linear Regression: ", mean_absolute_error(prospect_df['draft_ovr'], y_pred_regression))
print("Random Forrest: ", mean_absolute_error(prospect_df['draft_ovr'], y_pred_rand))
print("SVR Model: ", mean_absolute_error(prospect_df['draft_ovr'], svr_predictions))
print("GBR Model: ", mean_absolute_error(prospect_df['draft_ovr'], gbr_predictions))


Mean Absolute Error for  QB  (Closest to 0, the better.)
Linear Regression:  4.043432933683614e-11
Random Forrest:  27.28891090262805
SVR Model:  86.45140193280284
GBR Model:  2.7960930288023746


In [149]:
coefficients = reg_model.coef_
intercept = reg_model.intercept_

# Sort features and coefficients together by absolute coefficient value (descending order)
sorted_features_and_coefs = sorted(zip(var_stats, coefficients), key=lambda x: abs(x[1]), reverse=True)

# Print the formula with sorted features and coefficients
formula = "y = "
for feature, coef in sorted_features_and_coefs:
    if coef > 0:
        formula += f" +{abs(coef):.4f} {feature} \n"  # Add '+' for positive coefficients
    else:
        formula += f" -{abs(coef):.4f} {feature} \n"  # Add '-' for negative coefficients

formula += f" + {intercept:.4f}"  # Add intercept

print(formula)

y =  -34.1144 btt_rate 
 -34.0322 declined_penalties_final_season 
 -31.2394 yards_passing/att_career 
 -30.3045 penalties_rushing 
 +25.5781 player_game_count_career 
 +23.7008 avg_depth_of_target_final_season 
 +22.1452 spikes_final_season 
 -20.8701 completion_percent 
 +20.0678 grades_pass_final_season 
 +19.0833 grades_run_block_career 
 +19.0558 targets 
 -18.9984 twp_rate 
 -17.8501 breakaway_attempts_career 
 +17.4202 power_5_team? 
 -17.0641 bats_final_season 
 -16.8425 avg_depth_of_target_career 
 -16.6513 hit_as_threw_career 
 -16.5798 Season_career_career 
 -16.1835 explosive_final_season 
 -15.3673 sack_percent 
 -15.3514 TotalPPA ThirdDown_Season_avg 
 -14.8883 grades_offense_final_season 
 -14.8474 total_touches_final_season 
 -13.8607 penalties_passing 
 +13.7361 penalties 
 +13.4481 forty 
 +13.3526 interceptions_final_season 
 -13.2513 elu_recv_mtf_final_season 
 +12.7749 elu_recv_mtf_career 
 -12.5015 breakaway_attempts_final_season 
 +11.8607 grades_offense_career 


In [150]:
prospect_df[['player', 'school', 'draft_year', 'draft_ovr', 'y_pred_regression','y_pred_gbf']].sort_values('y_pred_gbf').head(25)

Unnamed: 0,player,school,draft_year,draft_ovr,y_pred_regression,y_pred_gbf
75,Joe Burrow,LSU,2020,1.0,1.0,-0.450313
94,Trevor Lawrence,Clemson,2021,1.0,1.0,-0.023432
68,Kyler Murray,Oklahoma,2019,1.0,1.0,1.254515
8,Marcus Mariota,Oregon,2015,2.0,2.0,2.177403
52,Baker Mayfield,Oklahoma,2018,1.0,1.0,2.477194
130,Bryce Young,Alabama,2023,1.0,1.0,3.010381
11,Jameis Winston,Florida State,2015,1.0,1.0,5.249567
18,Jared Goff,California,2016,1.0,1.0,5.426507
88,Tua Tagovailoa,Alabama,2020,5.0,5.0,6.10553
103,Zach Wilson,BYU,2021,2.0,2.0,7.34279


In [151]:
#Merging College Stats with Combine Measurements
rookie_prospect_df = qb_combine_df.merge(qb_df, on='player')

#Data Mungering
for stat in combine_stats:
    rookie_prospect_df[stat] = rookie_prospect_df[stat].fillna(rookie_prospect_df[stat].median())
    
rookie_prospect_df = rookie_prospect_df[rookie_prospect_df['season'] == 2024]
    
X = rookie_prospect_df[var_stats]
y_pred_regression = reg_model.predict(X)
rookie_prospect_df['y_pred_regression'] = y_pred_regression

gbr_predictions = gbr_model.predict(X)
rookie_prospect_df['y_pred_gbf'] = gbr_predictions

df_print = rookie_prospect_df[['player', 'school', 'draft_year', 'draft_ovr', 'y_pred_regression', 'y_pred_gbf']].sort_values(by='y_pred_gbf')
df_print['gbf_proj_round'] = (df_print['y_pred_gbf']//32) + 1
df_print

Unnamed: 0,player,school,draft_year,draft_ovr,y_pred_regression,y_pred_gbf,gbf_proj_round
143,Caleb Williams,USC,,,41.17332,20.61019,1.0
135,Drake Maye,North Carolina,,,-382.457433,32.880836,2.0
131,Jayden Daniels,LSU,,,-96.040407,34.806356,2.0
136,J.J. McCarthy,Michigan,,,310.873978,38.668959,2.0
137,Bo Nix,Oregon,,,-110.360977,39.001342,2.0
133,Michael Penix Jr.,Washington,,,-268.420384,121.387055,4.0
142,Jordan Travis,Florida St.,,,15.520852,133.555505,5.0
139,Spencer Rattler,South Carolina,,,-306.342061,147.623447,5.0
141,Kedon Slovis,BYU,,,-156.190931,168.987864,6.0
134,Devin Leary,Kentucky,,,-126.631095,184.637983,6.0


# Creating Rookie Projections

In [152]:
rookie_stats_df = nfl.import_seasonal_data(years[:-1])

rookie_stats_df = rookie_stats_df[['player_id', 'season', 'games', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions']]

rookie_stats_df = rookie_stats_df.sort_values(by='season')
rookie_stats_df = rookie_stats_df.groupby(by='player_id').first()
rookie_stats_df = rookie_stats_df.reset_index()

rookie_stats_df

Unnamed: 0,player_id,season,games,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,...,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions
0,00-0007091,2014,4,REG,30,44,301.0,2,0.0,2.0,...,2.540580,0.304226,8,-11.0,0,0.0,0.0,0.0,-7.756981,0
1,00-0010346,2014,16,REG,395,597,4727.0,39,15.0,17.0,...,15.767253,2.556234,24,-24.0,0,3.0,1.0,0.0,-24.812411,0
2,00-0018227,2015,1,REG,0,0,0.0,0,0.0,0.0,...,0.000000,0.000000,1,-3.0,0,0.0,0.0,0.0,-0.917858,0
3,00-0019596,2014,16,REG,373,582,4109.0,33,9.0,21.0,...,14.613688,2.111506,36,57.0,0,1.0,0.0,11.0,-4.367861,0
4,00-0019714,2014,1,REG,1,1,10.0,0,0.0,0.0,...,0.000000,0.000000,0,0.0,0,0.0,0.0,0.0,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1962,00-0039150,2023,16,REG,315,527,2877.0,11,10.0,62.0,...,12.786642,0.672251,39,253.0,0,2.0,0.0,18.0,21.616300,1
1963,00-0039152,2023,9,REG,149,255,1808.0,8,4.0,28.0,...,6.151852,0.462094,25,57.0,1,2.0,2.0,7.0,-9.428327,0
1964,00-0039163,2023,15,REG,319,499,4108.0,23,5.0,38.0,...,13.869473,1.603286,39,157.0,3,2.0,1.0,17.0,6.004496,1
1965,00-0039164,2023,4,REG,50,84,577.0,3,1.0,7.0,...,5.912336,0.224194,25,136.0,4,2.0,1.0,8.0,0.430104,0


In [153]:
id_df = nfl.import_ids()
print(id_df.columns)
id_df = id_df[['pfr_id','gsis_id','name']]
id_df

Index(['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id',
       'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id',
       'cbs_id', 'pfr_id', 'cfbref_id', 'rotowire_id', 'rotoworld_id',
       'ktc_id', 'stats_id', 'stats_global_id', 'fantasy_data_id', 'swish_id',
       'name', 'merge_name', 'position', 'team', 'birthdate', 'age',
       'draft_year', 'draft_round', 'draft_pick', 'draft_ovr',
       'twitter_username', 'height', 'weight', 'college', 'db_season'],
      dtype='object')


Unnamed: 0,pfr_id,gsis_id,name
0,,,Caleb Williams
1,,,Drake Maye
2,,,Jayden Daniels
3,,,Bo Nix
4,,,Michael Penix Jr.
...,...,...,...
11651,,,Doug Brien
11652,,,Jeremy Brigham
11653,,,Vincent Brisby
11654,,,Bubby Brister


In [154]:
proj_df = prospect_df.merge(id_df, on='pfr_id')
proj_df = proj_df.merge(rookie_stats_df, left_on='gsis_id', right_on='player_id')

proj_df.sort_values(by='passing_yards', ascending=False)

Unnamed: 0,season_x,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions
1546,2020,2020,Los Angeles Chargers,1.0,6.0,HerbJu00,justin-herbert-1,Justin Herbert,QB,Oregon,...,15.286666,1.934134,55,234.0,5,4.0,0.0,20.0,0.664679,0
1570,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,13.869473,1.603286,39,157.0,3,2.0,1.0,17.0,6.004496,1
4,2015,2015,Tampa Bay Buccaneers,1.0,1.0,WinsJa00,jameis-winston-1,Jameis Winston,QB,Florida State,...,12.848774,1.819690,54,213.0,6,3.0,1.0,18.0,2.977389,0
1554,2021,2021,New England Patriots,1.0,15.0,JoneMa05,mac-jones-1,Mac Jones,QB,Alabama,...,18.102397,1.989122,44,129.0,0,3.0,1.0,22.0,-0.205186,0
1529,2018,2018,Cleveland Browns,1.0,1.0,MayfBa00,baker-mayfield-1,Baker Mayfield,QB,Oklahoma,...,12.114106,1.773421,39,131.0,0,3.0,1.0,8.0,-9.446474,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,2018,2018,Undrafted,8.0,256.0,,quinton-flowers-1,Quinton Flowers,QB,South Florida,...,0.000000,0.000000,0,0.0,0,0.0,0.0,0.0,0.000000,0
655,2018,2018,Undrafted,8.0,256.0,,austin-allen-1,Austin Allen,QB,Arkansas,...,0.000000,0.000000,0,0.0,0,0.0,0.0,0.0,0.000000,0
654,2022,2022,Undrafted,8.0,256.0,,,Cole Kelley,QB,Southeastern Louisiana,...,0.000000,0.000000,2,12.0,0,0.0,0.0,1.0,0.569428,0
653,2021,2021,Undrafted,8.0,256.0,,david-moore-5,David Moore,QB,Central Michigan,...,0.000000,0.000000,2,12.0,0,0.0,0.0,1.0,0.569428,0


In [155]:
duplicates = proj_df['pfr_id'].duplicated()
proj_df = proj_df[~duplicates]

proj_df = proj_df.rename(columns={'season_x':'season'})

proj_df.sort_values(by='passing_yards', ascending=False)

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions
1546,2020,2020,Los Angeles Chargers,1.0,6.0,HerbJu00,justin-herbert-1,Justin Herbert,QB,Oregon,...,15.286666,1.934134,55,234.0,5,4.0,0.0,20.0,0.664679,0
1570,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,13.869473,1.603286,39,157.0,3,2.0,1.0,17.0,6.004496,1
4,2015,2015,Tampa Bay Buccaneers,1.0,1.0,WinsJa00,jameis-winston-1,Jameis Winston,QB,Florida State,...,12.848774,1.819690,54,213.0,6,3.0,1.0,18.0,2.977389,0
1554,2021,2021,New England Patriots,1.0,15.0,JoneMa05,mac-jones-1,Mac Jones,QB,Alabama,...,18.102397,1.989122,44,129.0,0,3.0,1.0,22.0,-0.205186,0
1529,2018,2018,Cleveland Browns,1.0,1.0,MayfBa00,baker-mayfield-1,Baker Mayfield,QB,Oklahoma,...,12.114106,1.773421,39,131.0,0,3.0,1.0,8.0,-9.446474,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1539,2019,2019,Baltimore Ravens,6.0,197.0,McSoTr00,trace-mcsorley-1,Trace McSorley,QB,Penn State,...,0.000000,0.000000,1,1.0,0,0.0,0.0,1.0,0.453161,0
24,2017,2017,New York Giants,3.0,87.0,WebbDa00,davis-webb-1,Davis Webb,QB,California,...,0.000000,0.000000,2,-3.0,0,0.0,0.0,0.0,0.000000,0
25,2018,2018,Undrafted,8.0,256.0,,austin-allen-1,Austin Allen,QB,Arkansas,...,0.000000,0.000000,1,1.0,0,0.0,0.0,1.0,1.848790,0
1552,2021,2021,Indianapolis Colts,6.0,218.0,EhliSa00,sam-ehlinger-1,Sam Ehlinger,QB,Texas,...,0.000000,0.000000,3,9.0,0,0.0,0.0,1.0,0.041919,0


In [156]:
#getting a list of attributes we will use for our model
var_stats = prospect_df.columns
var_stats = var_stats.drop(['draft_team','draft_round','pfr_id','cfb_id','player','pos','school','draft_year',
                            'team_name_final_season'])

In [157]:
perdicted_stats = ['games', 'passing_yards','passing_tds', 'interceptions', 'passing_2pt_conversions', 'sack_fumbles_lost',
 'rushing_yards','rushing_tds','rushing_2pt_conversions', 'rushing_fumbles_lost', 'rushing_first_downs']

gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
reg_model = LinearRegression()
rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
svr_model = SVR(kernel='rbf', C=1.0)

for stat in perdicted_stats:
    X = proj_df[var_stats]
    Y = proj_df[stat]

    reg_model.fit(X,Y)
    y_pred_regression = reg_model.predict(X)
    proj_df[stat + '_y_pred_regression'] = y_pred_regression

    rand_model.fit(X,Y)
    y_pred_rand = rand_model.predict(X)
    proj_df[stat + '_y_pred_random_forrest'] = y_pred_rand

    svr_model.fit(X, Y)
    svr_predictions = svr_model.predict(X)
    proj_df[stat + '_y_pred_svr'] = svr_predictions
    
    gbr_model.fit(X, Y)
    gbr_predictions = gbr_model.predict(X)
    proj_df[stat + '_y_pred_gbf'] = gbr_predictions

    print("\nMean Absolute Error for ", stat, " (Closest to 0, the better.)")
    print("GBR Model: ", mean_absolute_error(proj_df[stat], gbr_predictions))    
    print("Linear Regression: ", mean_absolute_error(proj_df[stat], y_pred_regression))
    print("Random Forrest: ", mean_absolute_error(proj_df[stat], y_pred_rand))
    print("SVR Model: ", mean_absolute_error(proj_df[stat], svr_predictions))


Mean Absolute Error for  games  (Closest to 0, the better.)
GBR Model:  1.7223208208624954
Linear Regression:  1.1866063687193673e-13
Random Forrest:  1.2584615384615385
SVR Model:  4.478168247884045

Mean Absolute Error for  passing_yards  (Closest to 0, the better.)
GBR Model:  449.7712007091242
Linear Regression:  3.222946058182667e-11
Random Forrest:  307.57641025641027
SVR Model:  1059.1832417965886

Mean Absolute Error for  passing_tds  (Closest to 0, the better.)
GBR Model:  2.779803951756834
Linear Regression:  1.9554136088117957e-13
Random Forrest:  2.1148717948717946
SVR Model:  6.06427599656568

Mean Absolute Error for  interceptions  (Closest to 0, the better.)
GBR Model:  1.6960617024062288
Linear Regression:  1.48787648868165e-13
Random Forrest:  1.3425641025641024
SVR Model:  3.752201344383211

Mean Absolute Error for  passing_2pt_conversions  (Closest to 0, the better.)
GBR Model:  0.24627474682644154
Linear Regression:  4.6801081528731934e-14
Random Forrest:  0.152820

In [158]:
ff_scoring = {'passing_yards':.04,'passing_tds':4, 'interceptions':-2, 'passing_2pt_conversions':2, 'sack_fumbles_lost':-2,
 'rushing_yards':.1,'rushing_tds':6,'rushing_2pt_conversions':2, 'rushing_fumbles_lost':-2, 'rushing_first_downs':.5}

proj_df['ff_scoring'] = 0

for key, value in ff_scoring.items():
    proj_df['ff_scoring'] = (proj_df[key] * value) + proj_df['ff_scoring']
    
proj_df['ff_scoring/g'] = proj_df['ff_scoring'] / proj_df['games']

proj_df[['season','draft_team', 'player', 'games', 'ff_scoring', 'ff_scoring/g']].sort_values(by='ff_scoring/g', ascending=False).head(10)

Unnamed: 0,season,draft_team,player,games,ff_scoring,ff_scoring/g
23,2017,Houston Texans,Deshaun Watson,7,175.86,25.122857
1546,2020,Los Angeles Chargers,Justin Herbert,15,342.84,22.856
1559,2022,Washington Commanders,Sam Howell,1,20.26,20.26
1569,2023,Indianapolis Colts,Anthony Richardson,4,76.68,19.17
1525,2018,Buffalo Bills,Josh Allen,12,227.36,18.946667
1570,2023,Houston Texans,C.J. Stroud,15,282.52,18.834667
1541,2019,Arizona Cardinals,Kyler Murray,16,298.78,18.67375
15,2016,Dallas Cowboys,Dak Prescott,16,297.38,18.58625
1543,2020,Cincinnati Bengals,Joe Burrow,10,180.72,18.072
4,2015,Tampa Bay Buccaneers,Jameis Winston,16,283.98,17.74875


In [159]:
proj_df[['season', 'draft_team', 'player', 'games', 'games_y_pred_gbf', 'games_y_pred_random_forrest', 'passing_yards', 'passing_yards_y_pred_gbf', 'passing_yards_y_pred_random_forrest']].sort_values(by='passing_yards_y_pred_random_forrest', ascending=False).head(10)

Unnamed: 0,season,draft_team,player,games,games_y_pred_gbf,games_y_pred_random_forrest,passing_yards,passing_yards_y_pred_gbf,passing_yards_y_pred_random_forrest
1570,2023,Houston Texans,C.J. Stroud,15,12.507361,14.846154,4108.0,3128.318243,4084.384615
1546,2020,Los Angeles Chargers,Justin Herbert,15,12.507361,14.230769,4336.0,3065.37864,3923.846154
1554,2021,New England Patriots,Mac Jones,17,12.079434,15.615385,3801.0,2975.474575,3449.846154
4,2015,Tampa Bay Buccaneers,Jameis Winston,16,12.673493,13.461538,4042.0,3002.625124,3444.769231
1541,2019,Arizona Cardinals,Kyler Murray,16,12.77837,14.692308,3722.0,2966.700922,3402.615385
1540,2019,Jacksonville Jaguars,Gardner Minshew,14,11.868787,13.923077,3271.0,2644.354491,3301.461538
1529,2018,Cleveland Browns,Baker Mayfield,14,12.467808,13.307692,3725.0,3052.656859,3223.615385
1555,2021,Jacksonville Jaguars,Trevor Lawrence,17,12.507361,15.615385,3641.0,2888.549598,3123.153846
1573,2023,Carolina Panthers,Bryce Young,16,12.957634,14.307692,2877.0,2475.787618,3051.846154
1537,2019,New York Giants,Daniel Jones,13,11.890303,11.384615,3027.0,2341.625155,2892.076923


In [164]:
'''draft_df[draft_df['season'] == 2024]
draft_df = draft_df.rename(columns={'cfb_player_id':'cfb_id','pick':'draft_ovr'})

rookie_proj_df = rookie_prospect_df.merge(draft_df, on='cfb_id', how='inner')

rookie_proj_df.drop(['season_x','draft_ovr_x'])
for col in rookie_proj_df.columns:
    print(col)'''

season_x
draft_year
draft_team
draft_round
draft_ovr_x
pfr_id
cfb_id
player
pos
school
ht
wt
forty
bench
vertical
broad_jump
cone
shuttle
player_id
Season_career_career
position_career
team_name_career
player_game_count_career
aimed_passes_career
attempts_passing_career
avg_depth_of_target_career
avg_time_to_throw_career
bats_career
big_time_throws_career
completions_career
declined_penalties_career
def_gen_pressures_career
dropbacks_career
drops_career
first_downs_passing_career
grades_hands_fumble_career
grades_offense_career
grades_pass_career
grades_run_career
hit_as_threw_career
interceptions_career
passing_snaps_career
penalties
pressure_to_sack_rate_career
qb_rating_career
sacks_career
scrambles_career
spikes_career
thrown_aways_career
touchdowns_passing_career
turnover_worthy_plays_career
yards_passing_career
attempts_rushing_career
avoided_tackles_career
breakaway_attempts_career
breakaway_yards_career
designed_yards_career
elu_recv_mtf_career
elu_rush_mtf_career
elu_yco_caree

In [165]:
draft_df

Unnamed: 0,season,round,draft_ovr,team,gsis_id,pfr_player_id,cfb_id,pfr_player_name,hof,position,...,pass_ints,rush_atts,rush_yards,rush_tds,receptions,rec_yards,rec_tds,def_solo_tackles,def_ints,def_sacks
9593,2014,1,1,HOU,00-0031364,ClowJa00,jadeveon-clowney-1,Jadeveon Clowney,False,DE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,248.0,1.0,52.5
9594,2014,1,2,STL,,RobiGr01,greg-robinson-3,Greg Robinson,False,T,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
9595,2014,1,3,JAX,00-0031407,BortBl00,blake-bortles-1,Blake Bortles,False,QB,...,75.0,283.0,1766.0,8.0,1.0,20.0,1.0,,,
9596,2014,1,4,BUF,00-0031325,WatkSa00,sammy-watkins-1,Sammy Watkins,False,WR,...,1.0,11.0,76.0,0.0,364.0,5384.0,34.0,1.0,,
9597,2014,1,5,OAK,00-0031040,MackKh00,khalil-mack-1,Khalil Mack,False,LB,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,442.0,3.0,101.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12408,2024,7,253,LAC,,JohnCo02,cornelius-johnson-1,Cornelius Johnson,False,WR,...,,,,,,,,,,
12409,2024,7,254,LAR,,LeveKT00,kaitori-leveston-1,KT Leveston,False,OL,...,,,,,,,,,,
12410,2024,7,255,GNB,,KingKa00,kalen-king-1,Kalen King,False,CB,...,,,,,,,,,,
12411,2024,7,256,DEN,,GargNi00,nick-gargiulo-1,Nick Gargiulo,False,OL,...,,,,,,,,,,


In [161]:
'''for stat in perdicted_stats:
    X = proj_df[var_stats]
    Y = proj_df[stat]

    reg_model.fit(X,Y)
    gbr_model.fit(X, Y)
    
    X = rookie_prospect_df[var_stats]
    y_pred_regression = reg_model.predict(X)
    rookie_prospect_df[stat + '_y_pred_regression'] = y_pred_regression
    
    gbr_predictions = gbr_model.predict(X)
    rookie_prospect_df[stat + '_y_pred_gbf'] = gbr_predictions'''

rookie_prospect_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,TotalPPA All_last_season,TotalPPA Pass_last_season,TotalPPA Rush_last_season,TotalPPA FirstDown_last_season,TotalPPA SecondDown_last_season,TotalPPA ThirdDown_last_season,TotalPPA StandardDowns_last_season,TotalPPA PassingDowns_last_season,y_pred_regression,y_pred_gbf
131,2024,,,,,,jayden-daniels-1,Jayden Daniels,QB,LSU,...,304.103,209.381,94.722,128.812,100.006,76.436,190.917,113.186,-96.040407,34.806356
132,2024,,,,,,sam-hartman-1,Sam Hartman,QB,Notre Dame,...,160.738,149.084,11.654,27.465,68.102,61.326,83.163,77.575,9.079442,208.366817
133,2024,,,,,,michael-penix-jr-1,Michael Penix Jr.,QB,Washington,...,260.756,242.012,18.744,95.907,77.048,79.215,162.738,98.017,-268.420384,121.387055
134,2024,,,,,,devin-leary-1,Devin Leary,QB,Kentucky,...,127.943,118.574,9.369,28.185,46.038,63.954,64.285,63.658,-126.631095,184.637983
135,2024,,,,,,drake-maye-1,Drake Maye,QB,North Carolina,...,215.657,164.166,51.491,39.397,83.48,84.65,109.686,105.971,-382.457433,32.880836
136,2024,,,,,,jj-mccarthy-1,J.J. McCarthy,QB,Michigan,...,198.441,171.814,26.627,47.009,52.532,81.41,101.53,96.911,310.873978,38.668959
137,2024,,,,,,bo-nix-1,Bo Nix,QB,Oregon,...,285.842,259.785,26.058,78.016,110.44,92.002,159.46,126.382,-110.360977,39.001342
138,2024,,,,,,michael-pratt-1,Michael Pratt,QB,Tulane,...,146.728,123.083,23.645,34.6,43.911,62.176,81.137,65.591,-427.363868,213.848354
139,2024,,,,,,spencer-rattler-1,Spencer Rattler,QB,South Carolina,...,164.241,136.582,27.659,47.215,66.433,54.367,84.333,79.909,-306.342061,147.623447
140,2024,,,,,,austin-reed-2,Austin Reed,QB,Western Kentucky,...,186.845,166.228,20.617,28.055,45.713,104.335,83.471,103.374,-264.240713,190.8754


In [162]:
rookie_prospect_df[['season', 'draft_team', 'player','passing_yards_y_pred_gbf', 'passing_tds_y_pred_gbf','rushing_yards_y_pred_gbf', 'rushing_tds_y_pred_gbf']].sort_values(by='passing_yards_y_pred_gbf', ascending=False)

KeyError: "['passing_yards_y_pred_gbf', 'passing_tds_y_pred_gbf', 'rushing_yards_y_pred_gbf', 'rushing_tds_y_pred_gbf'] not in index"