In [53]:
#Import Packages

import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import nfl_data_py as nfl
import time
import warnings
import cfbd

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [54]:
#Specifying Year Range
years = range(2014,2025)

In [55]:
#Pull in CSVs from Personal GitHubs
epa_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/Combined_EPA.csv")
blocking_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/blocking_stats.csv")
passing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/passing_stats.csv")
receiving_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/receiving_stats.csv")
rushing_df = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/rushing_stats.csv")

power_5_team  = pd.read_csv("https://raw.githubusercontent.com/JoshKepler/Portfolio/main/NFL%20Project/Data/power_5_teams.csv")

# QB draft projections

In [56]:
#Creating Passing DF

qb_passing_df = passing_df[passing_df['position']=='QB'].fillna(0).sort_values(by='Season')

qb_passing_df = qb_passing_df.groupby(['player_id', 'player']).agg({'Season':'count', 'position':'last', 'team_name':'last',
       'player_game_count':'sum',  'aimed_passes':'sum', 'attempts':'sum', 'avg_depth_of_target':'mean', 'avg_time_to_throw':'mean', 'bats':'sum',
       'big_time_throws':'sum', 'completions':'sum','declined_penalties':'sum', 'def_gen_pressures':'sum', 'dropbacks':'sum',
       'drops':'sum', 'first_downs':'sum', 'grades_hands_fumble':'max','grades_offense':'max', 'grades_pass':'max', 'grades_run':'max', 'hit_as_threw':'sum',
       'interceptions':'sum', 'passing_snaps':'sum', 'penalties':'sum', 'pressure_to_sack_rate':'last',
       'qb_rating':'last', 'sacks':'sum', 'scrambles':'sum', 'spikes':'sum',
       'thrown_aways':'sum', 'touchdowns':'sum', 'turnover_worthy_plays':'sum','yards':'sum'})

qb_passing_df = qb_passing_df.reset_index()

#Creating Rushing DF

qb_rushing_df = rushing_df[rushing_df['position']=='QB'].fillna(0)

qb_rushing_df = qb_rushing_df.groupby(['player_id', 'player']).agg({'attempts':'sum',
       'avoided_tackles':'sum', 'breakaway_attempts':'sum',
       'breakaway_yards':'sum', 'designed_yards':'sum', 'elu_recv_mtf':'last', 'elu_rush_mtf':'last',
       'elu_yco':'last', 'elusive_rating':'last', 'explosive':'sum', 'first_downs':'sum',
       'fumbles':'sum', 'gap_attempts':'sum', 'grades_offense_penalty':'last',
       'grades_pass_block':'last', 'grades_pass_route':'last', 'grades_run_block':'last', 'longest':'max',
       'run_plays':'sum', 'scramble_yards':'sum','total_touches':'sum', 'touchdowns':'sum', 'yards':'sum',
       'yards_after_contact':'sum', 'yco_attempt':'sum', 'ypa':'mean', 'yprr':'mean',
       'zone_attempts':'sum'})

qb_rushing_df = qb_rushing_df.reset_index()

#Merging DFs into one
qb_career_df = pd.merge(qb_passing_df, qb_rushing_df, on=['player_id', 'player'], suffixes=('_passing','_rushing'))
qb_career_df = qb_career_df.fillna(0)

qb_career_df['yards_passing/G_career'] = qb_career_df['yards_passing'] / qb_career_df['player_game_count']
qb_career_df['yards_passing/att_career'] = qb_career_df['yards_passing'] / qb_career_df['attempts_passing']
qb_career_df['completion_pct_career'] = qb_career_df['completions'] / qb_career_df['attempts_passing']
qb_career_df['adj_completion_pct_career'] = (qb_career_df['completions'] + qb_career_df['drops']) / qb_career_df['aimed_passes']
qb_career_df['touchdown_pct_career'] = qb_career_df['touchdowns_passing'] / qb_career_df['attempts_passing']
qb_career_df['interception_pct_career'] = qb_career_df['interceptions'] / qb_career_df['attempts_passing']
qb_career_df['dangerous_play_pct_career'] = qb_career_df['turnover_worthy_plays'] / qb_career_df['attempts_passing']
qb_career_df['interception_pct_career'] = qb_career_df['interceptions'] / qb_career_df['attempts_passing']
qb_career_df['money_throw_pct_career'] = qb_career_df['big_time_throws'] / qb_career_df['attempts_passing']

#qb_career_df.sort_values(by='player_game_count', ascending=False).head(15)

In [57]:
qb_passing_df = passing_df[passing_df['position']=='QB'].fillna(0).sort_values(by='Season')
qb_rushing_df = rushing_df[rushing_df['position']=='QB'].fillna(0).sort_values(by='Season')

qb_passing_df = qb_passing_df.groupby(['player_id', 'player']).last()
qb_rushing_df = qb_rushing_df.groupby(['player_id', 'player']).last()

qb_passing_df = qb_passing_df.reset_index()
qb_rushing_df = qb_rushing_df.reset_index()

qb_rushing_df = qb_rushing_df.drop(['Season', 'position', 'team_name', 'player_game_count','declined_penalties',
                                   'drops', 'franchise_id', 'grades_hands_fumble', 'grades_offense', 'grades_pass', 
                                    'grades_run', 'scrambles', 'rec_yards','receptions', 'routes', 'yprr'], axis=1)

qb_final_season_df = pd.merge(qb_passing_df, qb_rushing_df, on=['player_id', 'player'], suffixes=('_passing','_rushing'))

qb_final_season_df = qb_final_season_df.merge(power_5_team, how='left', on='team_name')

#qb_final_season_df

In [58]:
qb_stats_df = pd.merge(qb_career_df, qb_final_season_df, on=['player_id', 'player'], suffixes=('_career','_final_season'))
qb_stats_df['player_key'] = qb_stats_df['player'] +"_"+ qb_stats_df['position_career']

#qb_stats_df

In [59]:
#Adding Career EPA Numbers
epa_career_df = epa_df

#Bo Nix has 2 different plaer_ID correcting it here
epa_career_df.loc[epa_career_df['Id'] == 4567218, 'Id'] = 4426338

#Continued to add Career EPA Numbers
epa_career_df = epa_career_df.groupby(['Id', 'Name']).agg({'Position':'last', 'Season':'count', 'Team':'last', 'CountablePlays':'sum',
                                                                       'TotalPPA All':'sum', 'TotalPPA Pass':'sum',
                                                                       'TotalPPA Rush':'sum', 'TotalPPA FirstDown':'sum',
                                                                       'TotalPPA SecondDown':'sum','TotalPPA ThirdDown':'sum',
                                                                       'TotalPPA StandardDowns':'sum',
                                                                       'TotalPPA PassingDowns':'sum'})

column_list = ['TotalPPA All', 'TotalPPA Pass','TotalPPA Rush', 'TotalPPA FirstDown', 'TotalPPA SecondDown',
               'TotalPPA ThirdDown', 'TotalPPA StandardDowns','TotalPPA PassingDowns']

for col in column_list:
    average = epa_career_df[col] / epa_career_df['Season']
    epa_career_df[f"{col}_Season_avg"] = average
    
for col in column_list:
    average = epa_career_df[col] / epa_career_df['CountablePlays']
    epa_career_df[f"{col}_play_avg"] = average
    
epa_career_df = epa_career_df.reset_index()

#epa_career_df.sort_values(by='TotalPPA All_Season_avg', ascending=False).head(10)

In [60]:
epa_last_season_df = epa_df

epa_last_season_df = epa_last_season_df.groupby(['Id', 'Name']).last()
epa_last_season_df = epa_last_season_df.reset_index()
    
#epa_last_season_df.sort_values(by='AveragePPA All', ascending=False)

In [61]:
epa_combined = pd.merge(epa_career_df, epa_last_season_df, on=['Id', 'Name'], suffixes=('_career','_last_season'))
epa_combined['player_key'] = epa_combined['Name'] + "_" + epa_combined['Position_last_season']
#epa_combined

In [62]:
qb_df = pd.merge(qb_stats_df, epa_combined, on=['player_key'], suffixes=('_career','_last_season'))
#qb_df

In [63]:
#importing combine & draft dfs from API
combine_df  = nfl.import_combine_data(years)
draft_df = nfl.import_draft_picks(years)

In [64]:
qb_combine_df = combine_df[combine_df['pos'] == 'QB']

#removing brady davis from df to create simplicity within coding
qb_combine_df = qb_combine_df[qb_combine_df['cfb_id'] != 'brady-davis-2']

#editing height from Foot-Inches to only Inches
def extract_height(height_str):
    feet, inches = height_str.split('-')
    return int(feet) * 12 + int(inches)

qb_combine_df['ht'] = qb_combine_df['ht'].apply(extract_height) 

#Update player_name to player for merging simplicity
qb_combine_df = qb_combine_df.rename(columns={'player_name': 'player'})

#qb_combine_df.sort_values(by='ht')

In [65]:
#Merging College Stats with Combine Measurements
prospect_df = qb_combine_df.merge(qb_df, on='player')

#Data Mungering
prospect_df = prospect_df[prospect_df['season'] != 2024] #as of 5/9/24 2024 data was blank. Will add later to make predictions
prospect_df['draft_ovr'] = prospect_df['draft_ovr'].fillna(256) #pick 256 will represent undrafted
prospect_df['draft_round'] = prospect_df['draft_round'].fillna(8) #round 8 will represent undrafted
prospect_df['draft_team'] = prospect_df['draft_team'].fillna('Undrafted')
prospect_df['draft_year'] = prospect_df['season']
#prospect_df

In [66]:
#columns to drop from the combined df
cols_to_remove = ['player_id','position_career','team_name_career','position_final_season', 'franchise_id', 'player_key', 
                  'Id', 'Name','Position_career', 'Season_career_last_season', 'Team_career', 'Season_last_season', 
                  'Position_last_season', 'Team_last_season', 'Conference']

prospect_df = prospect_df.drop(cols_to_remove, axis=1)
#prospect_df

In [67]:
#getting a list of attributes we will use for our model
var_stats = prospect_df.columns
var_stats = var_stats.drop(['draft_team','draft_round','draft_ovr','pfr_id','cfb_id','player','pos','school','draft_year',
                            'team_name_final_season'])

#filling in the empty combine stats with the median for that event
combine_stats = ['wt', 'forty','bench', 'vertical', 'broad_jump', 'cone', 'shuttle']

for stat in combine_stats:
    prospect_df[stat] = prospect_df[stat].fillna(prospect_df[stat].median())

#prospect_df

In [68]:
warnings.filterwarnings("ignore")

X = prospect_df[var_stats]
Y = prospect_df['draft_ovr']

reg_model = LinearRegression()
reg_model.fit(X,Y)
y_pred_regression = reg_model.predict(X)
prospect_df['y_pred_regression'] = y_pred_regression

rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
rand_model.fit(X,Y)
y_pred_rand = rand_model.predict(X)
prospect_df['y_pred_random_forrest'] = y_pred_rand

svr_model = SVR(kernel='rbf', C=1.0)
svr_model.fit(X, Y)
svr_predictions = svr_model.predict(X)
prospect_df['y_pred_svr'] = svr_predictions

gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
gbr_model.fit(X, Y)
gbr_predictions = gbr_model.predict(X)
prospect_df['y_pred_gbf'] = gbr_predictions

'''print("\nMean Absolute Error for ", prospect_df['pos'].iloc[0], " (Closest to 0, the better.)")
print("Linear Regression: ", mean_absolute_error(prospect_df['draft_ovr'], y_pred_regression))
print("Random Forrest: ", mean_absolute_error(prospect_df['draft_ovr'], y_pred_rand))
print("SVR Model: ", mean_absolute_error(prospect_df['draft_ovr'], svr_predictions))
print("GBR Model: ", mean_absolute_error(prospect_df['draft_ovr'], gbr_predictions))'''

'print("\nMean Absolute Error for ", prospect_df[\'pos\'].iloc[0], " (Closest to 0, the better.)")\nprint("Linear Regression: ", mean_absolute_error(prospect_df[\'draft_ovr\'], y_pred_regression))\nprint("Random Forrest: ", mean_absolute_error(prospect_df[\'draft_ovr\'], y_pred_rand))\nprint("SVR Model: ", mean_absolute_error(prospect_df[\'draft_ovr\'], svr_predictions))\nprint("GBR Model: ", mean_absolute_error(prospect_df[\'draft_ovr\'], gbr_predictions))'

In [69]:
'''coefficients = reg_model.coef_
intercept = reg_model.intercept_

# Sort features and coefficients together by absolute coefficient value (descending order)
sorted_features_and_coefs = sorted(zip(var_stats, coefficients), key=lambda x: abs(x[1]), reverse=True)

# Print the formula with sorted features and coefficients
formula = "y = "
for feature, coef in sorted_features_and_coefs:
    if coef > 0:
        formula += f" +{abs(coef):.4f} {feature} \n"  # Add '+' for positive coefficients
    else:
        formula += f" -{abs(coef):.4f} {feature} \n"  # Add '-' for negative coefficients

formula += f" + {intercept:.4f}"  # Add intercept

print(formula)'''

'coefficients = reg_model.coef_\nintercept = reg_model.intercept_\n\n# Sort features and coefficients together by absolute coefficient value (descending order)\nsorted_features_and_coefs = sorted(zip(var_stats, coefficients), key=lambda x: abs(x[1]), reverse=True)\n\n# Print the formula with sorted features and coefficients\nformula = "y = "\nfor feature, coef in sorted_features_and_coefs:\n    if coef > 0:\n        formula += f" +{abs(coef):.4f} {feature} \n"  # Add \'+\' for positive coefficients\n    else:\n        formula += f" -{abs(coef):.4f} {feature} \n"  # Add \'-\' for negative coefficients\n\nformula += f" + {intercept:.4f}"  # Add intercept\n\nprint(formula)'

In [70]:
#prospect_df[['player', 'school', 'draft_year', 'draft_ovr', 'y_pred_regression','y_pred_gbf']].sort_values('y_pred_gbf').head(25)

In [71]:
#Merging College Stats with Combine Measurements
rookie_prospect_df = qb_combine_df.merge(qb_df, on='player')

#Data Mungering
for stat in combine_stats:
    rookie_prospect_df[stat] = rookie_prospect_df[stat].fillna(rookie_prospect_df[stat].median())
    
rookie_prospect_df = rookie_prospect_df[rookie_prospect_df['season'] == 2024]
    
X = rookie_prospect_df[var_stats]
y_pred_regression = reg_model.predict(X)
rookie_prospect_df['y_pred_regression'] = y_pred_regression

gbr_predictions = gbr_model.predict(X)
rookie_prospect_df['y_pred_gbf'] = gbr_predictions

df_print = rookie_prospect_df[['player', 'school', 'draft_year', 'draft_ovr', 'y_pred_regression', 'y_pred_gbf']].sort_values(by='y_pred_gbf')
df_print['gbf_proj_round'] = (df_print['y_pred_gbf']//32) + 1
df_print

Unnamed: 0,player,school,draft_year,draft_ovr,y_pred_regression,y_pred_gbf,gbf_proj_round
143,Caleb Williams,USC,,,41.17332,15.12257,1.0
136,J.J. McCarthy,Michigan,,,310.873978,33.601572,2.0
135,Drake Maye,North Carolina,,,-382.457433,36.84053,2.0
131,Jayden Daniels,LSU,,,-96.040407,37.116519,2.0
137,Bo Nix,Oregon,,,-110.360977,45.493559,2.0
133,Michael Penix Jr.,Washington,,,-268.420384,120.527842,4.0
142,Jordan Travis,Florida St.,,,15.520852,138.975548,5.0
139,Spencer Rattler,South Carolina,,,-306.342061,147.623447,5.0
141,Kedon Slovis,BYU,,,-156.190931,160.28155,6.0
134,Devin Leary,Kentucky,,,-126.631095,182.272836,6.0


# Creating Rookie Projections

In [72]:
rookie_stats_df = nfl.import_seasonal_data(years[:-1])

rookie_stats_df = rookie_stats_df[['player_id', 'season', 'games', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions']]

rookie_stats_df = rookie_stats_df.sort_values(by='season')
rookie_stats_df = rookie_stats_df.groupby(by='player_id').first()
rookie_stats_df = rookie_stats_df.reset_index()

rookie_stats_df

Unnamed: 0,player_id,season,games,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,...,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions
0,00-0007091,2014,4,REG,30,44,301.0,2,0.0,2.0,...,2.540580,0.304226,8,-11.0,0,0.0,0.0,0.0,-7.756981,0
1,00-0010346,2014,16,REG,395,597,4727.0,39,15.0,17.0,...,15.767253,2.556234,24,-24.0,0,3.0,1.0,0.0,-24.812411,0
2,00-0018227,2015,1,REG,0,0,0.0,0,0.0,0.0,...,0.000000,0.000000,1,-3.0,0,0.0,0.0,0.0,-0.917858,0
3,00-0019596,2014,16,REG,373,582,4109.0,33,9.0,21.0,...,14.613688,2.111506,36,57.0,0,1.0,0.0,11.0,-4.367861,0
4,00-0019714,2014,1,REG,1,1,10.0,0,0.0,0.0,...,0.000000,0.000000,0,0.0,0,0.0,0.0,0.0,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1962,00-0039150,2023,16,REG,315,527,2877.0,11,10.0,62.0,...,12.786642,0.672251,39,253.0,0,2.0,0.0,18.0,21.616300,1
1963,00-0039152,2023,9,REG,149,255,1808.0,8,4.0,28.0,...,6.151852,0.462094,25,57.0,1,2.0,2.0,7.0,-9.428327,0
1964,00-0039163,2023,15,REG,319,499,4108.0,23,5.0,38.0,...,13.869473,1.603286,39,157.0,3,2.0,1.0,17.0,6.004496,1
1965,00-0039164,2023,4,REG,50,84,577.0,3,1.0,7.0,...,5.912336,0.224194,25,136.0,4,2.0,1.0,8.0,0.430104,0


In [73]:
id_df = nfl.import_ids()
print(id_df.columns)
id_df = id_df[['pfr_id','gsis_id','name']]
#id_df

Index(['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id',
       'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id',
       'cbs_id', 'pfr_id', 'cfbref_id', 'rotowire_id', 'rotoworld_id',
       'ktc_id', 'stats_id', 'stats_global_id', 'fantasy_data_id', 'swish_id',
       'name', 'merge_name', 'position', 'team', 'birthdate', 'age',
       'draft_year', 'draft_round', 'draft_pick', 'draft_ovr',
       'twitter_username', 'height', 'weight', 'college', 'db_season'],
      dtype='object')


In [74]:
proj_df = prospect_df.merge(id_df, on='pfr_id')
proj_df = proj_df.merge(rookie_stats_df, left_on='gsis_id', right_on='player_id')

proj_df.sort_values(by='passing_yards', ascending=False)

Unnamed: 0,season_x,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions
1546,2020,2020,Los Angeles Chargers,1.0,6.0,HerbJu00,justin-herbert-1,Justin Herbert,QB,Oregon,...,15.286666,1.934134,55,234.0,5,4.0,0.0,20.0,0.664679,0
1570,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,13.869473,1.603286,39,157.0,3,2.0,1.0,17.0,6.004496,1
4,2015,2015,Tampa Bay Buccaneers,1.0,1.0,WinsJa00,jameis-winston-1,Jameis Winston,QB,Florida State,...,12.848774,1.819690,54,213.0,6,3.0,1.0,18.0,2.977389,0
1554,2021,2021,New England Patriots,1.0,15.0,JoneMa05,mac-jones-1,Mac Jones,QB,Alabama,...,18.102397,1.989122,44,129.0,0,3.0,1.0,22.0,-0.205186,0
1529,2018,2018,Cleveland Browns,1.0,1.0,MayfBa00,baker-mayfield-1,Baker Mayfield,QB,Oklahoma,...,12.114106,1.773421,39,131.0,0,3.0,1.0,8.0,-9.446474,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,2018,2018,Undrafted,8.0,256.0,,quinton-flowers-1,Quinton Flowers,QB,South Florida,...,0.000000,0.000000,0,0.0,0,0.0,0.0,0.0,0.000000,0
655,2018,2018,Undrafted,8.0,256.0,,austin-allen-1,Austin Allen,QB,Arkansas,...,0.000000,0.000000,0,0.0,0,0.0,0.0,0.0,0.000000,0
654,2022,2022,Undrafted,8.0,256.0,,,Cole Kelley,QB,Southeastern Louisiana,...,0.000000,0.000000,2,12.0,0,0.0,0.0,1.0,0.569428,0
653,2021,2021,Undrafted,8.0,256.0,,david-moore-5,David Moore,QB,Central Michigan,...,0.000000,0.000000,2,12.0,0,0.0,0.0,1.0,0.569428,0


In [75]:
duplicates = proj_df['pfr_id'].duplicated()
proj_df = proj_df[~duplicates]

proj_df = proj_df.rename(columns={'season_x':'season'})

proj_df.sort_values(by='passing_yards', ascending=False)

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions
1546,2020,2020,Los Angeles Chargers,1.0,6.0,HerbJu00,justin-herbert-1,Justin Herbert,QB,Oregon,...,15.286666,1.934134,55,234.0,5,4.0,0.0,20.0,0.664679,0
1570,2023,2023,Houston Texans,1.0,2.0,StroCJ00,cj-stroud-1,C.J. Stroud,QB,Ohio St.,...,13.869473,1.603286,39,157.0,3,2.0,1.0,17.0,6.004496,1
4,2015,2015,Tampa Bay Buccaneers,1.0,1.0,WinsJa00,jameis-winston-1,Jameis Winston,QB,Florida State,...,12.848774,1.819690,54,213.0,6,3.0,1.0,18.0,2.977389,0
1554,2021,2021,New England Patriots,1.0,15.0,JoneMa05,mac-jones-1,Mac Jones,QB,Alabama,...,18.102397,1.989122,44,129.0,0,3.0,1.0,22.0,-0.205186,0
1529,2018,2018,Cleveland Browns,1.0,1.0,MayfBa00,baker-mayfield-1,Baker Mayfield,QB,Oklahoma,...,12.114106,1.773421,39,131.0,0,3.0,1.0,8.0,-9.446474,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1539,2019,2019,Baltimore Ravens,6.0,197.0,McSoTr00,trace-mcsorley-1,Trace McSorley,QB,Penn State,...,0.000000,0.000000,1,1.0,0,0.0,0.0,1.0,0.453161,0
24,2017,2017,New York Giants,3.0,87.0,WebbDa00,davis-webb-1,Davis Webb,QB,California,...,0.000000,0.000000,2,-3.0,0,0.0,0.0,0.0,0.000000,0
25,2018,2018,Undrafted,8.0,256.0,,austin-allen-1,Austin Allen,QB,Arkansas,...,0.000000,0.000000,1,1.0,0,0.0,0.0,1.0,1.848790,0
1552,2021,2021,Indianapolis Colts,6.0,218.0,EhliSa00,sam-ehlinger-1,Sam Ehlinger,QB,Texas,...,0.000000,0.000000,3,9.0,0,0.0,0.0,1.0,0.041919,0


In [76]:
#getting a list of attributes we will use for our model
var_stats = prospect_df.columns
var_stats = var_stats.drop(['draft_team','draft_round','pfr_id','cfb_id','player','pos','school','draft_year',
                            'team_name_final_season','y_pred_random_forrest', 'y_pred_svr'])

In [77]:
perdicted_stats = ['games', 'passing_yards','passing_tds', 'interceptions', 'passing_2pt_conversions', 'sack_fumbles_lost',
 'rushing_yards','rushing_tds','rushing_2pt_conversions', 'rushing_fumbles_lost', 'rushing_first_downs']

gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
reg_model = LinearRegression()
rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
svr_model = SVR(kernel='rbf', C=1.0)

for stat in perdicted_stats:
    X = proj_df[var_stats]
    Y = proj_df[stat]

    reg_model.fit(X,Y)
    y_pred_regression = reg_model.predict(X)
    proj_df[stat + '_y_pred_regression'] = y_pred_regression

    rand_model.fit(X,Y)
    y_pred_rand = rand_model.predict(X)
    proj_df[stat + '_y_pred_random_forrest'] = y_pred_rand

    svr_model.fit(X, Y)
    svr_predictions = svr_model.predict(X)
    proj_df[stat + '_y_pred_svr'] = svr_predictions
    
    gbr_model.fit(X, Y)
    gbr_predictions = gbr_model.predict(X)
    proj_df[stat + '_y_pred_gbf'] = gbr_predictions

    '''print("\nMean Absolute Error for ", stat, " (Closest to 0, the better.)")
    print("GBR Model: ", mean_absolute_error(proj_df[stat], gbr_predictions))    
    print("Linear Regression: ", mean_absolute_error(proj_df[stat], y_pred_regression))
    print("Random Forrest: ", mean_absolute_error(proj_df[stat], y_pred_rand))
    print("SVR Model: ", mean_absolute_error(proj_df[stat], svr_predictions))'''

In [78]:
ff_scoring = {'passing_yards':.04,'passing_tds':4, 'interceptions':-2, 'passing_2pt_conversions':2, 'sack_fumbles_lost':-2,
 'rushing_yards':.1,'rushing_tds':6,'rushing_2pt_conversions':2, 'rushing_fumbles_lost':-2, 'rushing_first_downs':.5}

def ff_scoring_method(df, text_mod):
    proj_df['ff_scoring'+ text_mod] = 0
    for key, value in ff_scoring.items():
        df['ff_scoring' + text_mod] = (df[key+text_mod] * value) + df['ff_scoring' + text_mod]

    df['ff_scoring/g' + text_mod] = df['ff_scoring' + text_mod] / df['games' + text_mod]

ff_scoring_method(proj_df, '')
proj_df[['season','draft_team', 'player', 'games', 'ff_scoring', 'ff_scoring/g']].sort_values(by='ff_scoring/g', ascending=False).head(10)

Unnamed: 0,season,draft_team,player,games,ff_scoring,ff_scoring/g
23,2017,Houston Texans,Deshaun Watson,7,175.86,25.122857
1546,2020,Los Angeles Chargers,Justin Herbert,15,342.84,22.856
1559,2022,Washington Commanders,Sam Howell,1,20.26,20.26
1569,2023,Indianapolis Colts,Anthony Richardson,4,76.68,19.17
1525,2018,Buffalo Bills,Josh Allen,12,227.36,18.946667
1570,2023,Houston Texans,C.J. Stroud,15,282.52,18.834667
1541,2019,Arizona Cardinals,Kyler Murray,16,298.78,18.67375
15,2016,Dallas Cowboys,Dak Prescott,16,297.38,18.58625
1543,2020,Cincinnati Bengals,Joe Burrow,10,180.72,18.072
4,2015,Tampa Bay Buccaneers,Jameis Winston,16,283.98,17.74875


In [79]:
#proj_df[['season', 'draft_team', 'player', 'games', 'games_y_pred_gbf', 'games_y_pred_random_forrest', 'passing_yards', 'passing_yards_y_pred_gbf', 'passing_yards_y_pred_random_forrest']].sort_values(by='passing_yards_y_pred_random_forrest', ascending=False).head(10)

In [80]:
draft_df = draft_df.rename(columns={'cfb_player_id':'cfb_id','pick':'draft_ovr'})

rookie_proj_df = rookie_prospect_df.merge(draft_df[['cfb_id', 'draft_ovr']], on='cfb_id')
rookie_proj_df['draft_ovr_x'] = rookie_proj_df['draft_ovr_y']
rookie_proj_df = rookie_proj_df.drop(columns=['draft_ovr_y'])
rookie_proj_df = rookie_proj_df.rename(columns={'draft_ovr_x':'draft_ovr'})

#rookie_proj_df

In [81]:
gbr_model = GradientBoostingRegressor(n_estimators=(X.shape[0]), learning_rate=0.1)
reg_model = LinearRegression()
rand_model = RandomForestRegressor(n_estimators=(X.shape[0]), random_state=42)
svr_model = SVR(kernel='rbf', C=1.0)

for stat in perdicted_stats:
    X = proj_df[var_stats]
    Y = proj_df[stat]

    reg_model.fit(X,Y)
    rand_model.fit(X,Y)
    svr_model.fit(X,Y)
    gbr_model.fit(X,Y)

    X = rookie_proj_df[var_stats]
    y_pred_regression = reg_model.predict(X)
    rookie_proj_df[stat + '_y_pred_regression'] = y_pred_regression
        
    y_pred_rand = rand_model.predict(X)
    rookie_proj_df[stat + '_y_pred_random_forrest'] = y_pred_rand

    svr_predictions = svr_model.predict(X)
    rookie_proj_df[stat + '_y_pred_svr'] = svr_predictions
    
    gbr_predictions = gbr_model.predict(X)
    rookie_proj_df[stat + '_y_pred_gbf'] = gbr_predictions

    '''print("\nMean Absolute Error for ", stat, " (Closest to 0, the better.)")
    print("GBR Model: ", mean_absolute_error(rookie_proj_df[stat+ '_y_pred_regression'], gbr_predictions))    
    print("Linear Regression: ", mean_absolute_error(rookie_proj_df[stat + '_y_pred_random_forrest'], y_pred_regression))
    print("Random Forrest: ", mean_absolute_error(rookie_proj_df[stat + '_y_pred_svr'], y_pred_rand))
    print("SVR Model: ", mean_absolute_error(rookie_proj_df[stat+ '_y_pred_gbf'], svr_predictions))'''
        
rookie_proj_df

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player,pos,school,...,rushing_2pt_conversions_y_pred_svr,rushing_2pt_conversions_y_pred_gbf,rushing_fumbles_lost_y_pred_regression,rushing_fumbles_lost_y_pred_random_forrest,rushing_fumbles_lost_y_pred_svr,rushing_fumbles_lost_y_pred_gbf,rushing_first_downs_y_pred_regression,rushing_first_downs_y_pred_random_forrest,rushing_first_downs_y_pred_svr,rushing_first_downs_y_pred_gbf
0,2024,,,,2,,jayden-daniels-1,Jayden Daniels,QB,LSU,...,0.112011,0.30094,2.318363,1.373333,0.194189,1.514555,40.397816,18.293333,4.497634,16.388092
1,2024,,,,8,,michael-penix-jr-1,Michael Penix Jr.,QB,Washington,...,0.088527,0.028067,-0.340495,0.186667,0.10717,0.315831,53.113761,9.626667,4.151787,12.167619
2,2024,,,,218,,devin-leary-1,Devin Leary,QB,Kentucky,...,0.097471,0.04804,0.818915,0.106667,0.055108,-0.001431,28.893148,5.546667,3.964002,2.948724
3,2024,,,,3,,drake-maye-1,Drake Maye,QB,North Carolina,...,0.101286,0.024609,1.644515,0.76,0.159806,0.849241,54.775274,14.04,4.631323,15.510851
4,2024,,,,10,,jj-mccarthy-1,J.J. McCarthy,QB,Michigan,...,0.09796,0.117486,0.291964,0.186667,0.117898,0.27499,-29.408895,10.533333,4.57977,8.370842
5,2024,,,,12,,bo-nix-1,Bo Nix,QB,Oregon,...,0.096266,1.011397,0.097402,0.906667,0.113632,0.979317,18.003764,16.693333,4.235968,16.01269
6,2024,,,,245,,michael-pratt-1,Michael Pratt,QB,Tulane,...,0.095919,0.021192,2.959919,0.173333,0.103516,-0.0026,54.198732,7.426667,4.261238,5.676745
7,2024,,,,150,,spencer-rattler-1,Spencer Rattler,QB,South Carolina,...,0.09973,0.021615,0.645059,0.08,0.064781,-0.003817,42.055538,6.72,4.06893,4.826319
8,2024,,,,171,,jordan-travis-1,Jordan Travis,QB,Florida St.,...,0.095061,0.027855,0.222386,0.6,0.170774,0.322881,23.960355,7.866667,4.614978,8.127617
9,2024,,,,1,,caleb-williams-3,Caleb Williams,QB,USC,...,0.102292,0.020276,-0.799356,0.44,0.117835,0.62897,-12.952113,16.013333,4.388153,20.731574


# Perdicting Passing Yards Per Game & Fantasy Points Per Game

In [82]:
#Perdicting Passing Yards Per Game
predicted_stats = ['player']
model_list = ['_y_pred_regression','_y_pred_random_forrest', '_y_pred_gbf','_y_pred_svr']
filtered_stats = ['passing_yards', 'games']

for stat in filtered_stats:
    for model in model_list:
        mod_col = stat + model
        predicted_stats.append(mod_col)

for i in model_list:
    rookie_proj_df['pass_yards/g' + i] = rookie_proj_df[filtered_stats[0]+i] / rookie_proj_df[filtered_stats[1]+i]
    mod_col = 'pass_yards/g' + i
    predicted_stats.append(mod_col)
        
rookie_proj_df[predicted_stats].sort_values(by='pass_yards/g_y_pred_gbf', ascending=False)

Unnamed: 0,player,passing_yards_y_pred_regression,passing_yards_y_pred_random_forrest,passing_yards_y_pred_gbf,passing_yards_y_pred_svr,games_y_pred_regression,games_y_pred_random_forrest,games_y_pred_gbf,games_y_pred_svr,pass_yards/g_y_pred_regression,pass_yards/g_y_pred_random_forrest,pass_yards/g_y_pred_gbf,pass_yards/g_y_pred_svr
1,Michael Penix Jr.,16089.472941,1772.8,1283.348741,781.460578,52.299919,5.986667,4.221818,5.9648,307.638583,296.124722,303.980136,131.012025
5,Bo Nix,8468.201114,2184.813333,1676.089794,781.0319,25.422856,8.026667,6.880464,5.834731,333.094014,272.194352,243.601268,133.859116
0,Jayden Daniels,9047.170494,2315.12,2193.785425,780.607795,25.840925,13.48,13.764807,6.267846,350.110164,171.744807,159.376401,124.54164
9,Caleb Williams,-1457.015025,2114.32,2226.740718,780.888456,2.977072,12.786667,14.267815,6.264987,-489.412028,165.353493,156.067396,124.643275
4,J.J. McCarthy,-6057.6918,1462.053333,1230.521951,779.98867,-15.516949,10.426667,8.110027,5.936754,390.391939,140.222506,151.728472,131.383026
3,Drake Maye,16689.722564,2127.88,1817.476483,780.528439,58.0428,12.013333,12.176503,6.330366,287.541652,177.126526,149.260957,123.299106
2,Devin Leary,7901.803278,921.266667,643.375106,780.595875,28.280602,5.066667,6.866087,5.786167,279.407186,181.828947,93.703309,134.90726
7,Spencer Rattler,15655.784034,1239.186667,864.76675,780.825157,54.569535,8.093333,9.296243,5.964579,286.896051,153.112026,93.023252,130.910348
8,Jordan Travis,4593.925091,831.413333,646.738878,780.227094,18.333412,5.213333,7.034735,6.206432,250.576663,159.478261,91.935077,125.71266
6,Michael Pratt,19058.842281,899.92,629.364571,780.300316,66.446859,5.653333,6.923266,5.949085,286.828342,159.183962,90.905731,131.163072


In [83]:
#Perdicting Fantasy Pts Per Game
for model in model_list:
    rookie_proj_df['ff_scoring' + model] = 0
    for key, value in ff_scoring.items():
        rookie_proj_df['ff_scoring' + model] = (rookie_proj_df[key+model] * value) + rookie_proj_df['ff_scoring' + model]

for model in model_list:
    rookie_proj_df['ff_scoring/g' + model] = rookie_proj_df['ff_scoring' + model] / rookie_proj_df['games' + model]
    rookie_proj_df['ff_scoring/g' + model + '_rank'] = rookie_proj_df['ff_scoring/g' + model].rank(ascending=False)

model_rank_list = []
for model in model_list:
    mod_col = 'ff_scoring/g' + model + '_rank'
    model_rank_list.append(mod_col)
    
rookie_proj_df['ff_scoring/g_ovr_rank'] = rookie_proj_df[model_rank_list].mean(axis=1)
            
rookie_proj_df = rookie_proj_df.sort_values(by=['ff_scoring/g_ovr_rank','draft_ovr'], ascending=True)
    
sorted_col = ['player', 'draft_ovr','ff_scoring/g_y_pred_regression','ff_scoring/g_y_pred_random_forrest',
              'ff_scoring/g_y_pred_gbf','ff_scoring/g_y_pred_svr','ff_scoring/g_y_pred_regression_rank',
              'ff_scoring/g_y_pred_random_forrest_rank','ff_scoring/g_y_pred_gbf_rank','ff_scoring/g_y_pred_svr_rank',
              'ff_scoring/g_ovr_rank']

rookie_proj_df[sorted_col]

Unnamed: 0,player,draft_ovr,ff_scoring/g_y_pred_regression,ff_scoring/g_y_pred_random_forrest,ff_scoring/g_y_pred_gbf,ff_scoring/g_y_pred_svr,ff_scoring/g_y_pred_regression_rank,ff_scoring/g_y_pred_random_forrest_rank,ff_scoring/g_y_pred_gbf_rank,ff_scoring/g_y_pred_svr_rank,ff_scoring/g_ovr_rank
5,Bo Nix,12,18.387715,20.448571,21.548438,8.507243,5.0,1.0,1.0,1.0,2.0
1,Michael Penix Jr.,8,19.414156,20.360134,18.540585,8.231089,3.0,2.0,2.0,3.0,2.5
0,Jayden Daniels,2,20.53049,13.145064,12.796726,8.239777,2.0,4.0,3.0,2.0,2.75
3,Drake Maye,3,18.454347,13.38384,10.780574,7.762163,4.0,3.0,6.0,10.0,5.75
4,J.J. McCarthy,10,29.517022,10.632302,11.616442,7.816119,1.0,10.0,4.0,9.0,6.0
7,Spencer Rattler,150,18.170372,11.315914,5.918521,8.074264,6.0,7.0,8.0,5.0,6.5
9,Caleb Williams,1,-49.670095,11.998603,11.352834,7.929195,10.0,5.0,5.0,8.0,7.0
8,Jordan Travis,171,14.925845,11.281432,7.396367,8.041201,9.0,8.0,7.0,6.0,7.5
2,Devin Leary,218,16.507319,11.515789,5.16025,8.007852,8.0,6.0,9.0,7.0,7.5
6,Michael Pratt,245,17.664402,10.901792,4.347487,8.143621,7.0,9.0,10.0,4.0,7.5
