In [27]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from typing import Tuple, List
import pickle
import os

In [34]:
parent_dir = os.path.dirname(os.getcwd())  # Get parent of current working directory

def load_models():

    model_path = os.path.join(parent_dir, 'yardage_model.pkl')
    with open(model_path, 'rb') as f:
        yardage_model = pickle.load(f)
    
    model_path = os.path.join(parent_dir, 'touchdown_model.pkl')
    with open(model_path, 'rb') as f:
        touchdown_model = pickle.load(f)

    return yardage_model, touchdown_model



In [11]:
YEARS = [2024]

# %%
data_all = pd.DataFrame()

def calculate_seconds(row):
    if row['qtr'] != 5:
        return 3600 - row['game_seconds_remaining']
    else:
        return 600 - row['game_seconds_remaining'] + 3600


def get_quarter_value(dataf):
    if 'END QUARTER' in dataf['desc']:
        return dataf['level_0']
    else:
        return None

for i in YEARS:  
    i_data = pd.read_csv('https://github.com/nflverse/nflverse-data/releases/download/pbp/' \
                   'play_by_play_' + str(i) + '.csv.gz',
                   compression= 'gzip', low_memory= False)

    data_all = pd.concat([data_all,i_data])

ppr = 1

data = data_all.loc[data_all.season_type=='REG']
#data = data_all.loc[(data_all.play_type.isin(['no_play','pass','run'])) & (data_all.epa.isna()==False)]
#data.loc[data['pass']==1, 'play_type'] = 'pass'
#data.loc[data.rush==1, 'play_type'] = 'run'
data.reset_index(drop=True, inplace=True)
data['turnover'] = data['interception'] + data['fumble_lost']
data = data.dropna(subset=['posteam'])
data['inside_10'] = (data['yardline_100'] < 10).astype(int)
data['20+_play'] = (data['yards_gained'] > 19).astype(int)
data['short_pass'] = (data['air_yards'] < 10).astype(int)
data['medium_pass'] = ((data['air_yards'] > 9)&(data['air_yards']<20)).astype(int)
data['deep_pass'] = (data['air_yards'] > 19).astype(int)
data['end_zone_target'] = (data['yardline_100'] - data['air_yards']) <= 0
data['fantasy_points'] = (
    data['complete_pass'] * ppr +          # 1 point per completion
    data['touchdown'] * 6 +           # 6 points per touchdown
    data['yards_gained'] * 0.1        # 0.1 points per yard gained
)
data['distance_to_EZ_after_target'] = data['yardline_100'] - data['air_yards']

  data['turnover'] = data['interception'] + data['fumble_lost']
  data['inside_10'] = (data['yardline_100'] < 10).astype(int)
  data['20+_play'] = (data['yards_gained'] > 19).astype(int)
  data['short_pass'] = (data['air_yards'] < 10).astype(int)
  data['medium_pass'] = ((data['air_yards'] > 9)&(data['air_yards']<20)).astype(int)
  data['deep_pass'] = (data['air_yards'] > 19).astype(int)
  data['end_zone_target'] = (data['yardline_100'] - data['air_yards']) <= 0
  data['fantasy_points'] = (
  data['distance_to_EZ_after_target'] = data['yardline_100'] - data['air_yards']


In [12]:
def total_finder(home_or_away,home_total,away_total):
    if home_or_away == 'home':
        total = home_total
    else:
        total = away_total 
    return total

In [13]:
    data.reset_index(drop=True, inplace=True)

    data = data[data['two_point_attempt']==0]


    # derive implied team total from betting market data
    data['home_implied_total'] = abs(data['total_line'] / 2 + data['spread_line'] / 2)
    data['away_implied_total'] = abs(data['total_line'] / 2 - data['spread_line'] / 2)

    # Use list comprehension with zip for more efficient row-wise operations
    data['implied_posteam_total'] = [
    total_finder(has_ball, home_number, away_number)
        for has_ball, home_number, away_number in zip(data['posteam_type'], data['home_implied_total'], data['away_implied_total'])
]


  data['home_implied_total'] = abs(data['total_line'] / 2 + data['spread_line'] / 2)
  data['away_implied_total'] = abs(data['total_line'] / 2 - data['spread_line'] / 2)
  data['implied_posteam_total'] = [


In [19]:
data[data['air_yards'].notna()]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,20+_play,short_pass,medium_pass,deep_pass,end_zone_target,fantasy_points,distance_to_EZ_after_target,home_implied_total,away_implied_total,implied_posteam_total
2,83,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,...,1,1,0,0,False,3.2,70.0,26.25,19.75,19.75
3,108,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,...,0,1,0,0,False,1.9,43.0,26.25,19.75,19.75
7,199,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,...,0,1,0,0,False,1.8,24.0,26.25,19.75,19.75
8,224,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,...,0,0,1,0,False,0.0,10.0,26.25,19.75,19.75
13,381,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,...,0,1,0,0,True,7.5,0.0,26.25,19.75,19.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22795,4481,2024_10_CIN_BAL,2024110700,BAL,CIN,REG,10,CIN,away,BAL,...,0,0,1,0,False,0.0,48.0,29.50,23.50,23.50
22797,4568,2024_10_CIN_BAL,2024110700,BAL,CIN,REG,10,CIN,away,BAL,...,1,0,1,0,False,3.1,31.0,29.50,23.50,23.50
22798,4614,2024_10_CIN_BAL,2024110700,BAL,CIN,REG,10,CIN,away,BAL,...,0,0,1,0,True,0.0,0.0,29.50,23.50,23.50
22800,4673,2024_10_CIN_BAL,2024110700,BAL,CIN,REG,10,CIN,away,BAL,...,0,1,0,0,False,1.2,10.0,29.50,23.50,23.50


In [20]:
    
    # we only want throws to a receiver, aka plays with air yardage (no running plays, sacks, throwaways etc.)
    throws = data[data['air_yards'].notna()]

    throws = throws[throws['receiver_player_name'].notna()]
    throws = throws[throws['pass_location'].notna()]

    
    df = throws[['receiver_player_name','receiver_player_id','posteam','pass','cp','game_id','complete_pass','inside_10','air_yards','yardline_100','ydstogo','implied_posteam_total','yards_gained','fantasy_points','pass_touchdown','down','pass_location','week','season','home_implied_total','away_implied_total','posteam_type','qb_hit','end_zone_target', 'distance_to_EZ_after_target']]


In [21]:
df.head(10)

Unnamed: 0,receiver_player_name,receiver_player_id,posteam,pass,cp,game_id,complete_pass,inside_10,air_yards,yardline_100,...,down,pass_location,week,season,home_implied_total,away_implied_total,posteam_type,qb_hit,end_zone_target,distance_to_EZ_after_target
2,J.Conner,00-0033553,ARI,1,0.868591,2024_01_ARI_BUF,1.0,0,-3.0,67.0,...,2.0,left,1,2024,26.25,19.75,away,0.0,False,70.0
3,J.Conner,00-0033553,ARI,1,0.807773,2024_01_ARI_BUF,1.0,0,2.0,45.0,...,1.0,middle,1,2024,26.25,19.75,away,0.0,False,43.0
7,G.Dortch,00-0035500,ARI,1,0.688353,2024_01_ARI_BUF,1.0,0,6.0,30.0,...,3.0,middle,1,2024,26.25,19.75,away,0.0,False,24.0
8,M.Harrison,00-0039849,ARI,1,0.549081,2024_01_ARI_BUF,0.0,0,12.0,22.0,...,1.0,right,1,2024,26.25,19.75,away,0.0,False,10.0
13,Mi.Wilson,00-0038559,ARI,1,0.403381,2024_01_ARI_BUF,1.0,1,5.0,5.0,...,3.0,left,1,2024,26.25,19.75,away,0.0,True,0.0
18,M.Hollins,00-0033555,BUF,1,0.710253,2024_01_ARI_BUF,1.0,0,13.0,33.0,...,1.0,middle,1,2024,26.25,19.75,home,0.0,False,20.0
21,J.Conner,00-0033553,ARI,1,0.872271,2024_01_ARI_BUF,1.0,0,-3.0,67.0,...,2.0,left,1,2024,26.25,19.75,away,0.0,False,70.0
22,E.Demercado,00-0038705,ARI,1,0.675262,2024_01_ARI_BUF,1.0,0,0.0,65.0,...,3.0,right,1,2024,26.25,19.75,away,0.0,False,65.0
23,T.McBride,00-0037744,ARI,1,0.510629,2024_01_ARI_BUF,1.0,0,6.0,41.0,...,1.0,left,1,2024,26.25,19.75,away,1.0,False,35.0
24,E.Higgins,00-0039041,ARI,1,0.857707,2024_01_ARI_BUF,1.0,0,-1.0,35.0,...,2.0,right,1,2024,26.25,19.75,away,0.0,False,36.0


In [22]:
def predict_columns(data, yardage_model, touchdown_model):
    new_predictors = [
        'air_yards', 'yardline_100', 'ydstogo',
        'down', 'pass_location', 'season', 'qb_hit', 'end_zone_target', 'distance_to_EZ_after_target'
    ]
    
    new_X = data[new_predictors]
    new_X = pd.get_dummies(new_X, columns=['pass_location'], drop_first=True)
    
    # Perform predictions
    predictions = {
        'xYards': yardage_model.predict(new_X),
        'xTDs': touchdown_model.predict(new_X),
        'xFPs': (yardage_model.predict(new_X) * 0.1) + (touchdown_model.predict(new_X) * 6) + data['cp']
    }
    
    return pd.DataFrame(predictions)

In [35]:
yardage_model, touchdown_model = load_models()

In [36]:
new_columns_current = predict_columns(df, yardage_model, touchdown_model)
current_szn = pd.concat([df, new_columns_current], axis=1)

In [37]:
current_szn

Unnamed: 0,receiver_player_name,receiver_player_id,posteam,pass,cp,game_id,complete_pass,inside_10,air_yards,yardline_100,...,season,home_implied_total,away_implied_total,posteam_type,qb_hit,end_zone_target,distance_to_EZ_after_target,xYards,xTDs,xFPs
2,J.Conner,00-0033553,ARI,1,0.868591,2024_01_ARI_BUF,1.0,0,-3.0,67.0,...,2024,26.25,19.75,away,0.0,False,70.0,5.386990,0.003527,1.428454
3,J.Conner,00-0033553,ARI,1,0.807773,2024_01_ARI_BUF,1.0,0,2.0,45.0,...,2024,26.25,19.75,away,0.0,False,43.0,6.797067,0.003450,1.508180
7,G.Dortch,00-0035500,ARI,1,0.688353,2024_01_ARI_BUF,1.0,0,6.0,30.0,...,2024,26.25,19.75,away,0.0,False,24.0,7.845514,0.012487,1.547827
8,M.Harrison,00-0039849,ARI,1,0.549081,2024_01_ARI_BUF,0.0,0,12.0,22.0,...,2024,26.25,19.75,away,0.0,False,10.0,8.899340,0.057299,1.782806
13,Mi.Wilson,00-0038559,ARI,1,0.403381,2024_01_ARI_BUF,1.0,1,5.0,5.0,...,2024,26.25,19.75,away,0.0,True,0.0,1.910024,0.417880,3.101666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22795,M.Gesicki,00-0034829,CIN,1,0.636028,2024_10_CIN_BAL,0.0,0,12.0,60.0,...,2024,29.50,23.50,away,0.0,False,48.0,9.805048,0.002678,1.632603
22797,J.Chase,00-0036900,CIN,1,0.593795,2024_10_CIN_BAL,1.0,0,14.0,45.0,...,2024,29.50,23.50,away,0.0,False,31.0,11.537299,0.009334,1.803531
22798,J.Burton,00-0039810,CIN,1,0.367405,2024_10_CIN_BAL,0.0,0,12.0,12.0,...,2024,29.50,23.50,away,0.0,True,0.0,4.288757,0.324802,2.745095
22800,C.Brown,00-0038597,CIN,1,0.532098,2024_10_CIN_BAL,1.0,1,-3.0,7.0,...,2024,29.50,23.50,away,1.0,False,10.0,1.133263,0.141584,1.494927


In [None]:
def simulate_receiver_game(
    team_pass_attempts: int,
    target_rate: float,
    yards_mean: float,
    yards_std: float,
    num_simulations: int = 1000
) -> Tuple[List[int], List[float]]:
    """
    Simulate receiving statistics for a player based on team passing attempts and player metrics.
    
    Args:
        team_pass_attempts: Number of team pass attempts
        target_rate: Rate at which player is targeted (between 0 and 1)
        yards_mean: Mean yards per target
        yards_std: Standard deviation of yards per target
        num_simulations: Number of games to simulate
        
    Returns:
        Tuple containing:
        - List of targets for each simulation
        - List of receiving yards for each simulation
    """
    
    simulated_targets = []
    simulated_yards = []
    
    for _ in range(num_simulations):
        # Simulate targets using binomial distribution
        targets = np.random.binomial(n=team_pass_attempts, p=target_rate)
        
        # Simulate yards for each target using normal distribution
        if targets > 0:
            yards = np.random.normal(yards_mean, yards_std, targets)
            # Round to 1 decimal place and ensure no negative yards
            yards = np.maximum(0, np.round(yards, 1))
            total_yards = sum(yards)
        else:
            total_yards = 0
            
        simulated_targets.append(targets)
        simulated_yards.append(total_yards)
    
    return simulated_targets, simulated_yards

def analyze_simulation_results(
    targets: List[int],
    yards: List[float]
) -> dict:
    """
    Analyze the results of the simulation.
    
    Args:
        targets: List of simulated target counts
        yards: List of simulated receiving yards
        
    Returns:
        Dictionary containing summary statistics
    """
    return {
        'avg_targets': np.mean(targets),
        'median_targets': np.median(targets),
        'target_percentiles': np.percentile(targets, [10, 25, 75, 90]),
        'avg_yards': np.mean(yards),
        'median_yards': np.median(yards),
        'yard_percentiles': np.percentile(yards, [10, 25, 75, 90])
    }

# Example usage
if __name__ == "__main__":
    # Example parameters
    team_passes = 35  # Team averages 35 pass attempts
    player_target_rate = 0.20  # Player gets targeted on 20% of passes
    yards_per_target_mean = 8.5
    yards_per_target_std = 3.2
    
    # Run simulation
    targets, yards = simulate_receiver_game(
        team_pass_attempts=team_passes,
        target_rate=player_target_rate,
        yards_mean=yards_per_target_mean,
        yards_std=yards_per_target_std
    )
    
    # Analyze results
    results = analyze_simulation_results(targets, yards)
    
    print(f"Simulation Results:")
    print(f"Average Targets: {results['avg_targets']:.1f}")
    print(f"Average Yards: {results['avg_yards']:.1f}")
    print(f"Target Range (10th-90th percentile): {results['target_percentiles'][0]:.1f} - {results['target_percentiles'][3]:.1f}")
    print(f"Yards Range (10th-90th percentile): {results['yard_percentiles'][0]:.1f} - {results['yard_percentiles'][3]:.1f}")

Simulation Results:
Average Targets: 17.6
Average Yards: 149.4
Target Range (10th-90th percentile): 14.0 - 21.0
Yards Range (10th-90th percentile): 113.3 - 184.8
