$$
\text{Fantasy Draft Optimizer 2025: Enhanced PPR Projections Using 2024 Data} \\
\text{By: Isabelle Bernal and Jonathan Siegel} \\
\text{Data valid up to November 24th 2024}
$$

In [None]:
#imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import roc_auc_score, r2_score, mean_squared_error

In [None]:
# Load datasets
# Read the five 2024 CSV exports (same files, same order) into their module-level frames.
depthcharts, injuries, playerstats, playerstats_kicking, player_stats_def = (
    pd.read_csv(csv_name)
    for csv_name in (
        'depth_charts_2024.csv',
        'injuries_2024.csv',
        'player_stats_2024.csv',
        'player_stats_kicking_2024.csv',
        'player_stats_def_2024.csv',
    )
)

$$
\text{PreProcessing / Cleaning}
$$

In [None]:
# Depth charts and injuries key players by 'gsis_id'; expose that key as 'player_id'
# so it matches the column name used in the player stats frame.
depthcharts = depthcharts.rename(columns={'gsis_id': 'player_id'})
injuries = injuries.rename(columns={'gsis_id': 'player_id'})

In [None]:
def clean_player_id(df):
    """Normalize the ``player_id`` column of ``df`` in place and return ``df``.

    Present ids are converted to text, stripped of surrounding whitespace and
    upper-cased. Missing ids stay missing instead of becoming the literal string
    ``'nan'``. Frames without a ``player_id`` column are returned untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that may contain a ``player_id`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    if 'player_id' in df.columns:
        ids = df['player_id']
        # Cast to str *before* using the .str accessor so numeric id columns work too.
        normalized = ids.astype(str).str.strip().str.upper()
        # Put NaN back wherever the original id was missing.
        df['player_id'] = normalized.where(ids.notna())
    return df

# Normalize the id column of every frame that is keyed by player.
playerstats, depthcharts, injuries, playerstats_kicking = map(
    clean_player_id,
    (playerstats, depthcharts, injuries, playerstats_kicking),
)

In [None]:
#handle missing values in numeric col
# Each numeric column's gaps are replaced with that column's own mean.
numeric_cols_playerstats = playerstats.select_dtypes(include=['number']).columns
playerstats[numeric_cols_playerstats] = playerstats[numeric_cols_playerstats].pipe(
    lambda numeric_part: numeric_part.fillna(numeric_part.mean())
)

numeric_cols_kicking = playerstats_kicking.select_dtypes(include=['number']).columns
playerstats_kicking[numeric_cols_kicking] = playerstats_kicking[numeric_cols_kicking].pipe(
    lambda numeric_part: numeric_part.fillna(numeric_part.mean())
)

In [None]:
#handle non numeric values
# Forward-fill gaps in the non-numeric columns: a missing value takes the value from the
# closest preceding row that has one. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and produces the same result.
# NOTE(review): the fill follows row order, so a gap can inherit text from a row that
# belongs to a different player - confirm this is acceptable for identity columns.
non_numeric_cols_playerstats = playerstats.select_dtypes(exclude='number').columns
playerstats[non_numeric_cols_playerstats] = playerstats[non_numeric_cols_playerstats].ffill()
non_numeric_cols_kicking = playerstats_kicking.select_dtypes(exclude='number').columns
playerstats_kicking[non_numeric_cols_kicking] = playerstats_kicking[non_numeric_cols_kicking].ffill()

  playerstats[non_numeric_cols_playerstats] = playerstats[non_numeric_cols_playerstats].fillna(method='ffill')
  playerstats_kicking[non_numeric_cols_kicking] = playerstats_kicking[non_numeric_cols_kicking].fillna(method='ffill')


In [None]:
# Forward fill missing values in depth charts. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') call; rebinding the name (instead of inplace=True) keeps the cell
# safe to re-run.
depthcharts = depthcharts.ffill()
# Every missing injury field, whatever its column, becomes the placeholder text "Unknown".
injuries = injuries.fillna("Unknown")

  depthcharts.fillna(method='ffill', inplace=True)  # Forward fill missing values in depth charts


In [None]:
#handles kickers specifically
# Kickers come from the dedicated kicking frame; everyone whose position is not 'K'
# comes from the general stats frame. Both are independent copies.
kickers_df = playerstats_kicking.copy(deep=True)
non_kickers_df = playerstats.loc[~playerstats['position'].isin(['K'])].copy(deep=True)

In [None]:
# Build one summary per frame: column names, NaN count per column, and row count.
playerstats_info, kickers_info, non_kickers_info = (
    {
        "columns": list(frame.columns),
        "missing_values": frame.isna().sum().to_dict(),
        "num_rows": len(frame),
    }
    for frame in (playerstats, kickers_df, non_kickers_df)
)

# Last expression of the cell: display all three summaries.
playerstats_info, kickers_info, non_kickers_info

({'columns': ['player_id',
   'player_name',
   'player_display_name',
   'position',
   'position_group',
   'headshot_url',
   'recent_team',
   'season',
   'week',
   'season_type',
   'opponent_team',
   'completions',
   'attempts',
   'passing_yards',
   'passing_tds',
   'interceptions',
   'sacks',
   'sack_yards',
   'sack_fumbles',
   'sack_fumbles_lost',
   'passing_air_yards',
   'passing_yards_after_catch',
   'passing_first_downs',
   'passing_epa',
   'passing_2pt_conversions',
   'pacr',
   'dakota',
   'carries',
   'rushing_yards',
   'rushing_tds',
   'rushing_fumbles',
   'rushing_fumbles_lost',
   'rushing_first_downs',
   'rushing_epa',
   'rushing_2pt_conversions',
   'receptions',
   'targets',
   'receiving_yards',
   'receiving_tds',
   'receiving_fumbles',
   'receiving_fumbles_lost',
   'receiving_air_yards',
   'receiving_yards_after_catch',
   'receiving_first_downs',
   'receiving_epa',
   'receiving_2pt_conversions',
   'racr',
   'target_share',
   'ai

$$
\text{Exploratory Data Analysis}
$$

In [None]:
# Best single-game PPR performance at each roster slot (1 QB, 2 WR, 2 RB, 1 TE, 1 K, 1 DST).

# Rank every stat line by PPR points once and keep only each player's best row.
# Every position (WR included) is ranked on 'fantasy_points_ppr', the same column the
# summary reports. De-duplicating on player_id rather than the abbreviated player_name
# keeps apart different players who happen to share a name.
ranked_by_ppr = (
    playerstats
    .sort_values(by='fantasy_points_ppr', ascending=False)
    .drop_duplicates(subset=['player_id'])
)

# Top Quarterback (QB)
top_qb = ranked_by_ppr[ranked_by_ppr['position'] == 'QB'].head(1)

# Top 2 Distinct Wide Receivers (WR)
top_wr = ranked_by_ppr[ranked_by_ppr['position'] == 'WR'].head(2)

# Top 2 Distinct Running Backs (RB)
top_rb = ranked_by_ppr[ranked_by_ppr['position'] == 'RB'].head(2)

# Top Tight End (TE)
top_te = ranked_by_ppr[ranked_by_ppr['position'] == 'TE'].head(1)

# Top Kicker (K) and Defense/Special Teams (DST)
# NOTE(review): the recorded output of this cell lists no K or DST entry, so these two
# selections appear to be empty for this dataset - kicking and defense live in separate CSVs.
top_k = ranked_by_ppr[ranked_by_ppr['position'] == 'K'].head(1)
top_dst = ranked_by_ppr[ranked_by_ppr['position'] == 'DST'].head(1)

# Step 2: Combine all the filtered data into a single DataFrame
best_fantasy_team_unique = pd.concat([top_qb, top_wr, top_rb, top_te, top_k, top_dst])

# Display the best fantasy team with distinct players
best_fantasy_team_unique_summary = best_fantasy_team_unique[['position', 'player_name', 'fantasy_points_ppr']].to_dict(orient='records')

best_fantasy_team_unique_summary


[{'position': 'QB', 'player_name': 'J.Hurts', 'fantasy_points_ppr': 35.14},
 {'position': 'WR', 'player_name': 'J.Chase', 'fantasy_points_ppr': 55.4},
 {'position': 'WR', 'player_name': 'J.Jennings', 'fantasy_points_ppr': 46.5},
 {'position': 'RB', 'player_name': 'S.Barkley', 'fantasy_points_ppr': 46.2},
 {'position': 'RB', 'player_name': 'A.Kamara', 'fantasy_points_ppr': 44.0},
 {'position': 'TE', 'player_name': 'T.Hill', 'fantasy_points_ppr': 41.52}]

In [None]:
# Average fantasy points per player across the season (Points by Average).
# Grouping on player_id (not the abbreviated player_name) prevents two different players
# who share a name from being averaged together; name and position are carried along
# from each player's first row.
average_fantasy_points = (
    playerstats
    .groupby('player_id')
    .agg(
        player_name=('player_name', 'first'),
        fantasy_points=('fantasy_points', 'mean'),
        position=('position', 'first'),  # Include position for filtering
    )
    .reset_index()
)

# One descending ranking shared by every position pick below.
ranked_by_average = average_fantasy_points.sort_values(by='fantasy_points', ascending=False)

# Top Quarterback (QB)
top_qb = ranked_by_average[ranked_by_average['position'] == 'QB'].head(1)

# Top 2 Distinct Wide Receivers (WR)
top_wr = ranked_by_average[ranked_by_average['position'] == 'WR'].head(2)

# Top 2 Distinct Running Backs (RB)
top_rb = ranked_by_average[ranked_by_average['position'] == 'RB'].head(2)

# Top Tight End (TE)
top_te = ranked_by_average[ranked_by_average['position'] == 'TE'].head(1)

# Top Kicker (K)
top_k = ranked_by_average[ranked_by_average['position'] == 'K'].head(1)

# Top Defense/Special Teams (DST)
top_dst = ranked_by_average[ranked_by_average['position'] == 'DST'].head(1)

# Step 2: Combine all the filtered data into a single DataFrame
best_fantasy_team_average = pd.concat([top_qb, top_wr, top_rb, top_te, top_k, top_dst])

# Display the best fantasy team with distinct players based on average fantasy points per game
best_fantasy_team_average_summary = best_fantasy_team_average[['position', 'player_name', 'fantasy_points']].to_dict(orient='records')

best_fantasy_team_average_summary


[{'position': 'QB',
  'player_name': 'L.Jackson',
  'fantasy_points': 24.85818181818182},
 {'position': 'WR',
  'player_name': 'J.Chase',
  'fantasy_points': 16.245454545454546},
 {'position': 'WR',
  'player_name': 'R.Rice',
  'fantasy_points': 13.633333333333335},
 {'position': 'RB',
  'player_name': 'S.Barkley',
  'fantasy_points': 21.71818181818182},
 {'position': 'RB',
  'player_name': 'D.Henry',
  'fantasy_points': 19.645454545454545},
 {'position': 'TE',
  'player_name': 'G.Kittle',
  'fantasy_points': 12.466666666666667}]

In [None]:
#calculating fantasy points based on PPR system
#sanity-check player: CeeDee Lamb vs. the SF 49ers (his 39.6-point PPR game, listed as week 8 in the model output tables)
def calculate_ppr_fantasy_points_WR(row):
    """Return the PPR fantasy points for one stat line of a wide receiver.

    Parameters
    ----------
    row : mapping with ``.get`` (e.g. a pandas Series or a dict)
        One game's stats. Absent or NaN stats count as 0.

    Returns
    -------
    float
        Passing, rushing and receiving points plus one point per reception,
        minus two points for every fumble lost.
    """
    def stat(key):
        # A missing key or a NaN cell contributes nothing instead of poisoning the total.
        value = row.get(key, 0)
        return 0 if value is None or pd.isna(value) else value

    # Rushing and passing production is credited below, so fumbles lost on those plays
    # are charged as well, not only fumbles lost after a catch.
    fumbles_lost = sum(
        stat(column)
        for column in ('sack_fumbles_lost', 'rushing_fumbles_lost', 'receiving_fumbles_lost')
    )

    fantasy_points = (
        (stat('passing_yards') / 25) +              # 1 point per 25 passing yards
        (stat('passing_tds') * 4) +                 # 4 points per passing touchdown
        (stat('rushing_yards') / 10) +              # 1 point per 10 rushing yards
        (stat('rushing_tds') * 6) +                 # 6 points per rushing touchdown
        (stat('receptions') * 1) +                  # 1 point per reception (PPR scoring)
        (stat('receiving_yards') / 10) +            # 1 point per 10 receiving yards
        (stat('receiving_tds') * 6) +               # 6 points per receiving touchdown
        (fumbles_lost * -2)                         # -2 points per fumble lost
    )
    return fantasy_points

# Get CeeDee Lamb's stats against SF
# regex=False matches 'C.Lamb' literally (as a regex the '.' would match any character);
# na=False treats a missing name as "no match" so the mask stays strictly boolean.
ceedee_lamb_vs_sf_stats = playerstats[
    (playerstats['player_name'].str.contains('C.Lamb', case=False, regex=False, na=False)) &
    (playerstats['opponent_team'] == 'SF')
]

# Apply the PPR calculation function to CeeDee Lamb's stats against SF
print(ceedee_lamb_vs_sf_stats.apply(calculate_ppr_fantasy_points_WR, axis=1)) #this is accurate


1557    39.6
dtype: float64


In [None]:
# Per-position PPR scoring functions (will use once we make our ML model).
# Each takes one stat line (anything with ``.get``: a pandas Series or a dict) and returns
# its fantasy points. Stats that are absent or NaN count as 0.

# Columns that record a fumble lost, one per play type.
_FUMBLES_LOST_COLUMNS = ('sack_fumbles_lost', 'rushing_fumbles_lost', 'receiving_fumbles_lost')


def _stat(row, *keys):
    """Return the first non-missing value stored in ``row`` under any of ``keys``, else 0."""
    for key in keys:
        value = row.get(key)
        if value is not None and not pd.isna(value):
            return value
    return 0


def _fumbles_lost(row):
    """Return the total fumbles lost on the stat line across sack, rushing and receiving plays."""
    return sum(_stat(row, column) for column in _FUMBLES_LOST_COLUMNS)


# Function to calculate fantasy points for Wide Receiver (WR) using PPR scoring
def calculate_ppr_fantasy_points_WR(row):
    """Return PPR points for a wide receiver stat line (passing, rushing and receiving)."""
    fantasy_points = (
        (_stat(row, 'passing_yards') / 25) +              # 1 point per 25 passing yards (if applicable)
        (_stat(row, 'passing_tds') * 4) +                 # 4 points per passing touchdown (if applicable)
        (_stat(row, 'rushing_yards') / 10) +              # 1 point per 10 rushing yards
        (_stat(row, 'rushing_tds') * 6) +                 # 6 points per rushing touchdown
        (_stat(row, 'receptions') * 1) +                  # 1 point per reception (PPR scoring)
        (_stat(row, 'receiving_yards') / 10) +            # 1 point per 10 receiving yards
        (_stat(row, 'receiving_tds') * 6) +               # 6 points per receiving touchdown
        (_fumbles_lost(row) * -2)                         # -2 points per fumble lost (any play type)
    )
    return fantasy_points


# Function to calculate fantasy points for Quarterback (QB) using PPR scoring
def calculate_ppr_fantasy_points_QB(row):
    """Return PPR points for a quarterback stat line (passing and rushing)."""
    # The player stats frame names the column 'interceptions'; 'passing_interceptions'
    # is still honored first for rows that carry it.
    interceptions_thrown = _stat(row, 'passing_interceptions', 'interceptions')
    fantasy_points = (
        (_stat(row, 'passing_yards') / 25) +              # 1 point per 25 passing yards
        (_stat(row, 'passing_tds') * 4) +                 # 4 points per passing touchdown
        (interceptions_thrown * -2) +                     # -2 points per interception thrown
        (_stat(row, 'rushing_yards') / 10) +              # 1 point per 10 rushing yards
        (_stat(row, 'rushing_tds') * 6) +                 # 6 points per rushing touchdown
        (_fumbles_lost(row) * -2)                         # -2 points per fumble lost (sack fumbles included)
    )
    return fantasy_points


# Function to calculate fantasy points for Running Back (RB) using PPR scoring
def calculate_ppr_fantasy_points_RB(row):
    """Return PPR points for a running back stat line (rushing and receiving)."""
    fantasy_points = (
        (_stat(row, 'rushing_yards') / 10) +              # 1 point per 10 rushing yards
        (_stat(row, 'rushing_tds') * 6) +                 # 6 points per rushing touchdown
        (_stat(row, 'receptions') * 1) +                  # 1 point per reception (PPR scoring)
        (_stat(row, 'receiving_yards') / 10) +            # 1 point per 10 receiving yards
        (_stat(row, 'receiving_tds') * 6) +               # 6 points per receiving touchdown
        (_fumbles_lost(row) * -2)                         # -2 points per fumble lost (any play type)
    )
    return fantasy_points


# Function to calculate fantasy points for Tight End (TE) using PPR scoring
def calculate_ppr_fantasy_points_TE(row):
    """Return PPR points for a tight end stat line (receiving only)."""
    # NOTE(review): rushing production is not credited here, unlike the WR/RB versions -
    # confirm that is intended for tight ends who also carry the ball.
    fantasy_points = (
        (_stat(row, 'receptions') * 1) +                  # 1 point per reception (PPR scoring)
        (_stat(row, 'receiving_yards') / 10) +            # 1 point per 10 receiving yards
        (_stat(row, 'receiving_tds') * 6) +               # 6 points per receiving touchdown
        (_fumbles_lost(row) * -2)                         # -2 points per fumble lost (any play type)
    )
    return fantasy_points


# Function to calculate fantasy points for Kicker (K) using PPR scoring
def calculate_ppr_fantasy_points_K(row):
    """Return fantasy points for a kicker stat line."""
    # The kicking CSV used in this notebook names made field goals 'fg_made';
    # 'field_goals_made' is still honored first for rows that carry it.
    field_goals = _stat(row, 'field_goals_made', 'fg_made')
    # NOTE(review): 'fg_made_50_59' / 'fg_made_60_' are presumed to be the kicking file's
    # long-range columns - verify against the CSV header. They are only consulted when
    # 'field_goals_made_50+' is absent or zero.
    long_field_goals = _stat(row, 'field_goals_made_50+') or (
        _stat(row, 'fg_made_50_59') + _stat(row, 'fg_made_60_')
    )
    fantasy_points = (
        (field_goals * 3) +                               # 3 points per field goal made
        (long_field_goals * 1) +                          # Bonus point for field goals of 50+ yards
        (_stat(row, 'pat_made') * 1)                      # 1 point per PAT made
    )
    return fantasy_points


# Function to calculate fantasy points for Defense/Special Teams (DST) using PPR scoring
def calculate_ppr_fantasy_points_DST(row):
    """Return fantasy points for a defense/special-teams stat line."""
    # NOTE(review): these keys are generic names; the columns of the defensive CSV are not
    # shown in this notebook - confirm they match before applying this to that data.
    fantasy_points = (
        (_stat(row, 'sacks') * 1) +                       # 1 point per sack
        (_stat(row, 'interceptions') * 2) +               # 2 points per interception
        (_stat(row, 'fumble_recoveries') * 2) +           # 2 points per fumble recovery
        (_stat(row, 'touchdowns') * 6) +                  # 6 points per defensive touchdown
        (_stat(row, 'safeties') * 2)                      # 2 points per safety
    )
    return fantasy_points


In [None]:
# Disabled: `!apt-get ace_tools` always failed with "E: Invalid operation ace_tools"
# (apt-get reads its first argument as an operation such as `install`), and no code
# visible in this notebook imports or uses anything called ace_tools.
# !apt-get ace_tools

E: Invalid operation ace_tools


$$
\text{ML Algorithms}
$$

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load datasets
# NOTE: the CSVs are re-read here, so everything below works from the raw files rather
# than from the cleaned / imputed frames produced in the preprocessing section.
depthcharts = pd.read_csv('depth_charts_2024.csv')
injuries = pd.read_csv('injuries_2024.csv')
playerstats = pd.read_csv('player_stats_2024.csv')
playerstats_kicking = pd.read_csv('player_stats_kicking_2024.csv')
player_stats_def = pd.read_csv('player_stats_def_2024.csv')

# Define target variable
target = 'fantasy_points_ppr'

# Preprocessing the dataset
# Drop rows with a missing target. The explicit .copy() makes this an independent frame,
# so the column assignments below cannot trigger pandas' SettingWithCopyWarning.
player_stats_filtered = playerstats.dropna(subset=[target]).copy()

# Encode categorical positions as numerical
player_stats_filtered['position_encoded'] = player_stats_filtered['position'].astype('category').cat.codes

# Add new features for feature engineering
player_stats_filtered['total_touches'] = (
    player_stats_filtered['receptions'].fillna(0) +
    player_stats_filtered['carries'].fillna(0)
)
# Zero touches become NaN in the denominator to avoid dividing by zero; the resulting
# NaN is turned into 0 by the fillna further down.
player_stats_filtered['yards_per_touch'] = (
    player_stats_filtered['rushing_yards'].fillna(0) +
    player_stats_filtered['receiving_yards'].fillna(0)
) / player_stats_filtered['total_touches'].replace(0, np.nan)
# NOTE(review): this divides the row's touchdowns by the value of its 'week' column;
# if each row is a single game, that is not a per-game average - confirm the intent.
player_stats_filtered['tds_per_game'] = (
    player_stats_filtered['passing_tds'].fillna(0) +
    player_stats_filtered['rushing_tds'].fillna(0) +
    player_stats_filtered['receiving_tds'].fillna(0)
) / player_stats_filtered['week']

# Replace NaNs introduced by feature engineering with 0
player_stats_filtered = player_stats_filtered.fillna(0)

# Select features and target for training
X = player_stats_filtered[['week', 'position_encoded', 'passing_yards', 'rushing_yards',
                           'receiving_yards', 'passing_tds', 'rushing_tds', 'receiving_tds',
                           'interceptions', 'total_touches', 'yards_per_touch', 'tds_per_game']]
y = player_stats_filtered[target]

# Use K-Fold Cross-Validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Hyperparameter tuning for Gradient Boosting
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5]
}
# verbose=1 prints only the fit summary instead of one log line per fold.
grid_search_gb = GridSearchCV(GradientBoostingRegressor(random_state=42),
                              param_grid, cv=kfold, scoring='neg_mean_squared_error', verbose=1)
grid_search_gb.fit(X, y)

# Best Gradient Boosting model
best_gb_model = grid_search_gb.best_estimator_

# Train XGBoost as a second model
xgb_model = XGBRegressor(random_state=42, n_estimators=200, learning_rate=0.1, max_depth=5)
xgb_model.fit(X, y)

# Compare performance using Cross-Validation RMSE
gb_scores = np.sqrt(-cross_val_score(best_gb_model, X, y, cv=kfold, scoring='neg_mean_squared_error'))
xgb_scores = np.sqrt(-cross_val_score(xgb_model, X, y, cv=kfold, scoring='neg_mean_squared_error'))

# Evaluate performance
print(f"Gradient Boosting RMSE (CV): {gb_scores.mean():.4f}")
print(f"XGBoost RMSE (CV): {xgb_scores.mean():.4f}")

# Evaluate R^2 Score for Both Models
# These scores are in-sample: both models are evaluated on the same X they were fitted on.
# The cross-validated RMSE above is the out-of-sample comparison.
y_gb_pred = best_gb_model.predict(X)
y_xgb_pred = xgb_model.predict(X)

gb_r2 = r2_score(y, y_gb_pred)
xgb_r2 = r2_score(y, y_xgb_pred)

print(f"Gradient Boosting R²: {gb_r2:.4f}")
print(f"XGBoost R²: {xgb_r2:.4f}")

# Use the better model (based on RMSE) for predictions
final_model = best_gb_model if gb_scores.mean() < xgb_scores.mean() else xgb_model

# Predict 2025 fantasy points
# The predictions are the model's fitted values for the 2024 rows; the copy is then
# relabelled as season 2025.
player_stats_filtered['predicted_fantasy_points'] = final_model.predict(X)
players_2025 = player_stats_filtered.copy()
players_2025['season'] = 2025

# Rank all rows by predicted points and keep each player's best row only, so one player
# can never occupy two roster slots.
ranked_2025 = (
    players_2025
    .sort_values('predicted_fantasy_points', ascending=False)
    .drop_duplicates(subset='player_id')
)

# Select the best players based on predicted points
roster_slots = {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1}
team_predictions = {
    position: ranked_2025[ranked_2025['position'] == position].head(slot_count)
    for position, slot_count in roster_slots.items()
}

# FLEX (highest scoring RB, WR, TE not already in team)
eligible_flex = ranked_2025[ranked_2025['position'].isin(['RB', 'WR', 'TE'])]
selected_ids = pd.concat(team_predictions.values())['player_id']
team_predictions['FLEX'] = eligible_flex[~eligible_flex['player_id'].isin(selected_ids)].head(1)

# Combine all selected players
best_fantasy_team_2025 = pd.concat(team_predictions.values()).reset_index(drop=True)

# Display the optimized predicted team
print("Predicted Best Fantasy Team for 2025:")
print(best_fantasy_team_2025)


Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=   0.3s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=   0.3s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=   0.3s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=   0.3s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=   0.3s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=   0.7s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=   0.6s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=   0.7s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=   0.6s
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=200; total time=   0.7s
[CV] END ..learning_rate=0.01, max_depth=5, n_estimators=100; total time=   0.5s
[CV] END ..learning_rate=0.01, max_depth=5, n_est

Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr,position_encoded,total_touches,yards_per_touch,tds_per_game,predicted_fantasy_points
0,00-0036389,J.Hurts,Jalen Hurts,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,PHI,2025,8,REG,...,0.0,0.0,0,35.14,35.14,8,10,3.7,0.5,34.647248
1,00-0033906,A.Kamara,Alvin Kamara,RB,RB,https://static.www.nfl.com/image/private/f_aut...,NO,2025,2,REG,...,-0.028986,0.27971,0,42.0,44.0,9,22,8.181818,2.0,45.56855
2,00-0034844,S.Barkley,Saquon Barkley,RB,RB,https://static.www.nfl.com/image/upload/f_auto...,PHI,2025,12,REG,...,0.009174,0.322211,0,42.2,46.2,9,30,10.066667,0.166667,45.272281
3,00-0036900,J.Chase,Ja'Marr Chase,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,CIN,2025,10,REG,...,0.495918,0.819365,0,44.4,55.4,13,11,24.0,0.3,54.061247
4,00-0036259,J.Jennings,Jauan Jennings,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,SF,2025,3,REG,...,0.454268,0.917988,0,35.5,46.5,13,11,15.909091,1.0,46.44206
5,00-0033357,T.Hill,Taysom Hill,TE,TE,https://static.www.nfl.com/image/upload/f_auto...,NO,2025,11,REG,...,0.146119,0.637997,0,33.52,41.52,12,15,12.533333,0.272727,41.250009
6,00-0036358,C.Lamb,CeeDee Lamb,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,DAL,2025,8,REG,...,0.435673,0.99416,0,26.6,39.6,13,13,11.230769,0.25,40.307164


In [None]:
# Fit Linear Regression on the entire dataset for final predictions.
# X and y are the feature matrix and target built in the model-comparison cell above.
lin_reg_full = LinearRegression()
lin_reg_full.fit(X, y)

# Predict 2025 fantasy points
# This overwrites the 'predicted_fantasy_points' column with the linear model's fitted
# values for the 2024 rows; the copy is then relabelled as season 2025.
player_stats_filtered['predicted_fantasy_points'] = lin_reg_full.predict(X)
players_2025 = player_stats_filtered.copy()
players_2025['season'] = 2025

# Define the team structure
positions = {
    'QB': 1,  # 1 Quarterback
    'RB': 2,  # 2 Running Backs
    'WR': 2,  # 2 Wide Receivers
    'TE': 1,  # 1 Tight End
    'FLEX': 1  # 1 FLEX (RB/WR/TE)
}

# Rank all rows by predicted points and keep each player's best row only, so one player
# can never occupy two roster slots.
ranked_2025 = (
    players_2025
    .sort_values('predicted_fantasy_points', ascending=False)
    .drop_duplicates(subset='player_id')
)

# Select the best players for each position (FLEX is filled afterwards)
team_predictions = {
    position: ranked_2025[ranked_2025['position'] == position].head(slot_count)
    for position, slot_count in positions.items()
    if position != 'FLEX'
}

# Select the FLEX player (highest-scoring RB, WR, TE not already in team)
eligible_flex = ranked_2025[ranked_2025['position'].isin(['RB', 'WR', 'TE'])]
selected_ids = pd.concat(team_predictions.values())['player_id']
team_predictions['FLEX'] = (
    eligible_flex[~eligible_flex['player_id'].isin(selected_ids)].head(positions['FLEX'])
)

# Combine all players into the final team
best_fantasy_team_2025 = pd.concat(team_predictions.values()).reset_index(drop=True)

# Display the predicted best fantasy team for 2025
best_fantasy_team_2025.head(10)


Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr,position_encoded,total_touches,yards_per_touch,tds_per_game,predicted_fantasy_points
0,00-0034796,L.Jackson,Lamar Jackson,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,BAL,2025,7,REG,...,0.0,0.0,0,34.44,34.44,8,9,5.777778,0.714286,36.172693
1,00-0033906,A.Kamara,Alvin Kamara,RB,RB,https://static.www.nfl.com/image/private/f_aut...,NO,2025,2,REG,...,-0.028986,0.27971,0,42.0,44.0,9,22,8.181818,2.0,46.167332
2,00-0034844,S.Barkley,Saquon Barkley,RB,RB,https://static.www.nfl.com/image/upload/f_auto...,PHI,2025,12,REG,...,0.009174,0.322211,0,42.2,46.2,9,30,10.066667,0.166667,43.364434
3,00-0036900,J.Chase,Ja'Marr Chase,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,CIN,2025,10,REG,...,0.495918,0.819365,0,44.4,55.4,13,11,24.0,0.3,61.412126
4,00-0036259,J.Jennings,Jauan Jennings,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,SF,2025,3,REG,...,0.454268,0.917988,0,35.5,46.5,13,11,15.909091,1.0,47.835065
5,00-0033357,T.Hill,Taysom Hill,TE,TE,https://static.www.nfl.com/image/upload/f_auto...,NO,2025,11,REG,...,0.146119,0.637997,0,33.52,41.52,12,15,12.533333,0.272727,37.143287
6,00-0038543,J.Smith-Njigba,Jaxon Smith-Njigba,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,SEA,2025,9,REG,...,0.65043,1.02883,0,30.0,37.0,13,7,25.714286,0.222222,41.154099


In [None]:
# In-sample fit quality of the linear model: it is scored on the same X it was fitted on.
lin_reg_full_pred = lin_reg_full.predict(X)
lin_reg_r2_full = r2_score(y, lin_reg_full_pred)
# RMSE as sqrt(MSE): works on every scikit-learn version, unlike the deprecated
# mean_squared_error(..., squared=False) keyword.
lin_reg_rmse_full = np.sqrt(mean_squared_error(y, lin_reg_full_pred))

print("Evaluation Metrics for Linear Regression on Full Dataset:")
print(f"R² Score: {lin_reg_r2_full:.4f}")
print(f"Root Mean Squared Error (RMSE): {lin_reg_rmse_full:.4f}")

print("\nPredicted Best Fantasy Team for 2025:")
best_fantasy_team_2025


Evaluation Metrics for Linear Regression on Full Dataset:
R² Score: 0.9799
Root Mean Squared Error (RMSE): 1.1118

Predicted Best Fantasy Team for 2025:




Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr,position_encoded,total_touches,yards_per_touch,tds_per_game,predicted_fantasy_points
0,00-0034796,L.Jackson,Lamar Jackson,QB,QB,https://static.www.nfl.com/image/upload/f_auto...,BAL,2025,7,REG,...,0.0,0.0,0,34.44,34.44,8,9,5.777778,0.714286,36.172693
1,00-0033906,A.Kamara,Alvin Kamara,RB,RB,https://static.www.nfl.com/image/private/f_aut...,NO,2025,2,REG,...,-0.028986,0.27971,0,42.0,44.0,9,22,8.181818,2.0,46.167332
2,00-0034844,S.Barkley,Saquon Barkley,RB,RB,https://static.www.nfl.com/image/upload/f_auto...,PHI,2025,12,REG,...,0.009174,0.322211,0,42.2,46.2,9,30,10.066667,0.166667,43.364434
3,00-0036900,J.Chase,Ja'Marr Chase,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,CIN,2025,10,REG,...,0.495918,0.819365,0,44.4,55.4,13,11,24.0,0.3,61.412126
4,00-0036259,J.Jennings,Jauan Jennings,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,SF,2025,3,REG,...,0.454268,0.917988,0,35.5,46.5,13,11,15.909091,1.0,47.835065
5,00-0033357,T.Hill,Taysom Hill,TE,TE,https://static.www.nfl.com/image/upload/f_auto...,NO,2025,11,REG,...,0.146119,0.637997,0,33.52,41.52,12,15,12.533333,0.272727,37.143287
6,00-0038543,J.Smith-Njigba,Jaxon Smith-Njigba,WR,WR,https://static.www.nfl.com/image/upload/f_auto...,SEA,2025,9,REG,...,0.65043,1.02883,0,30.0,37.0,13,7,25.714286,0.222222,41.154099


In [None]:
# Kicker ranking: fit a model on kicking stats and rank kickers by summed predicted points.
kicking_stats = pd.read_csv('player_stats_kicking_2024.csv')
# Target: 3 points per made field goal plus 1 point per made PAT.
kicking_stats['points_scored'] = (kicking_stats['fg_made'] * 3) + (kicking_stats['pat_made'] * 1)
# .copy() makes the filtered frame independent, so adding 'predicted_points' below no
# longer raises SettingWithCopyWarning.
kicking_stats_filtered = kicking_stats.dropna(
    subset=['fg_made', 'fg_att', 'pat_made', 'pat_att', 'points_scored']
).copy()

# Define features and target variable
# NOTE: X and y are rebound here, replacing the feature matrix / target used by the
# player models in the earlier cells.
X = kicking_stats_filtered[['fg_made', 'fg_att', 'pat_made', 'pat_att']]
y = kicking_stats_filtered['points_scored']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the best model (Random Forest in this case)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Predict on the full dataset (training rows included)
kicking_stats_filtered['predicted_points'] = model.predict(X)

# Generate rankings: total predicted points per kicker, highest first
player_ranking = kicking_stats_filtered.groupby(['player_id', 'player_name', 'team'], as_index=False)['predicted_points'].sum()
player_ranking = player_ranking.sort_values(by='predicted_points', ascending=False).reset_index(drop=True)
best_kicker = player_ranking.iloc[0]
print("\nBest Kicker:")
print(best_kicker)

Mean Squared Error: 0.07366724137931037
R^2 Score: 0.9943847778306483

Best Kicker:
player_id            00-0032726
player_name         K.Fairbairn
team                        HOU
predicted_points         110.98
Name: 0, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kicking_stats_filtered['predicted_points'] = model.predict(X)
