In [194]:
import pandas as pd
import matplotlib.pyplot as plt
import nfl_data_py as nfl

In [195]:
data = nfl.import_weekly_data(range(2020, 2025))

Downcasting floats.


In [196]:
data = data[data["season_type"] == "REG"]
data["player_display_name"] = pd.Categorical(data["player_display_name"])
group_dfs = {group: group_df for group, group_df in data.groupby('position_group')}

In [None]:
all_cols = ["player_id", "player_display_name","position_group", "recent_team", "season", "week"]
weapon_cols = ["carries", "rushing_yards","rushing_tds","rushing_fumbles_lost","receptions","receiving_yards","receiving_tds", "receiving_yards_after_catch", "receiving_fumbles_lost", "receiving_epa", "wopr","racr","rushing_epa"]
passing_cols = ["completions", "attempts","passing_yards","passing_tds","interceptions","sacks","sack_fumbles_lost", "passing_epa", "pacr", "dakota"]
QB_data = group_dfs["QB"][all_cols + passing_cols + weapon_cols].reset_index().fillna(0)
WR_data = group_dfs["WR"][all_cols + passing_cols + weapon_cols].reset_index().fillna(0)
TE_data = group_dfs["TE"][all_cols + passing_cols + weapon_cols].reset_index().fillna(0)
RB_data = group_dfs["RB"][all_cols + passing_cols + weapon_cols].reset_index().fillna(0)


In [198]:
import pandas as pd

def calculate_fantasy_points(df: pd.DataFrame) -> pd.Series:
    """
    Calculate fantasy points for offensive players using Old School settings.
    """

    # Initialize to 0
    fantasy_points = pd.Series(0, index=df.index, dtype=float)

    # Rushing
    if "rushing_yards" in df:
        fantasy_points += df["rushing_yards"] / 10
        fantasy_points += (df["rushing_yards"] >= 100).astype(int)
        fantasy_points += (df["rushing_yards"] >= 200).astype(int)
    if "rushing_tds" in df:
        fantasy_points += df["rushing_tds"] * 6
    if "rushing_fumbles_lost" in df:
        fantasy_points += df["rushing_fumbles_lost"] * -2

    # Receiving
    if "receptions" in df:
        fantasy_points += df["receptions"] * 0.5
    if "receiving_yards" in df:
        fantasy_points += df["receiving_yards"] / 10
        fantasy_points += (df["receiving_yards"] >= 100).astype(int)
        fantasy_points += (df["receiving_yards"] >= 200).astype(int)
    if "receiving_tds" in df:
        fantasy_points += df["receiving_tds"] * 6
    if "receiving_fumbles_lost" in df:
        fantasy_points += df["receiving_fumbles_lost"] * -2

    # Passing (for QBs)
    if "passing_yards" in df:
        fantasy_points += df["passing_yards"] / 25
        fantasy_points += (df["passing_yards"] >= 400).astype(int)
        fantasy_points += (df["passing_yards"] >= 500).astype(int)
    if "passing_tds" in df:
        fantasy_points += df["passing_tds"] * 4
    if "interceptions" in df:
        fantasy_points += df["interceptions"] * -2
    if "sacks" in df:
        fantasy_points += df["sacks"] * -1
    if "sack_fumbles_lost" in df:
        fantasy_points += df["sack_fumbles_lost"] * -2

    return fantasy_points


In [199]:
QB_data["fantasy_points"] = calculate_fantasy_points(QB_data)
WR_data["fantasy_points"] = calculate_fantasy_points(WR_data)
TE_data["fantasy_points"] = calculate_fantasy_points(TE_data)
RB_data["fantasy_points"] = calculate_fantasy_points(RB_data)

In [200]:
def group_data(data):
    # Group by the specified columns and calculate means
    grouped = data.drop(columns=["index", "week"]).groupby(["player_id", "player_display_name", "position_group", "season"]).agg("mean").reset_index()
    
    # Count observations per group to filter out groups with <5 observations
    group_counts = data.groupby(["player_id", "player_display_name", "position_group", "season"]).size().reset_index(name='count')
    
    # Filter to keep only groups with 5 or more observations
    valid_groups = group_counts[group_counts['count'] >= 5]
    
    # Merge to keep only the valid groups in the final result
    result = grouped.merge(valid_groups[["player_id", "player_display_name", "position_group", "season"]], 
                          on=["player_id", "player_display_name", "position_group", "season"])
    
    return result

In [201]:
def get_next_fpg(data):
    data = data.sort_values(by=["player_id", "player_display_name", "season"])
    return data.groupby(["player_id", "player_display_name"])["fantasy_points"].shift(-1)


In [202]:
QB_data = group_data(QB_data)
WR_data = group_data(WR_data)
TE_data = group_data(TE_data)
RB_data = group_data(RB_data)

QB_data["next_fantasy_points"] = get_next_fpg(QB_data)
WR_data["next_fantasy_points"] = get_next_fpg(WR_data)
TE_data["next_fantasy_points"] = get_next_fpg(TE_data)
RB_data["next_fantasy_points"] = get_next_fpg(RB_data)

QB_current = QB_data[QB_data["season"] == 2024]
WR_current = WR_data[WR_data["season"] == 2024]
TE_current = TE_data[TE_data["season"] == 2024]
RB_current = RB_data[RB_data["season"] == 2024]

QB_data = QB_data[(QB_data["season"] != 2024) & (~QB_data["next_fantasy_points"].isna())]
WR_data = WR_data[(WR_data["season"] != 2024) & (~WR_data["next_fantasy_points"].isna())]
TE_data = TE_data[(TE_data["season"] != 2024) & (~TE_data["next_fantasy_points"].isna())]
RB_data = RB_data[(RB_data["season"] != 2024) & (~RB_data["next_fantasy_points"].isna())]


  grouped = data.drop(columns=["index", "week"]).groupby(["player_id", "player_display_name", "position_group", "season"]).agg("mean").reset_index()
  grouped = data.drop(columns=["index", "week"]).groupby(["player_id", "player_display_name", "position_group", "season"]).agg("mean").reset_index()
  grouped = data.drop(columns=["index", "week"]).groupby(["player_id", "player_display_name", "position_group", "season"]).agg("mean").reset_index()
  grouped = data.drop(columns=["index", "week"]).groupby(["player_id", "player_display_name", "position_group", "season"]).agg("mean").reset_index()


In [203]:
def get_replacement_level(df, num_slots):
    """
    Returns the average fantasy points per game of the worst player of the position group that could be starting (num_teams * slots per team)
    """
    df = df.sort_values(by=["season", "fantasy_points"])
    df["rank"] = df.groupby(["season"])["fantasy_points"].rank(ascending=False)
    
    return df["fantasy_points"][df["rank"] == 10 * num_slots].mean()

def get_elite_level(df, elite_percentile=0.1, size=0.1):
    """
    Returns the mean fantasy points for top n% of players
    """
    df = df.sort_values(by=["season", "fantasy_points"])
    df["rank"] = df.groupby(["season"])["fantasy_points"].rank(ascending=False, pct=True)
    
    return df["fantasy_points"][(df["rank"] <= elite_percentile) & (df["rank"] > elite_percentile - size)].mean()

In [204]:
WR_PAR = get_elite_level(WR_data, 0.1) - get_replacement_level(WR_data, 3)
RB_PAR = get_elite_level(RB_data, 0.1) - get_replacement_level(RB_data, 2)
QB_PAR = get_elite_level(QB_data, 0.1) - get_replacement_level(QB_data, 1)
TE_PAR = get_elite_level(TE_data, 0.1) - get_replacement_level(TE_data, 1)

In [205]:
print(f"WR PAR: {WR_PAR}, RB PAR: {RB_PAR}, QB PAR: {QB_PAR}, TE PAR: {TE_PAR}")

WR PAR: 5.0541329518265545, RB PAR: 5.264447538412, QB PAR: 4.607333709488433, TE PAR: 3.145265456253883


In [206]:
import pandas as pd

def get_scarcity_ratios(data, starter_counts):
    """
    Compute number of startable players and scarcity ratio per season per position.

    Parameters:
    - data: DataFrame with columns ['position', 'season', 'fantasy_points']
    - starter_counts: dict, e.g., {'QB': 12, 'RB': 24, 'WR': 24, 'TE': 12}

    Returns:
    - DataFrame with columns ['position', 'season', 'startable_count', 'total_players', 'scarcity_ratio']
    """
    results = []

    grouped = data.groupby(['season'])

    for (season), group in grouped:
        total_players = len(group)
        starter_limit = starter_counts
        startable_count = min(starter_limit, total_players)  # in case there are fewer players than the limit
        scarcity_ratio = startable_count / total_players if total_players > 0 else None

        results.append({
            'season': season,
            'startable_count': startable_count,
            'total_players': total_players,
            'scarcity_ratio': scarcity_ratio
        })

    return scarcity_ratio


In [207]:
QB_scarcity = get_scarcity_ratios(QB_data, 10)
TE_scarcity = get_scarcity_ratios(TE_data, 10)
WR_scarcity = get_scarcity_ratios(WR_data, 30)
RB_scarcity = get_scarcity_ratios(RB_data, 20)

  for (season), group in grouped:
  for (season), group in grouped:
  for (season), group in grouped:
  for (season), group in grouped:


In [208]:
positions = ["QB", "WR", "RB", "TE"]
par_values = [QB_PAR, WR_PAR, RB_PAR, TE_PAR]
dropoff = [get_elite_level(QB_data, 0.1) - get_elite_level(QB_data, 0.2), 
           get_elite_level(WR_data, 0.1) - get_elite_level(WR_data, 0.2),
           get_elite_level(RB_data, 0.1) - get_elite_level(RB_data, 0.2),
           get_elite_level(TE_data, 0.1) - get_elite_level(TE_data, 0.2)]
scarcity_values = [QB_scarcity, WR_scarcity, RB_scarcity, TE_scarcity]
slots = [1, 3, 2, 1]

importance_df = pd.DataFrame({
    "position_group": positions,
    "PAR": par_values,
    "scarcity_ratio": scarcity_values,
    "dropoff" :dropoff,
    "slots":slots
})
importance_df["importance"] = 0.75 * (importance_df["PAR"] / importance_df["scarcity_ratio"]) + 0.25 * (importance_df["dropoff"] / importance_df["scarcity_ratio"])
importance_df["importance"] = importance_df["importance"] / importance_df["importance"].max()
print(importance_df)


  position_group       PAR  scarcity_ratio   dropoff  slots  importance
0             QB  4.607334        0.263158  2.927625      1    0.760810
1             WR  5.054133        0.267857  3.576067      3    0.836216
2             RB  5.264448        0.253165  3.685603      2    0.919708
3             TE  3.145265        0.149254  3.050619      1    1.000000


In [209]:
def generate_projection_model(data, raw_model):
    X = data[passing_cols + weapon_cols +["fantasy_points"]]
    y = data["next_fantasy_points"]
    raw_model.fit(X, y)
    return raw_model

In [216]:
from sklearn.ensemble import GradientBoostingRegressor

proj_base = GradientBoostingRegressor()

WR_model = generate_projection_model(WR_data, proj_base)
WR_current["next_fantasy_points"] = WR_model.predict(WR_current[passing_cols + weapon_cols + ["fantasy_points"]])

RB_model = generate_projection_model(RB_data, proj_base)
RB_current["next_fantasy_points"] = RB_model.predict(RB_current[passing_cols + weapon_cols + ["fantasy_points"]])

QB_model = generate_projection_model(QB_data, proj_base)
QB_current["next_fantasy_points"] = QB_model.predict(QB_current[passing_cols + weapon_cols + ["fantasy_points"]])

TE_model = generate_projection_model(TE_data, proj_base)
TE_current["next_fantasy_points"] = TE_model.predict(TE_current[passing_cols + weapon_cols + ["fantasy_points"]])

ValueError: could not convert string to float: 'Danny Amendola'

In [211]:
import plotly.express as px
import pandas as pd

def get_feature_importance(model):
    df = pd.DataFrame({
        'feature': model.feature_names_in_,
        'importance': model.feature_importances_
    })

    # Sort by importance
    df = df.sort_values('importance', ascending=True)

    # Create horizontal bar plot
    fig = px.bar(df, 
                x='importance', 
                y='feature',
                orientation='h',
                title='Gradient Boosting Feature Importance',
                color='importance',
                color_continuous_scale='viridis')

    fig.update_layout(
        xaxis_title="Feature Importance",
        yaxis_title="Features",
        height=max(400, len(df) * 25),
        showlegend=False
    )

    fig.show()


In [212]:
QB_current = pd.merge(QB_current, importance_df)[["player_display_name","position_group", "next_fantasy_points", "importance"]]
RB_current = pd.merge(RB_current, importance_df)[["player_display_name", "position_group","next_fantasy_points", "importance"]]
WR_current = pd.merge(WR_current, importance_df)[["player_display_name", "position_group","next_fantasy_points", "importance"]]
TE_current = pd.merge(TE_current, importance_df)[["player_display_name", "position_group","next_fantasy_points", "importance"]]

In [213]:
big_board = pd.concat([QB_current, RB_current, WR_current, TE_current])

In [214]:
big_board["draft_value"] = big_board["next_fantasy_points"]  * big_board["importance"]

In [215]:
big_board

Unnamed: 0,player_display_name,position_group,next_fantasy_points,importance,draft_value
0,Aaron Rodgers,QB,13.487789,0.76081,10.261643
1,Joe Flacco,QB,10.885792,0.76081,8.282018
2,Matthew Stafford,QB,12.163614,0.76081,9.254198
3,Andy Dalton,QB,3.073344,0.76081,2.338230
4,Russell Wilson,QB,10.718058,0.76081,8.154405
...,...,...,...,...,...
82,Brevyn Spann-Ford,TE,3.683140,1.00000,3.683140
83,Tip Reiman,TE,2.925455,1.00000,2.925455
84,A.J. Barner,TE,4.439062,1.00000,4.439062
85,Erick All,TE,4.082135,1.00000,4.082135
