In [1]:
import pandas as pd  
import numpy as np  
import requests
from time import sleep

pd.set_option('display.max_colwidth', None)

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)



In [2]:
LEAGUES = {
    2023: "994356486438477824",
    2024: "1064047033306136576",
    2025: "1180226065723957248"
}

BASE_URL = "https://api.sleeper.app/v1"

ROSTER_TO_OWNER = {
    1: "Jose",
    2: "Daryl",
    3: "Gio",
    4: "Brigido",
    5: "Luis",
    6: "Lalo",
    7: "Devonte",
    8: "Marvin",
    9: "Jacky",
    10: "Bryan"
}

In [3]:
def sleeper_get(endpoint):
    r = requests.get(f"{BASE_URL}{endpoint}")
    r.raise_for_status()
    return r.json()

In [4]:
print("Loading player metadata...")
players = sleeper_get("/players/nfl")

player_meta = {}
for pid, p in players.items():
    player_meta[pid] = {
        "player_name": p.get("full_name"),
        "position": p.get("position")
    }

Loading player metadata...


In [5]:
rows = []

for season, league_id in LEAGUES.items():
    print(f"\nProcessing season {season}...")

    # ---- users → owner names
    users = sleeper_get(f"/league/{league_id}/users")
    user_map = {u["user_id"]: u["display_name"] for u in users}

    # ---- rosters → roster_id → owner
    rosters = sleeper_get(f"/league/{league_id}/rosters")
    roster_owner = {r["roster_id"]: user_map.get(r["owner_id"], "Unknown") for r in rosters}

    # ---- loop weeks (1–18)
    for week in range(1, 19):
        matchups = sleeper_get(f"/league/{league_id}/matchups/{week}")
        if not matchups:
            continue

        for m in matchups:
            roster_id = m["roster_id"]
            owner = roster_owner.get(roster_id, "Unknown")
            
            # Set of starters for this roster
            starters = set(m.get("starters", []))
            # Players in this roster for the week
            players_all = m.get("players", [])
            # Sleeper stores fantasy points assigned in this matchup (league-specific)
            points_map = m.get("players_points", {})

            for pid in players_all:
                slot = "starter" if pid in starters else "bench"
                rows.append({
                    "season": season,
                    "week": week,
                    "roster_id": roster_id,
                    "owner": owner,
                    "player_id": pid,
                    "player_name": player_meta.get(pid, {}).get("player_name"),
                    "position": player_meta.get(pid, {}).get("position"),
                    "slot": slot,
                    "fantasy_points": points_map.get(pid, 0.0)
                })

        sleep(0.25)  # avoid hitting Sleeper too quickly


Processing season 2023...

Processing season 2024...

Processing season 2025...


In [6]:
df = pd.DataFrame(rows)

# Optional: sort for readability
df = df.sort_values(
    ["season", "week", "roster_id", "slot", "fantasy_points"],
    ascending=[True, True, True, True, False]
).reset_index(drop=True)

# Map owner names from roster_id (in case roster_id mapping is needed)
df['owner'] = df['roster_id'].map(ROSTER_TO_OWNER)

print("\nExtraction complete")
print(df.head())


Extraction complete
   season  week  roster_id owner player_id     player_name position   slot  \
0    2023     1          1  Jose      6804     Jordan Love       QB  bench   
1    2023     1          1  Jose      8121     Romeo Doubs       WR  bench   
2    2023     1          1  Jose      4973    Hayden Hurst       TE  bench   
3    2023     1          1  Jose      1992  Allen Robinson       WR  bench   
4    2023     1          1  Jose      2251    Logan Thomas       TE  bench   

   fantasy_points  
0            29.0  
1            18.6  
2            17.6  
3            11.4  
4            10.3  


In [7]:
df[(df['owner'] == 'Jose') & (df['season'] == 2023) & (df['week'] == 1)].sort_values(['position'])

Unnamed: 0,season,week,roster_id,owner,player_id,player_name,position,slot,fantasy_points
0,2023,1,1,Jose,6804,Jordan Love,QB,bench,29.0
26,2023,1,1,Jose,2028,Derek Carr,QB,starter,16.6
25,2023,1,1,Jose,4046,Patrick Mahomes,QB,starter,23.54
19,2023,1,1,Jose,2306,Jameis Winston,QB,bench,0.0
17,2023,1,1,Jose,2307,Marcus Mariota,QB,bench,0.0
28,2023,1,1,Jose,4018,Joe Mixon,RB,starter,8.8
27,2023,1,1,Jose,4988,Nick Chubb,RB,starter,14.7
24,2023,1,1,Jose,4199,Aaron Jones,RB,starter,25.7
21,2023,1,1,Jose,5284,Jeff Wilson,RB,bench,0.0
31,2023,1,1,Jose,7551,Deon Jackson,RB,starter,1.3


In [8]:
df[df['owner'] == 'Jose'].sort_values(['season', 'owner','week', 'position']).head(28)

Unnamed: 0,season,week,roster_id,owner,player_id,player_name,position,slot,fantasy_points
0,2023,1,1,Jose,6804,Jordan Love,QB,bench,29.0
17,2023,1,1,Jose,2307,Marcus Mariota,QB,bench,0.0
19,2023,1,1,Jose,2306,Jameis Winston,QB,bench,0.0
25,2023,1,1,Jose,4046,Patrick Mahomes,QB,starter,23.54
26,2023,1,1,Jose,2028,Derek Carr,QB,starter,16.6
7,2023,1,1,Jose,9225,Tank Bigsby,RB,bench,5.3
8,2023,1,1,Jose,5248,Gus Edwards,RB,bench,5.2
9,2023,1,1,Jose,8228,Jaylen Warren,RB,bench,4.3
12,2023,1,1,Jose,7607,Michael Carter,RB,bench,2.8
13,2023,1,1,Jose,1476,Latavius Murray,RB,bench,2.2


In [9]:
df.to_parquet('../../Data/Historical_Player_Performance.parquet')

In [10]:
df[df['owner'] == 'Jose'].head()

Unnamed: 0,season,week,roster_id,owner,player_id,player_name,position,slot,fantasy_points
0,2023,1,1,Jose,6804,Jordan Love,QB,bench,29.0
1,2023,1,1,Jose,8121,Romeo Doubs,WR,bench,18.6
2,2023,1,1,Jose,4973,Hayden Hurst,TE,bench,17.6
3,2023,1,1,Jose,1992,Allen Robinson,WR,bench,11.4
4,2023,1,1,Jose,2251,Logan Thomas,TE,bench,10.3


### Feature Engineering

In [11]:
starter_points = (
    df[df["slot"] == "starter"]
    .groupby(["season", "week", "owner"], as_index=False)["fantasy_points"]
    .sum()
    .rename(columns={"fantasy_points": "starter_fantasy_points"})
)


In [12]:
starter_points = starter_points.sort_values(["owner", "season", "week"])

starter_points["starter_points_4gm_avg"] = (
    starter_points
    .groupby("owner")["starter_fantasy_points"]
    .rolling(window=4, min_periods=1)
    .mean()
    .reset_index(level=0, drop=True)
)


In [13]:
def compute_best_expected_lineup(group):
    selected_ids = set()
    out = {}

    def take_top(df, n):
        chosen = df.sort_values("fantasy_points", ascending=False).head(n)
        selected_ids.update(chosen.index)
        return chosen["fantasy_points"].sum()

    # Core positions
    out["best_expected_qb"] = take_top(group[group["position"] == "QB"], 1)
    out["best_expected_rb"] = take_top(group[group["position"] == "RB"], 2)
    out["best_expected_wr"] = take_top(group[group["position"] == "WR"], 3)
    out["best_expected_te"] = take_top(group[group["position"] == "TE"], 1)

    # Remaining player pool
    remaining = group.loc[~group.index.isin(selected_ids)]

    # Flex: RB / WR / TE
    flex_pool = remaining[remaining["position"].isin(["RB", "WR", "TE"])]
    out["best_expected_flex"] = take_top(flex_pool, 2)

    # SuperFlex: QB / RB / WR / TE
    remaining = group.loc[~group.index.isin(selected_ids)]
    sflex_pool = remaining[remaining["position"].isin(["QB", "RB", "WR", "TE"])]
    out["best_expected_sflex"] = take_top(sflex_pool, 1)

    out["best_expected_total_points"] = sum(out.values())

    return pd.Series(out)


In [14]:
best_expected = (
    df
    .groupby(["season", "week", "owner"], group_keys=False)
    .apply(compute_best_expected_lineup)
    .reset_index()
)

  df


In [15]:
starter_points = starter_points.merge(
    best_expected,
    on=["season", "week", "owner"],
    how="left"
)

In [16]:
starter_points[starter_points['owner'] == "Jose"].head()

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_points_4gm_avg,best_expected_qb,best_expected_rb,best_expected_wr,best_expected_te,best_expected_flex,best_expected_sflex,best_expected_total_points
324,2023,1,Jose,125.14,125.14,29.0,40.4,56.2,17.6,19.1,23.54,185.84
325,2023,2,Jose,127.2,126.17,26.34,23.7,60.86,11.6,26.0,25.2,173.7
326,2023,3,Jose,179.44,143.926667,31.68,29.8,62.9,19.5,27.7,27.54,199.12
327,2023,4,Jose,125.86,139.41,19.64,16.6,72.7,18.2,28.2,15.22,170.56
328,2023,5,Jose,124.32,139.205,23.24,20.8,53.18,29.2,36.8,19.72,182.94


In [17]:
df[(df['owner'] == 'Jose') & (df['week'] == 1) & (df['season'] == 2023)].sort_values(['position', 'fantasy_points'])

Unnamed: 0,season,week,roster_id,owner,player_id,player_name,position,slot,fantasy_points
17,2023,1,1,Jose,2307,Marcus Mariota,QB,bench,0.0
19,2023,1,1,Jose,2306,Jameis Winston,QB,bench,0.0
26,2023,1,1,Jose,2028,Derek Carr,QB,starter,16.6
25,2023,1,1,Jose,4046,Patrick Mahomes,QB,starter,23.54
0,2023,1,1,Jose,6804,Jordan Love,QB,bench,29.0
21,2023,1,1,Jose,5284,Jeff Wilson,RB,bench,0.0
16,2023,1,1,Jose,10219,Chris Rodriguez,RB,bench,0.7
15,2023,1,1,Jose,9753,Zach Charbonnet,RB,bench,1.1
31,2023,1,1,Jose,7551,Deon Jackson,RB,starter,1.3
14,2023,1,1,Jose,2161,Jerick McKinnon,RB,bench,1.5


In [18]:
df.to_parquet('../../Data/Historical_Player_Performance.parquet')

### Positional Depth

In [19]:
DEPTH_THRESHOLDS = {
    "QB": 10,
    "RB": 10,
    "WR": 12,
    "TE": 8
}


In [20]:
depth_features = []

for pos, threshold in DEPTH_THRESHOLDS.items():
    tmp = (
        df[df["position"] == pos]
        .assign(above_threshold=lambda x: x["fantasy_points"] >= threshold)
        .groupby(["owner", "season", "week"])["above_threshold"]
        .sum()
        .rename(f"{pos.lower()}_count_{threshold}plus")
    )
    depth_features.append(tmp)

df_depth = pd.concat(depth_features, axis=1).reset_index()


In [21]:
df_depth.head()

Unnamed: 0,owner,season,week,qb_count_10plus,rb_count_10plus,wr_count_12plus,te_count_8plus
0,Brigido,2023,1,3,4,4,0
1,Brigido,2023,2,3,4,3,3
2,Brigido,2023,3,2,4,5,3
3,Brigido,2023,4,2,3,3,2
4,Brigido,2023,5,1,1,1,1


### Drop Off Features

In [22]:
def compute_dropoff(df, group_cols, value_col, n1, n2, feature_name):
    ranked = (
        df.sort_values(value_col, ascending=False)
          .groupby(group_cols)
          .head(n2)
          .assign(rank=lambda x: x.groupby(group_cols).cumcount() + 1)
    )

    wide = ranked.pivot_table(
        index=group_cols,
        columns="rank",
        values=value_col
    )

    dropoff = (wide[n1] - wide[n2]).rename(feature_name)
    return dropoff.reset_index()


In [23]:
rb_dropoff = compute_dropoff(
    df=df[df["position"] == "RB"],
    group_cols=["owner", "season", "week"],
    value_col="fantasy_points",
    n1=2,
    n2=3,
    feature_name="rb_dropoff_2_to_3"
)


In [24]:
wr_dropoff = compute_dropoff(
    df=df[df["position"] == "WR"],
    group_cols=["owner", "season", "week"],
    value_col="fantasy_points",
    n1=3,
    n2=4,
    feature_name="wr_dropoff_3_to_4"
)


In [25]:
wr_dropoff.head()

Unnamed: 0,owner,season,week,wr_dropoff_3_to_4
0,Brigido,2023,1,3.3
1,Brigido,2023,2,3.0
2,Brigido,2023,3,1.3
3,Brigido,2023,4,4.9
4,Brigido,2023,5,1.4


In [26]:
flex_dropoff = compute_dropoff(
    df=df[df["position"].isin(["RB", "WR", "TE"])],
    group_cols=["owner", "season", "week"],
    value_col="fantasy_points",
    n1=2,
    n2=3,
    feature_name="flex_dropoff"
)


In [27]:
qb_best = (
    df[df["position"] == "QB"]
    .groupby(["owner", "season", "week"])["fantasy_points"]
    .max()
    .rename("best_qb_points")
)

non_qb_best = (
    df[df["position"].isin(["RB", "WR", "TE"])]
    .groupby(["owner", "season", "week"])["fantasy_points"]
    .max()
    .rename("best_non_qb_points")
)

sflex_gap = (
    (qb_best - non_qb_best)
    .rename("sflex_qb_vs_nonqb_gap")
    .reset_index()
)


In [28]:
from functools import reduce

dfs = [
    df_depth,
    rb_dropoff,
    wr_dropoff,
    flex_dropoff,
    sflex_gap
]

df_positional_features = reduce(
    lambda left, right: left.merge(
        right, on=["owner", "season", "week"], how="left"
    ),
    dfs
)

df_positional_features = df_positional_features.fillna(0)


In [29]:
df_positional_features.shape

(540, 11)

In [30]:
df_positional_features.head()

Unnamed: 0,owner,season,week,qb_count_10plus,rb_count_10plus,wr_count_12plus,te_count_8plus,rb_dropoff_2_to_3,wr_dropoff_3_to_4,flex_dropoff,sflex_qb_vs_nonqb_gap
0,Brigido,2023,1,3,4,4,0,1.7,3.3,1.1,-3.94
1,Brigido,2023,2,3,4,3,3,7.7,3.0,0.7,8.66
2,Brigido,2023,3,2,4,5,3,1.4,1.3,4.3,3.78
3,Brigido,2023,4,2,3,3,2,2.1,4.9,9.9,2.34
4,Brigido,2023,5,1,1,1,1,0.1,1.4,13.9,-8.24


In [31]:
starter_points.shape

(540, 12)

In [32]:
starter_points.head()

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_points_4gm_avg,best_expected_qb,best_expected_rb,best_expected_wr,best_expected_te,best_expected_flex,best_expected_sflex,best_expected_total_points
0,2023,1,Brigido,150.14,150.14,20.46,47.3,59.7,7.7,34.4,20.02,189.58
1,2023,2,Brigido,167.54,158.84,36.56,53.5,56.9,14.6,34.3,27.12,222.98
2,2023,3,Brigido,177.62,165.1,31.68,27.9,65.4,23.4,33.3,12.2,193.88
3,2023,4,Brigido,146.8,160.525,28.94,30.1,69.7,15.0,23.9,13.56,181.2
4,2023,5,Brigido,131.06,155.755,23.86,20.2,54.0,27.7,15.5,5.8,147.06


In [33]:
owner_weekly_points = starter_points.merge(df_positional_features, 
                     how = 'left', 
                     on =  ['owner', 'season', 'week'])

In [34]:
owner_weekly_points['lineup_efficiency_ratio'] = owner_weekly_points['starter_fantasy_points']/owner_weekly_points['best_expected_total_points']

In [35]:
owner_weekly_points = owner_weekly_points[owner_weekly_points['week'] != 18]

In [36]:
owner_weekly_points["weekly_ranking"] = (
    owner_weekly_points
    .groupby(["season", "week"])["starter_fantasy_points"]
    .rank(method="dense", ascending=False)
    .astype(int)
)


### Adding Weekly Win/Loss Columns

In [37]:
weekly_scores = (
    df[df["slot"] == "starter"]
    .groupby(["season", "week", "owner"], as_index=False)
    .agg(weekly_points=("fantasy_points", "sum"))
)

In [38]:
matchup_rows = []

for season, league_id in LEAGUES.items():
    for week in range(1, 19):
        matchups = sleeper_get(f"/league/{league_id}/matchups/{week}")
        if not matchups:
            continue

        for m in matchups:
            matchup_rows.append({
                "season": season,
                "week": week,
                "roster_id": m["roster_id"],
                "matchup_id": m["matchup_id"]
            })

matchups_df = pd.DataFrame(matchup_rows)

# Map roster_id → owner
matchups_df["owner"] = matchups_df["roster_id"].map(ROSTER_TO_OWNER)


In [39]:
weekly_scores = weekly_scores.merge(
    matchups_df[["season", "week", "owner", "matchup_id"]],
    on=["season", "week", "owner"],
    how="left"
)


In [40]:
weekly_scores["weekly_win"] = 0
weekly_scores["weekly_loss"] = 0
weekly_scores["weekly_tie"] = 0

for (season, week, matchup_id), grp in weekly_scores.groupby(
    ["season", "week", "matchup_id"]
):
    if len(grp) != 2:
        continue  # safety (byes, edge cases)

    idx = grp.index
    p1, p2 = grp["weekly_points"].values

    if p1 > p2:
        weekly_scores.loc[idx[0], "weekly_win"] = 1
        weekly_scores.loc[idx[1], "weekly_loss"] = 1
    elif p2 > p1:
        weekly_scores.loc[idx[1], "weekly_win"] = 1
        weekly_scores.loc[idx[0], "weekly_loss"] = 1
    else:
        weekly_scores.loc[idx, "weekly_tie"] = 1

In [41]:
weekly_scores = weekly_scores.sort_values(
    ["season", "owner", "week"]
)

weekly_scores["cumulative_wins"] = (
    weekly_scores.groupby(["season", "owner"])["weekly_win"].cumsum()
)

weekly_scores["cumulative_losses"] = (
    weekly_scores.groupby(["season", "owner"])["weekly_loss"].cumsum()
)

weekly_scores["games_played"] = (
    weekly_scores["cumulative_wins"] +
    weekly_scores["cumulative_losses"] +
    weekly_scores.groupby(["season", "owner"])["weekly_tie"].cumsum()
)

weekly_scores["win_pct"] = (
    weekly_scores["cumulative_wins"] / weekly_scores["games_played"]
).fillna(0)


In [42]:
weekly_scores.head()

Unnamed: 0,season,week,owner,weekly_points,matchup_id,weekly_win,weekly_loss,weekly_tie,cumulative_wins,cumulative_losses,games_played,win_pct
0,2023,1,Brigido,150.14,1.0,1,0,0,1,0,1,1.0
10,2023,2,Brigido,167.54,1.0,1,0,0,2,0,2,1.0
20,2023,3,Brigido,177.62,1.0,1,0,0,3,0,3,1.0
30,2023,4,Brigido,146.8,1.0,1,0,0,4,0,4,1.0
40,2023,5,Brigido,131.06,1.0,0,1,0,4,1,5,0.8


In [43]:
# Ensure proper ordering for cumulative calcs
weekly_scores = weekly_scores.sort_values(
    ["season", "owner", "week"]
)

# ---- cumulative points (season-to-date)
weekly_scores["cumulative_points"] = (
    weekly_scores
    .groupby(["season", "owner"])["weekly_points"]
    .cumsum()
)

In [44]:
weekly_scores = weekly_scores.sort_values(
    [
        "season",
        "week",
        "cumulative_wins",
        "win_pct",
        "cumulative_points"
    ],
    ascending=[True, True, False, False, False]
)

In [45]:
weekly_scores["cumulative_season_rank"] = (
    weekly_scores
    .groupby(["season", "week"])
    .cumcount() + 1
)

In [46]:
weekly_scores.head(20)

Unnamed: 0,season,week,owner,weekly_points,matchup_id,weekly_win,weekly_loss,weekly_tie,cumulative_wins,cumulative_losses,games_played,win_pct,cumulative_points,cumulative_season_rank
9,2023,1,Marvin,167.1,2.0,1,0,0,1,0,1,1.0,167.1,1
0,2023,1,Brigido,150.14,1.0,1,0,0,1,0,1,1.0,150.14,2
6,2023,1,Jose,125.14,4.0,1,0,0,1,0,1,1.0,125.14,3
3,2023,1,Devonte,113.28,5.0,1,0,0,1,0,1,1.0,113.28,4
8,2023,1,Luis,105.7,3.0,1,0,0,1,0,1,1.0,105.7,5
1,2023,1,Bryan,125.2,1.0,0,1,0,0,1,1,0.0,125.2,6
7,2023,1,Lalo,115.1,4.0,0,1,0,0,1,1,0.0,115.1,7
5,2023,1,Jacky,108.78,2.0,0,1,0,0,1,1,0.0,108.78,8
2,2023,1,Daryl,101.66,3.0,0,1,0,0,1,1,0.0,101.66,9
4,2023,1,Gio,92.34,5.0,0,1,0,0,1,1,0.0,92.34,10


In [47]:
owner_weekly_points = owner_weekly_points.merge(
    weekly_scores[[
        "season",
        "week",
        "owner",
        "weekly_win",
        "weekly_loss",
        "weekly_tie",
        "cumulative_wins",
        "cumulative_losses",
        "win_pct",
        "cumulative_points",
        "cumulative_season_rank"
    ]],
    on=["season", "week", "owner"],
    how="left"
)

In [48]:
owner_weekly_points.head()

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_points_4gm_avg,best_expected_qb,best_expected_rb,best_expected_wr,best_expected_te,best_expected_flex,best_expected_sflex,best_expected_total_points,qb_count_10plus,rb_count_10plus,wr_count_12plus,te_count_8plus,rb_dropoff_2_to_3,wr_dropoff_3_to_4,flex_dropoff,sflex_qb_vs_nonqb_gap,lineup_efficiency_ratio,weekly_ranking,weekly_win,weekly_loss,weekly_tie,cumulative_wins,cumulative_losses,win_pct,cumulative_points,cumulative_season_rank
0,2023,1,Brigido,150.14,150.14,20.46,47.3,59.7,7.7,34.4,20.02,189.58,3,4,4,0,1.7,3.3,1.1,-3.94,0.791961,2,1,0,0,1,0,1.0,150.14,2
1,2023,2,Brigido,167.54,158.84,36.56,53.5,56.9,14.6,34.3,27.12,222.98,3,4,3,3,7.7,3.0,0.7,8.66,0.751368,2,1,0,0,2,0,1.0,317.68,1
2,2023,3,Brigido,177.62,165.1,31.68,27.9,65.4,23.4,33.3,12.2,193.88,2,4,5,3,1.4,1.3,4.3,3.78,0.916134,4,1,0,0,3,0,1.0,495.3,1
3,2023,4,Brigido,146.8,160.525,28.94,30.1,69.7,15.0,23.9,13.56,181.2,2,3,3,2,2.1,4.9,9.9,2.34,0.810155,4,1,0,0,4,0,1.0,642.1,2
4,2023,5,Brigido,131.06,155.755,23.86,20.2,54.0,27.7,15.5,5.8,147.06,1,1,1,1,0.1,1.4,13.9,-8.24,0.891201,7,0,1,0,4,1,0.8,773.16,2


In [49]:
owner_weekly_points[owner_weekly_points['owner'] == 'Jose']

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_points_4gm_avg,best_expected_qb,best_expected_rb,best_expected_wr,best_expected_te,best_expected_flex,best_expected_sflex,best_expected_total_points,qb_count_10plus,rb_count_10plus,wr_count_12plus,te_count_8plus,rb_dropoff_2_to_3,wr_dropoff_3_to_4,flex_dropoff,sflex_qb_vs_nonqb_gap,lineup_efficiency_ratio,weekly_ranking,weekly_win,weekly_loss,weekly_tie,cumulative_wins,cumulative_losses,win_pct,cumulative_points,cumulative_season_rank
306,2023,1,Jose,125.14,125.14,29.0,40.4,56.2,17.6,19.1,23.54,185.84,3,2,2,2,5.9,5.0,7.1,2.8,0.673375,4,1,0,0,1,0,1.0,125.14,3
307,2023,2,Jose,127.2,126.17,26.34,23.7,60.86,11.6,26.0,25.2,173.7,2,4,5,2,0.9,5.1,1.5,4.38,0.732297,8,0,1,0,1,1,0.5,252.34,7
308,2023,3,Jose,179.44,143.926667,31.68,29.8,62.9,19.5,27.7,27.54,199.12,4,2,4,2,6.5,2.9,0.4,6.18,0.901165,3,1,0,0,2,1,0.666667,431.78,4
309,2023,4,Jose,125.86,139.41,19.64,16.6,72.7,18.2,28.2,15.22,170.56,2,0,4,2,2.2,1.8,0.3,-16.36,0.737922,7,1,0,0,3,1,0.75,557.64,3
310,2023,5,Jose,124.32,139.205,23.24,20.8,53.18,29.2,36.8,19.72,182.94,2,1,3,2,4.6,2.8,1.1,-5.96,0.679567,8,0,1,0,3,2,0.6,681.96,5
311,2023,6,Jose,120.06,137.42,19.34,14.9,55.4,3.1,14.6,18.32,125.66,2,0,3,0,1.4,5.1,6.1,-2.16,0.955435,7,0,1,0,3,3,0.5,802.02,6
312,2023,7,Jose,152.96,130.8,41.86,30.5,42.7,15.5,22.5,19.44,172.5,3,1,2,2,2.4,0.0,3.5,20.96,0.886725,5,0,1,0,3,4,0.428571,954.98,7
313,2023,8,Jose,132.26,132.4,22.3,46.9,54.8,28.4,36.1,16.56,205.06,2,2,2,3,11.1,1.3,3.3,-6.1,0.644982,6,0,1,0,3,5,0.375,1087.24,7
314,2023,9,Jose,168.24,143.38,20.74,33.1,48.5,25.6,28.1,19.8,175.84,3,4,1,3,0.6,1.8,8.4,-8.86,0.956779,2,1,0,0,4,5,0.444444,1255.48,7
315,2023,10,Jose,136.36,147.455,20.66,29.5,47.7,22.1,26.7,12.88,159.54,2,2,4,3,2.2,2.3,0.9,-1.44,0.854707,7,1,0,0,5,5,0.5,1391.84,6


In [50]:
owner_weekly_points.to_parquet('../../Data/Historical_Player_Performance.parquet')

### We can't use all featuers in modeling so we will make adjustments

In [51]:
cols_to_drop = [    
'starter_points_4gm_avg',          
'best_expected_qb',                
'best_expected_rb',
'best_expected_wr',
'best_expected_te',
'best_expected_flex',
'best_expected_sflex',
'best_expected_total_points',
'qb_count_10plus',
'rb_count_10plus',
'wr_count_12plus',
'te_count_8plus',
'rb_dropoff_2_to_3',
'wr_dropoff_3_to_4',
'flex_dropoff',
'sflex_qb_vs_nonqb_gap',
'lineup_efficiency_ratio',
'weekly_ranking',
'weekly_win',
'weekly_loss',
'weekly_tie',
'cumulative_wins',
'cumulative_losses',
'win_pct',
'cumulative_points',
'cumulative_season_rank']

In [52]:
# Make sure the data is sorted
owner_weekly_points = owner_weekly_points.sort_values(['owner', 'season', 'week'])

# List of lags we want
lags = [1, 2, 3, 4]

# Create lagged starter points
for lag in lags:
    owner_weekly_points[f'starter_fp_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['starter_fantasy_points']
        .shift(lag)
    )

# Create rolling average (last 4 weeks, shifted by 1 to avoid leakage)
owner_weekly_points['starter_fp_4wk_avg_lag1'] = (
    owner_weekly_points.groupby('owner')['starter_fantasy_points']
    .rolling(4)
    .mean()
    .shift(1)
    .reset_index(level=0, drop=True)
)

# Optional: 8-week rolling average
owner_weekly_points['starter_fp_8wk_avg_lag1'] = (
    owner_weekly_points.groupby('owner')['starter_fantasy_points']
    .rolling(8)
    .mean()
    .shift(1)
    .reset_index(level=0, drop=True)
)

# Optional: 4-week standard deviation for volatility
owner_weekly_points['starter_fp_4wk_std_lag1'] = (
    owner_weekly_points.groupby('owner')['starter_fantasy_points']
    .rolling(4)
    .std()
    .shift(1)
    .reset_index(level=0, drop=True)
)


In [53]:
owner_weekly_points.head()

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_points_4gm_avg,best_expected_qb,best_expected_rb,best_expected_wr,best_expected_te,best_expected_flex,best_expected_sflex,best_expected_total_points,qb_count_10plus,rb_count_10plus,wr_count_12plus,te_count_8plus,rb_dropoff_2_to_3,wr_dropoff_3_to_4,flex_dropoff,sflex_qb_vs_nonqb_gap,lineup_efficiency_ratio,weekly_ranking,weekly_win,weekly_loss,weekly_tie,cumulative_wins,cumulative_losses,win_pct,cumulative_points,cumulative_season_rank,starter_fp_lag_1,starter_fp_lag_2,starter_fp_lag_3,starter_fp_lag_4,starter_fp_4wk_avg_lag1,starter_fp_8wk_avg_lag1,starter_fp_4wk_std_lag1
0,2023,1,Brigido,150.14,150.14,20.46,47.3,59.7,7.7,34.4,20.02,189.58,3,4,4,0,1.7,3.3,1.1,-3.94,0.791961,2,1,0,0,1,0,1.0,150.14,2,,,,,,,
1,2023,2,Brigido,167.54,158.84,36.56,53.5,56.9,14.6,34.3,27.12,222.98,3,4,3,3,7.7,3.0,0.7,8.66,0.751368,2,1,0,0,2,0,1.0,317.68,1,150.14,,,,,,
2,2023,3,Brigido,177.62,165.1,31.68,27.9,65.4,23.4,33.3,12.2,193.88,2,4,5,3,1.4,1.3,4.3,3.78,0.916134,4,1,0,0,3,0,1.0,495.3,1,167.54,150.14,,,,,
3,2023,4,Brigido,146.8,160.525,28.94,30.1,69.7,15.0,23.9,13.56,181.2,2,3,3,2,2.1,4.9,9.9,2.34,0.810155,4,1,0,0,4,0,1.0,642.1,2,177.62,167.54,150.14,,,,
4,2023,5,Brigido,131.06,155.755,23.86,20.2,54.0,27.7,15.5,5.8,147.06,1,1,1,1,0.1,1.4,13.9,-8.24,0.891201,7,0,1,0,4,1,0.8,773.16,2,146.8,177.62,167.54,150.14,160.525,,14.579359


### Create Lagged Features For Modeling

In [54]:
# Lagged weekly results
for lag in [1,2,3,4]:
    owner_weekly_points[f'weekly_win_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['weekly_win'].shift(lag)
    )
    owner_weekly_points[f'weekly_loss_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['weekly_loss'].shift(lag)
    )
    owner_weekly_points[f'weekly_tie_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['weekly_tie'].shift(lag)
    )

# Rolling win percentage (last 4 weeks, excluding current week)
owner_weekly_points['win_pct_4wk_lag1'] = (
    owner_weekly_points.groupby('owner')['weekly_win']
    .rolling(4)
    .mean()
    .shift(1)
    .reset_index(level=0, drop=True)
)


In [55]:
for lag in [1,2,3,4]:
    owner_weekly_points[f'cum_wins_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['cumulative_wins'].shift(lag)
    )
    owner_weekly_points[f'cum_losses_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['cumulative_losses'].shift(lag)
    )
    owner_weekly_points[f'win_pct_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['win_pct'].shift(lag)
    )


In [56]:
owner_weekly_points['win_pct_change_1wk'] = (
    owner_weekly_points.groupby('owner')['win_pct'].diff(1)
)
owner_weekly_points['cum_wins_change_1wk'] = (
    owner_weekly_points.groupby('owner')['cumulative_wins'].diff(1)
)


In [57]:
for lag in [1,2,3,4]:
    owner_weekly_points[f'cum_points_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['cumulative_points'].shift(lag)
    )
    owner_weekly_points[f'season_rank_lag_{lag}'] = (
        owner_weekly_points.groupby('owner')['cumulative_season_rank'].shift(lag)
    )

# Rolling 4-week average of cumulative points (trend)
owner_weekly_points['cum_points_4wk_avg_lag1'] = (
    owner_weekly_points.groupby('owner')['cumulative_points']
    .rolling(4)
    .mean()
    .shift(1)
    .reset_index(level=0, drop=True)
)


In [58]:
owner_weekly_points = owner_weekly_points.drop(columns=cols_to_drop, errors='ignore')
owner_weekly_points.head()

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_fp_lag_1,starter_fp_lag_2,starter_fp_lag_3,starter_fp_lag_4,starter_fp_4wk_avg_lag1,starter_fp_8wk_avg_lag1,starter_fp_4wk_std_lag1,weekly_win_lag_1,weekly_loss_lag_1,weekly_tie_lag_1,weekly_win_lag_2,weekly_loss_lag_2,weekly_tie_lag_2,weekly_win_lag_3,weekly_loss_lag_3,weekly_tie_lag_3,weekly_win_lag_4,weekly_loss_lag_4,weekly_tie_lag_4,win_pct_4wk_lag1,cum_wins_lag_1,cum_losses_lag_1,win_pct_lag_1,cum_wins_lag_2,cum_losses_lag_2,win_pct_lag_2,cum_wins_lag_3,cum_losses_lag_3,win_pct_lag_3,cum_wins_lag_4,cum_losses_lag_4,win_pct_lag_4,win_pct_change_1wk,cum_wins_change_1wk,cum_points_lag_1,season_rank_lag_1,cum_points_lag_2,season_rank_lag_2,cum_points_lag_3,season_rank_lag_3,cum_points_lag_4,season_rank_lag_4,cum_points_4wk_avg_lag1
0,2023,1,Brigido,150.14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2023,2,Brigido,167.54,150.14,,,,,,,1.0,0.0,0.0,,,,,,,,,,,1.0,0.0,1.0,,,,,,,,,,0.0,1.0,150.14,2.0,,,,,,,
2,2023,3,Brigido,177.62,167.54,150.14,,,,,,1.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,2.0,0.0,1.0,1.0,0.0,1.0,,,,,,,0.0,1.0,317.68,1.0,150.14,2.0,,,,,
3,2023,4,Brigido,146.8,177.62,167.54,150.14,,,,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,,,,3.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,1.0,,,,0.0,1.0,495.3,1.0,317.68,1.0,150.14,2.0,,,
4,2023,5,Brigido,131.06,146.8,177.62,167.54,150.14,160.525,,14.579359,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,4.0,0.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,1.0,-0.2,0.0,642.1,2.0,495.3,1.0,317.68,1.0,150.14,2.0,401.305


In [59]:
owner_weekly_points.to_parquet('../../Data/Historical_Player_Performance_Final.parquet')