In [3]:
import pandas as pd  
import numpy as np  
import requests
from time import sleep

# Lift pandas' display truncation limits so frames are shown in full:
# no cap on the number of rows or columns, and no cap on cell text width.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)



In [12]:
# Root of the Sleeper REST API; endpoint paths are appended to this string.
BASE_URL = "https://api.sleeper.app/v1"

# season -> Sleeper league id (ids are kept as strings).
LEAGUES = dict([
    (2023, "994356486438477824"),
    (2024, "1064047033306136576"),
    (2025, "1180226065723957248"),
])

# roster_id -> owner name. Roster ids are 1..10, assigned in the order the
# names are listed below.
ROSTER_TO_OWNER = dict(enumerate(
    [
        "Jose",
        "Daryl",
        "Gio",
        "Brigido",
        "Luis",
        "Lalo",
        "Devonte",
        "Marvin",
        "Jacky",
        "Bryan",
    ],
    start=1,
))

In [5]:
def sleeper_get(endpoint, timeout=30):
    """GET a Sleeper API endpoint and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Path appended verbatim to ``BASE_URL`` (e.g. ``"/players/nfl"``).
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely. Defaults to 30.

    Returns
    -------
    object
        Whatever ``Response.json()`` decodes from the response body.

    Raises
    ------
    requests.HTTPError
        If the response status is 4xx/5xx (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    r = requests.get(f"{BASE_URL}{endpoint}", timeout=timeout)
    r.raise_for_status()
    return r.json()

In [7]:
print("Loading player metadata...")
players = sleeper_get("/players/nfl")

# Reduce each player record to the two fields used downstream, keyed by
# player id. Missing fields come through as None because of .get().
player_meta = {
    player_id: {
        "player_name": info.get("full_name"),
        "position": info.get("position"),
    }
    for player_id, info in players.items()
}

Loading player metadata...


In [9]:
# One output row per (season, week, roster, player), accumulated as plain
# dicts and turned into a DataFrame in a later cell.
rows = []

for season, league_id in LEAGUES.items():
    print(f"\nProcessing season {season}")

    # ---- users → owner names
    users = sleeper_get(f"/league/{league_id}/users")
    user_map = {
        u["user_id"]: u["display_name"]
        for u in users
    }

    # ---- rosters → roster_id → owner
    # Rosters whose owner_id is not in user_map are labelled "Unknown".
    rosters = sleeper_get(f"/league/{league_id}/rosters")
    roster_owner = {
        r["roster_id"]: user_map.get(r["owner_id"], "Unknown")
        for r in rosters
    }

    # ---- loop weeks (1–18 is safe; Sleeper returns empty if invalid)
    for week in range(1, 19):
        matchups = sleeper_get(f"/league/{league_id}/matchups/{week}")

        # Only fetch the weekly stats when there is something to score.
        if matchups:
            stats = sleeper_get(f"/stats/nfl/regular/{season}/{week}")

            for m in matchups:
                roster_id = m["roster_id"]
                owner = roster_owner.get(roster_id)

                # `or []` (rather than a .get default) also covers the key
                # being present with a null value, which would otherwise
                # make set(None) / iteration over None raise TypeError.
                starters = set(m.get("starters") or [])
                players_all = m.get("players") or []

                for pid in players_all:
                    slot = "starter" if pid in starters else "bench"

                    rows.append({
                        "season": season,
                        "week": week,
                        "roster_id": roster_id,
                        "owner": owner,
                        "player_id": pid,
                        "player_name": player_meta.get(pid, {}).get("player_name"),
                        "position": player_meta.get(pid, {}).get("position"),
                        "slot": slot,
                        # Players with no stat line for the week score 0.0.
                        "fantasy_points": stats.get(pid, {}).get("pts_ppr", 0.0)
                    })

        # Throttle on every week, including empty ones, so a run of empty
        # weeks does not turn into back-to-back requests.
        sleep(0.25)  # be kind to Sleeper


Processing season 2023

Processing season 2024

Processing season 2025


In [13]:
# Build the player-week table in one chain:
#   1. materialise the collected row dicts,
#   2. sort for readability (points descending within each roster/slot),
#   3. replace the `owner` column with the name looked up from the
#      roster_id via ROSTER_TO_OWNER.
df = (
    pd.DataFrame(rows)
    .sort_values(
        by=["season", "week", "roster_id", "slot", "fantasy_points"],
        ascending=[True, True, True, True, False],
    )
    .assign(owner=lambda frame: frame["roster_id"].map(ROSTER_TO_OWNER))
)

print("\nExtraction complete")
print(df.head())


Extraction complete
    season  week  roster_id owner player_id     player_name position   slot  \
30    2023     1          1  Jose      6804     Jordan Love       QB  bench   
6     2023     1          1  Jose      8121     Romeo Doubs       WR  bench   
20    2023     1          1  Jose      4973    Hayden Hurst       TE  bench   
18    2023     1          1  Jose      1992  Allen Robinson       WR  bench   
0     2023     1          1  Jose      2251    Logan Thomas       TE  bench   

    fantasy_points  
30            23.0  
6             18.6  
20            15.1  
18            11.4  
0              8.3  


In [15]:
# Persist the player-week table as Parquet. The path is relative, so it
# resolves against the kernel's current working directory.
df.to_parquet(path='../../Data/Historical_Player_Performance.parquet')

In [24]:
# Spot-check: first rows belonging to a single owner.
df.loc[df['owner'].eq('Jose')].head()

Unnamed: 0,season,week,roster_id,owner,player_id,player_name,position,slot,fantasy_points
30,2023,1,1,Jose,6804,Jordan Love,QB,bench,23.0
6,2023,1,1,Jose,8121,Romeo Doubs,WR,bench,18.6
20,2023,1,1,Jose,4973,Hayden Hurst,TE,bench,15.1
18,2023,1,1,Jose,1992,Allen Robinson,WR,bench,11.4
0,2023,1,1,Jose,2251,Logan Thomas,TE,bench,8.3


### Feature Engineering

In [17]:
# Total fantasy points scored by each owner's starters, one row per
# (season, week, owner). Bench rows are filtered out before aggregating.
starter_points = (
    df.loc[df["slot"].eq("starter")]
    .groupby(["season", "week", "owner"], as_index=False)
    .agg(starter_fantasy_points=("fantasy_points", "sum"))
)


In [21]:
# Order each owner's weeks chronologically, then attach a trailing mean of
# starter points over up to four rows (the current week included; fewer than
# four rows are averaged as-is because min_periods=1). Grouping is by owner
# only, so the window carries across season boundaries.
starter_points = (
    starter_points
    .sort_values(by=["owner", "season", "week"])
    .assign(
        starter_points_4gm_avg=lambda ordered: (
            ordered
            .groupby("owner")["starter_fantasy_points"]
            .rolling(window=4, min_periods=1)
            .mean()
            # drop the owner level so values align on the original row index
            .droplevel(0)
        )
    )
)


In [26]:
def compute_best_expected_lineup(group):
    """Score the highest-scoring lineup that could be built from ``group``.

    Slots are filled greedily by ``fantasy_points``: 1 QB, 2 RB, 3 WR, 1 TE,
    then 2 flex (RB/WR/TE) and 1 superflex (QB/RB/WR/TE) drawn from players
    not already used in an earlier slot.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for one roster; must contain ``position`` and
        ``fantasy_points`` columns. The index may be anything, including
        non-unique labels.

    Returns
    -------
    pandas.Series
        ``best_expected_qb``, ``best_expected_rb``, ``best_expected_wr``,
        ``best_expected_te``, ``best_expected_flex``,
        ``best_expected_sflex`` (points per slot group; 0 when no eligible
        player exists) and ``best_expected_total_points`` (their sum).
    """
    # Track used players by row position rather than by index label: with
    # duplicate labels, label-based exclusion would also remove rows that
    # were never selected and understate the flex / superflex slots.
    pool = group[["position", "fantasy_points"]].reset_index(drop=True)
    used_rows = set()
    out = {}

    def take_top(positions, n):
        # Best `n` not-yet-used players among the eligible positions.
        eligible = pool[
            pool["position"].isin(positions) & ~pool.index.isin(list(used_rows))
        ]
        chosen = eligible.sort_values("fantasy_points", ascending=False).head(n)
        used_rows.update(chosen.index)
        return chosen["fantasy_points"].sum()

    # Core positions
    out["best_expected_qb"] = take_top(["QB"], 1)
    out["best_expected_rb"] = take_top(["RB"], 2)
    out["best_expected_wr"] = take_top(["WR"], 3)
    out["best_expected_te"] = take_top(["TE"], 1)

    # Flex: RB / WR / TE from whoever is left
    out["best_expected_flex"] = take_top(["RB", "WR", "TE"], 2)

    # SuperFlex: QB / RB / WR / TE from whoever is left after flex
    out["best_expected_sflex"] = take_top(["QB", "RB", "WR", "TE"], 1)

    # Computed before the total key is inserted, so it sums only the slots.
    out["best_expected_total_points"] = sum(out.values())

    return pd.Series(out)


In [27]:
# Best achievable lineup per (season, week, owner).
# Only the columns the lineup function reads are selected before apply(), so
# the grouping columns are not passed into it; pandas 2.2+ emits a
# DeprecationWarning when apply() operates on the grouping columns.
best_expected = (
    df
    .groupby(["season", "week", "owner"], group_keys=False)[["position", "fantasy_points"]]
    .apply(compute_best_expected_lineup)
    .reset_index()
)

  df


In [28]:
# Attach the best-lineup columns to the weekly starter totals. A left join
# keeps every starter_points row even if it has no best_expected match.
starter_points = pd.merge(
    starter_points,
    best_expected,
    how="left",
    on=["season", "week", "owner"],
)

In [35]:
# Spot-check: engineered features for a single owner.
starter_points.loc[starter_points['owner'].eq("Jose")].head()

Unnamed: 0,season,week,owner,starter_fantasy_points,starter_points_4gm_avg,best_expected_qb,best_expected_rb,best_expected_wr,best_expected_te,best_expected_flex,best_expected_sflex,best_expected_total_points
324,2023,1,Jose,126.64,126.64,23.0,43.4,56.2,15.1,18.6,20.54,176.84
325,2023,2,Jose,117.2,121.92,22.2,26.1,60.86,10.2,26.0,20.34,165.7
326,2023,3,Jose,169.44,137.76,25.68,31.8,62.9,16.0,26.0,25.26,187.64
327,2023,4,Jose,126.36,134.91,19.64,20.1,72.7,14.7,28.2,15.22,170.56
328,2023,5,Jose,122.32,133.83,19.24,24.3,53.18,27.7,32.3,15.72,172.44


In [34]:
# Full roster for one owner-week, ordered by position then points ascending,
# to eyeball against the best-lineup numbers.
df.loc[
    df['owner'].eq('Jose') & df['week'].eq(1) & df['season'].eq(2023)
].sort_values(by=['position', 'fantasy_points'])

Unnamed: 0,season,week,roster_id,owner,player_id,player_name,position,slot,fantasy_points
2,2023,1,1,Jose,2307,Marcus Mariota,QB,bench,0.0
14,2023,1,1,Jose,2306,Jameis Winston,QB,bench,0.0
17,2023,1,1,Jose,2028,Derek Carr,QB,starter,15.6
26,2023,1,1,Jose,4046,Patrick Mahomes,QB,starter,20.54
30,2023,1,1,Jose,6804,Jordan Love,QB,bench,23.0
31,2023,1,1,Jose,5284,Jeff Wilson,RB,bench,0.0
21,2023,1,1,Jose,10219,Chris Rodriguez,RB,bench,0.7
1,2023,1,1,Jose,9753,Zach Charbonnet,RB,bench,1.1
23,2023,1,1,Jose,2161,Jerick McKinnon,RB,bench,2.0
13,2023,1,1,Jose,1476,Latavius Murray,RB,bench,2.7
