# 🏀 HoopSim Quickstart
This notebook verifies your environment, grabs sample NBA data, and walks through the first steps of the project.

## 1) Environment Check

In [None]:
import platform
import sys

# Report the interpreter and operating system so setup problems are easy to spot.
print("Python:", sys.version)
print("Platform:", platform.platform())

# Attempt to import each package the project relies on and print one status
# line per package; a failed import is reported instead of stopping the cell.
for pkg in ("pandas", "numpy", "matplotlib", "sklearn", "tqdm"):
    try:
        __import__(pkg)
    except Exception as err:
        # Any exception raised while importing is shown next to the package name.
        print(f"{pkg}: MISSING -> {err}")
    else:
        print(f"{pkg}: OK")


## 2) Import Libraries

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
# Raise pandas' "display.max_columns" option to 50 so wide frames show more columns when displayed.
pd.set_option("display.max_columns", 50)


## 3) Load Historical Games (FiveThirtyEight Elo)

In [None]:
# Load the FiveThirtyEight historical Elo table into `nba`.
# A local copy named nbaallelo.csv in the working directory is preferred when it
# exists, so the notebook also runs offline; otherwise the file is downloaded
# from GitHub (which needs internet access).
URL = "https://raw.githubusercontent.com/fivethirtyeight/data/master/nba-elo/nbaallelo.csv"
LOCAL_CSV = "nbaallelo.csv"

nba_source = LOCAL_CSV if os.path.exists(LOCAL_CSV) else URL
nba = pd.read_csv(nba_source)
nba.head()


## 4) Compute Last Season End-of-Year Elo per Team

In [None]:
# Season to summarise, matched against the `year_id` column.
season = 2023  # change as needed

# If the requested season is not in the loaded data, every table and chart below
# would silently come out empty. Fall back to the most recent season available
# and say so. (NOTE(review): the FiveThirtyEight archive appears to stop well
# before 2023 - confirm against the loaded file.)
if not nba["year_id"].eq(season).any():
    latest_season = int(nba["year_id"].max())
    print(f"Season {season} not found in data; using latest available season {latest_season}")
    season = latest_season

season_data = nba[nba["year_id"] == season].copy()

# Sort chronologically within each team. `date_game` is presumably stored as
# m/d/yyyy text (verify), and sorting such text is not chronological
# ("4/9/..." sorts after "4/15/..."), so the dates are parsed for the sort only;
# the column itself is left untouched.
last_games = (
    season_data
    .sort_values(
        ["team_id", "date_game"],
        key=lambda col: pd.to_datetime(col) if col.name == "date_game" else col,
    )
    .groupby("team_id")
    .tail(1)
)

# One row per team: its post-game Elo (`elo_n`) from the final game of the season.
last_elo = last_games[["team_id", "elo_n"]].rename(columns={"team_id": "team", "elo_n": "last_elo"})
last_elo.sort_values("last_elo", ascending=False).head(10)


## 5) Visualize Top Teams by Elo

In [None]:
# Ten highest-rated teams, best first.
ranked = last_elo.sort_values("last_elo", ascending=False)
top = ranked.head(10)

# Horizontal bar chart drawn through an explicit Axes object; the y-axis is
# inverted so the highest-rated team appears at the top.
fig, ax = plt.subplots(figsize=(6,4))
ax.barh(top["team"], top["last_elo"])
ax.invert_yaxis()
ax.set_xlabel("End of Season Elo")
ax.set_title(f"Top Teams — {season}")
plt.show()


## 6) Build Hybrid Start Ratings (Regressed Elo + Roster Metric)
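**Option A:** Pull real roster stats with the Basketball-Reference Scraper (requires internet; uncomment the block in the cell below). **Option B:** Paste or load a precomputed roster CSV. If you do neither, the cell falls back to a small synthetic roster-strength table for demo purposes.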

In [None]:
# If you have internet:
# from basketball_reference_scraper.teams import get_roster_stats
# teams_abr = ['ATL','BOS','BRK','CHI','CHO','CLE','DAL','DEN','DET','GSW','HOU','IND','LAC','LAL','MEM',
#              'MIA','MIL','MIN','NOP','NYK','OKC','ORL','PHI','PHO','POR','SAC','SAS','TOR','UTA','WAS']
# season_end_year = 2024
# rosters = []
# for t in teams_abr:
#     df = get_roster_stats(t, season_end_year=season_end_year)
#     df['TEAM'] = t
#     rosters.append(df)
# roster = pd.concat(rosters, ignore_index=True)

# Tunable constants for the hybrid rating.
LEAGUE_AVG = 1500   # rating that Elo is regressed toward and that the output is centred on
W_DECAY = 0.35      # share of last season's Elo replaced by the league average
W_ELO = 0.5         # weight of regressed Elo in the blend; the roster metric gets 1 - W_ELO
DEMO_SEED = 42      # single seed so the synthetic demo table is identical on every run

# For offline demo, create a tiny fake roster strength table (replace with real data).
# Both the team sample and the ratings are seeded, so Restart & Run All reproduces
# the same table.
n_demo_teams = min(10, len(last_elo))
rng = np.random.default_rng(DEMO_SEED)
player_strength = pd.DataFrame({
    "team": last_elo["team"].sample(n_demo_teams, random_state=DEMO_SEED).values,
    "player_rating": rng.normal(LEAGUE_AVG, 80, size=n_demo_teams),
})

# Regress last season's Elo toward the league average. The column is stored on
# `last_elo`; re-running the cell simply overwrites it.
last_elo["regressed_elo"] = (1 - W_DECAY) * last_elo["last_elo"] + W_DECAY * LEAGUE_AVG

# Collapse the roster table to one mean rating per team before merging, so a table
# with several rows per team (e.g. one row per player) cannot duplicate team rows.
team_strength = player_strength.groupby("team", as_index=False)["player_rating"].mean()

hybrid = pd.merge(last_elo, team_strength, on="team", how="left")
# Teams with no roster rating are treated as league average.
hybrid["player_rating"] = hybrid["player_rating"].fillna(LEAGUE_AVG)
hybrid["start_rating"] = W_ELO * hybrid["regressed_elo"] + (1 - W_ELO) * hybrid["player_rating"]
# Re-centre so the mean start rating equals the league average.
hybrid["start_rating"] = LEAGUE_AVG + (hybrid["start_rating"] - hybrid["start_rating"].mean())
hybrid.sort_values("start_rating", ascending=False).head(10)


## 7) Save Clean Start Ratings CSV

In [None]:
# Write the hybrid start ratings to a CSV file, creating the output folder if needed.
# The path is defined once so the file written and the message printed cannot drift apart.
OUTPUT_CSV = "data/team_start_ratings.csv"
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
hybrid.to_csv(OUTPUT_CSV, index=False)
print(f"Saved -> {OUTPUT_CSV}")


## 8) Next Steps
- Implement season schedule download (`data.nba.net`)
- Write `simulate_regular_season` and `simulate_series`
- Run Monte Carlo and plot championship odds
- Backtest on past seasons