# 📈 Player Development — Cross-Season Tracking

## Key Findings
- **Most multi-season players improve** — the median PPG change is positive
- Players with **3+ seasons** show the clearest development trajectories
- The **top improvers** gain 5-10+ PPG over their careers
- Some players plateau or decline as they age up into harder divisions

---

In [None]:
%matplotlib inline
import sqlite3, re, pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns
import warnings; warnings.filterwarnings('ignore')

sns.set_theme(style="whitegrid"); plt.rcParams['figure.dpi'] = 120

# Season names listed oldest to newest. A season's position in this tuple is
# its chronological rank, so adding a season only means appending its name.
_SEASONS_CHRONOLOGICAL = (
    "Summer 2020/21",
    "Winter 2021",
    "Summer 2021/22",
    "Winter 2022",
    "Summer 2022/23",
    "Winter 2023",
    "Summer 2023/24",
    "Winter 2024",
    "Summer 2024/25",
    "Winter 2025",
    "Summer 2025/26",
)

# Maps each season name to its zero-based chronological position.
SEASON_ORDER = {
    season: rank for rank, season in enumerate(_SEASONS_CHRONOLOGICAL)
}

# One row per player per grade, restricted to rows with at least one game
# played (so the points-per-game division below never divides by zero).
PLAYER_STATS_QUERY = """
    SELECT p.id as pid, p.first_name || ' ' || p.last_name as name,
        s.name as season, ps.games_played as gp, ps.total_points as pts,
        ps.total_fouls as fouls, g.name as grade
    FROM player_stats ps
    JOIN players p ON p.id = ps.player_id
    JOIN grades g ON g.id = ps.grade_id
    JOIN seasons s ON s.id = g.season_id
    WHERE ps.games_played > 0
"""

# try/finally releases the SQLite handle even when the query itself raises.
conn = sqlite3.connect("../data/playhq.db")
try:
    df = pd.read_sql(PLAYER_STATS_QUERY, conn)
finally:
    conn.close()

df["ppg"] = df["pts"] / df["gp"]
df["season_order"] = df["season"].map(SEASON_ORDER)

# Seasons absent from SEASON_ORDER map to NaN and therefore have no defined
# chronological position; report them instead of letting them pass silently.
unmapped_seasons = sorted(
    df.loc[df["season_order"].isna(), "season"].dropna().unique()
)
if unmapped_seasons:
    print(f"WARNING: seasons missing from SEASON_ORDER: {unmapped_seasons}")

# Aggregate per player per season: a player can have several stat lines in
# one season (one per grade), so totals are summed before recomputing PPG.
# NOTE(review): groupby drops rows whose key is NaN, so a player whose SQL
# name concatenation is NULL would be excluded here — confirm names are
# never NULL in the players table.
sa = df.groupby(["pid", "name", "season"]).agg(
    gp=("gp", "sum"),
    pts=("pts", "sum"),
    fouls=("fouls", "sum"),
    season_order=("season_order", "first"),
).reset_index()
sa["ppg"] = sa["pts"] / sa["gp"]

print(f"Total stat lines: {len(df):,}")
print(f"Player-seasons: {len(sa):,}")

## Players with 3+ Seasons

In [None]:
# Keep only players who appear in at least three distinct seasons.
multi = sa.groupby("pid").filter(lambda x: x["season"].nunique() >= 3)
pids = multi["pid"].nunique()
print(f"Players with 3+ seasons: {pids:,}")

# Calculate first→last PPG change
devs = []
for _pid, g in multi.groupby("pid"):
    g = g.sort_values("season_order")
    # Pull the boundary rows once rather than re-indexing for every field.
    first, last = g.iloc[0], g.iloc[-1]
    devs.append({
        "name": first["name"],
        "first_ppg": round(first["ppg"], 2),
        "last_ppg": round(last["ppg"], 2),
        # Rounded from the unrounded difference, not from the rounded PPGs.
        "change": round(last["ppg"] - first["ppg"], 2),
        "n_seasons": len(g),
        "total_gp": int(g["gp"].sum()),
    })

# Explicit columns keep the frame well-formed when no player qualifies;
# without them an empty list yields a column-less frame and the filter
# below would raise KeyError.
dev = pd.DataFrame(
    devs,
    columns=["name", "first_ppg", "last_ppg", "change", "n_seasons", "total_gp"],
)
dev = dev[dev["total_gp"] >= 10]

print(f"With 10+ career games: {len(dev):,}")
if dev.empty:
    # Summary statistics are undefined on an empty selection.
    print("\nPPG Change: no qualifying players")
else:
    improved = dev["change"] > 0
    declined = dev["change"] < 0
    print(f"\nPPG Change: mean={dev['change'].mean():+.2f}, median={dev['change'].median():+.2f}")
    print(f"Improved: {improved.sum()} ({improved.mean()*100:.0f}%)")
    print(f"Declined: {declined.sum()} ({declined.mean()*100:.0f}%)")

In [None]:
# Three side-by-side views of the first-to-last-season PPG change in `dev`.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
fig.suptitle("Player Development Across Seasons", fontsize=15, fontweight="bold")

# Panel 1: distribution of PPG change, with zero and the mean marked.
mean_change = dev["change"].mean()
ax = axes[0]
ax.hist(dev["change"], bins=60, color="#1976D2", edgecolor="white", alpha=0.85)
ax.axvline(0, color="#D32F2F", ls="--", lw=2)
ax.axvline(mean_change, color="#FF9800", ls="-", lw=2, label=f"Mean: {mean_change:+.2f}")
# The arrow was mis-encoded in the original title and rendered as garbage.
ax.set_title("PPG Change (First → Last Season)")
ax.set_xlabel("PPG Change")
ax.set_ylabel("Players")
ax.legend()

# Panel 2: PPG change against the number of seasons played.
ax = axes[1]
ax.scatter(dev["n_seasons"], dev["change"], alpha=0.3, s=10, color="#7B1FA2")
ax.axhline(0, color="#D32F2F", ls="--", lw=1)
ax.set_title("PPG Change vs Career Length")
ax.set_xlabel("Seasons")
ax.set_ylabel("PPG Change")

# Panel 3: first vs last season PPG; points above the diagonal improved.
ax = axes[2]
ax.scatter(dev["first_ppg"], dev["last_ppg"], alpha=0.3, s=10, color="#388E3C")
mx = max(dev["first_ppg"].max(), dev["last_ppg"].max())
ax.plot([0, mx], [0, mx], "r--", lw=1, label="No change line")
ax.set_title("First vs Last Season PPG")
ax.set_xlabel("First PPG")
ax.set_ylabel("Last PPG")
ax.legend()

plt.tight_layout()
plt.savefig("../assets/player_development.png", dpi=150, bbox_inches="tight")
plt.show()

## Top 15 Improvers

In [None]:
# The 15 players with the largest first-to-last-season PPG gain.
top_imp = dev.nlargest(15, "change")[["name", "first_ppg", "last_ppg", "change", "n_seasons", "total_gp"]]
# Rank from 1. Sized from the actual row count because nlargest returns fewer
# than 15 rows when fewer players qualify, and a fixed range(1, 16) would then
# raise a length-mismatch ValueError.
top_imp.index = range(1, len(top_imp) + 1)
top_imp.columns = ["Player", "First PPG", "Last PPG", "Change", "Seasons", "Games"]
top_imp