In [None]:
# Cell 1 – Imports and Player Selection
import random
import pandas as pd
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Retrieve every player known to nba_api, then draw 50 of them at random.
# No seed is set in this cell, so each run selects a different group.
all_players = players.get_players()
sampled = random.sample(all_players, 50)

# Lookup table: player ID -> full name, in the order the sample was drawn.
player_id_map = dict((entry['id'], entry['full_name']) for entry in sampled)
player_ids = list(player_id_map)

# Echo the selection so the run can be inspected by eye.
print(f"Selected {len(player_ids)} players:")
for pid in player_ids:
    name = player_id_map[pid]
    print(f"  • {name} (ID: {pid})")


In [None]:
# Cell 2 – robustly download regular‐season logs for each player
from nba_api.stats.endpoints import playercareerstats, playergamelog

logs_list = []
skipped = []  # (player_id, season, reason) for every fetch we had to drop

# The five fields kept from each game log.  Defined once, outside the loop,
# because it never changes and is also needed for the empty-result fallback.
wanted = ['GAME_ID', 'PLAYER_ID', 'TEAM_ID', 'MATCHUP', 'WL']

for pid in player_ids:
    # 1) get all seasons this player appears in.  This request can fail just
    #    like the game-log request below, so it is guarded as well: one bad
    #    player must not abort the whole download.
    try:
        career = playercareerstats.PlayerCareerStats(player_id=pid).get_data_frames()[0]
        seasons = career['SEASON_ID'].unique()
    except Exception as e:
        skipped.append((pid, 'ALL', f"career stats error: {e}"))
        continue

    for season in seasons:
        # 2) pull the reg-season log
        try:
            gl = playergamelog.PlayerGameLog(
                player_id=pid,
                season=season,
                season_type_all_star='Regular Season'
            ).get_data_frames()[0]
        except Exception as e:
            skipped.append((pid, season, f"API error: {e}"))
            continue

        # 3) skip if no rows (not recorded in `skipped`: nothing went wrong)
        if gl.empty:
            continue

        # 4) normalize to UPPERCASE so the column check is case-insensitive
        gl.columns = [c.upper() for c in gl.columns]

        # 5) make sure all five fields are present before selecting them
        missing = set(wanted) - set(gl.columns)
        if missing:
            skipped.append((pid, season, f"missing cols: {missing}"))
            continue

        # 6) keep only the five columns (in the right order)
        logs_list.append(gl[wanted])

# report what we skipped
for pid, season, reason in skipped:
    print(f"⚠️  Skipped {pid}@{season}: {reason}")

# concatenate
if logs_list:
    logs = pd.concat(logs_list, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame that still carries the expected columns.
    logs = pd.DataFrame(columns=wanted)
print(f"✅  Fetched {len(logs)} total rows of game‐log data.")
logs.head()
