In [1]:
%pip install nba_api

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import sys
!{sys.executable} -m pip install ipywidgets

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [3]:
!pip3 install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [4]:
import pandas as pd
import numpy as np
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
import time

# --- 1. DATA INGESTION (AUTOMATED VIA API) ---
# Goal: Fetch live data directly from NBA API instead of manual scraping.

def fetch_wemby_career_data(seasons=None):
    """Download Victor Wembanyama's game logs and return them as one clean frame.

    Parameters
    ----------
    seasons : list of str, optional
        Season labels in the API format 'YYYY-YY'. Defaults to
        ['2023-24', '2024-25', '2025-26'].

    Returns
    -------
    pandas.DataFrame
        One row per game, sorted by date, with the columns Date (string
        'YYYY-MM-DD'), Is_Home, Opponent, Win/Lose, Points, Rebounds, Assists,
        Blocks, Steals, Minutes and Rest_Days.

    Raises
    ------
    ValueError
        If the player cannot be found in the static player list, or if no
        season returned any game.
    """
    # Written as an escape sequence so the source stays pure ASCII and the
    # emoji cannot be corrupted by a wrong file encoding.
    print("\U0001F504 Connecting to NBA API...")

    def _parse_minutes(value):
        """Return whole minutes from 34, 34.0, '34', '34.7' or '34:12'; missing -> 0."""
        if pd.isna(value):
            return 0
        text = str(value).strip()
        if not text:
            return 0
        # Keep only the minutes part if the value is in 'MM:SS' form.
        return int(float(text.split(':')[0]))

    if seasons is None:
        seasons = ['2023-24', '2024-25', '2025-26']

    # 1. Find Victor Wembanyama's Player ID
    wemby = next(
        (p for p in players.get_players() if p['full_name'] == 'Victor Wembanyama'),
        None,
    )
    if wemby is None:
        raise ValueError("Player 'Victor Wembanyama' not found in the static player list.")
    wemby_id = wemby['id']

    # 2. Download one game log per season
    all_games = []
    for season in seasons:
        print(f"   Downloading season {season}...")
        try:
            gamelog = playergamelog.PlayerGameLog(player_id=wemby_id, season=season)
            df_season = gamelog.get_data_frames()[0]
            if df_season.empty:
                print(f"   Warning: No games returned for {season} (maybe not started yet).")
            else:
                # Season label -> ending year, e.g. '2023-24' -> 2024
                df_season['Season_Year'] = int(season.split('-')[0]) + 1
                all_games.append(df_season)
        except Exception as e:
            # Best effort: keep going with the other seasons, but say what went wrong.
            print(
                f"   Warning: Could not fetch data for {season} "
                f"(maybe not started yet): {type(e).__name__}: {e}"
            )
        finally:
            # Pause between requests even after a failure, to be polite to the API.
            time.sleep(0.6)

    # 3. Combine all seasons
    if not all_games:
        raise ValueError("No data found from API.")

    df_raw = pd.concat(all_games, ignore_index=True)

    # 4. Map the API columns to the project's column names.
    # MATCHUP looks like "SAS vs. LAL" (home) or "SAS @ DEN" (away);
    # the opponent is the last whitespace-separated token.
    matchup = df_raw['MATCHUP']
    df_clean = pd.DataFrame({
        'Date': pd.to_datetime(df_raw['GAME_DATE']),
        'Is_Home': matchup.str.contains('vs.', regex=False).astype(int),
        'Opponent': matchup.str.split().str[-1],
        'Win/Lose': df_raw['WL'],
        'Points': df_raw['PTS'],
        'Rebounds': df_raw['REB'],
        'Assists': df_raw['AST'],
        'Blocks': df_raw['BLK'],
        'Steals': df_raw['STL'],
        'Minutes': df_raw['MIN'].apply(_parse_minutes),
    })

    # Sort chronologically
    df_clean = df_clean.sort_values(by='Date').reset_index(drop=True)

    # 5. Rest days = calendar days since the previous game in the frame.
    # The very first game has no predecessor and is given 3 days.
    # NOTE(review): the gap is also computed across season boundaries, so a
    # season opener carries the whole off-season as "rest" - confirm intended.
    df_clean['Rest_Days'] = df_clean['Date'].diff().dt.days.fillna(3)

    # Format Date as string for display/CSV consistency
    df_clean['Date'] = df_clean['Date'].dt.strftime('%Y-%m-%d')

    return df_clean

# --- EXECUTION ---
try:
    df_all = fetch_wemby_career_data()

    # Persist the refreshed game log, replacing the previous static CSV.
    df_all.to_csv('wemby_career_enriched.csv', index=False)

    # Short report: number of games and the most recent one.
    print("Success! Database updated with latest NBA games.")
    print(f"Total Games Loaded: {len(df_all)}")
    print(" Last Game Loaded:")
    print(df_all.iloc[-1].loc[['Date', 'Opponent', 'Points', 'Rest_Days']])

except Exception as err:
    # Any failure (network, API, file write) is reported instead of raised.
    print(f"Error fetching data: {err}")



ðŸ”„ Connecting to NBA API...
   Downloading season 2023-24...
   Downloading season 2024-25...
   Downloading season 2025-26...
Success! Database updated with latest NBA games.
Total Games Loaded: 153
 Last Game Loaded:
Date         2026-02-04
Opponent            OKC
Points               22
Rest_Days           3.0
Name: 152, dtype: object


In [5]:
import pandas as pd
import numpy as np
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
import time

# --- 1. DATA INGESTION (AUTOMATED VIA API) ---
# Goal: Fetch live data directly from NBA API.

def fetch_wemby_career_data(seasons=None):
    """Download Victor Wembanyama's game logs and return them as one clean frame.

    Parameters
    ----------
    seasons : list of str, optional
        Season labels in the API format 'YYYY-YY'. Defaults to
        ['2023-24', '2024-25', '2025-26'].

    Returns
    -------
    pandas.DataFrame
        One row per game, sorted by date, with the columns Date (string
        'YYYY-MM-DD'), Season (ending year of the season, e.g. 2024 for
        '2023-24'), Is_Home, Opponent, Win/Lose, Points, Rebounds, Assists,
        Blocks, Steals, Minutes and Rest_Days.

    Raises
    ------
    ValueError
        If the player cannot be found in the static player list, or if no
        season returned any game.
    """
    # Written as an escape sequence so the source stays pure ASCII and the
    # emoji cannot be corrupted by a wrong file encoding.
    print("\U0001F504 Connecting to NBA API...")

    def _parse_minutes(value):
        """Return whole minutes from 34, 34.0, '34', '34.7' or '34:12'; missing -> 0."""
        if pd.isna(value):
            return 0
        text = str(value).strip()
        if not text:
            return 0
        # Keep only the minutes part if the value is in 'MM:SS' form.
        return int(float(text.split(':')[0]))

    if seasons is None:
        seasons = ['2023-24', '2024-25', '2025-26']

    # 1. Get Player ID
    wemby = next(
        (p for p in players.get_players() if p['full_name'] == 'Victor Wembanyama'),
        None,
    )
    if wemby is None:
        raise ValueError("Player 'Victor Wembanyama' not found in the static player list.")
    wemby_id = wemby['id']

    # 2. Fetch Seasons
    all_games = []
    for season in seasons:
        print(f"   Downloading season {season}...")
        try:
            gamelog = playergamelog.PlayerGameLog(player_id=wemby_id, season=season)
            df_season = gamelog.get_data_frames()[0]
            if not df_season.empty:
                # Create 'Season_Year' (e.g., '2023-24' -> 2024)
                df_season['Season_Year'] = int(season.split('-')[0]) + 1
                all_games.append(df_season)
        except Exception as e:
            # The season might not exist yet, so keep going - but report the
            # failure instead of hiding it, so real errors stay visible.
            print(f"   Warning: Could not fetch data for {season}: {type(e).__name__}: {e}")
        finally:
            # Pause between requests even after a failure.
            time.sleep(0.6)

    if not all_games:
        raise ValueError("No data found from API.")

    df_raw = pd.concat(all_games, ignore_index=True)

    # 3. Clean and Format
    # MATCHUP looks like "SAS vs. LAL" (home) or "SAS @ DEN" (away);
    # the opponent is the last whitespace-separated token.
    matchup = df_raw['MATCHUP']
    df_clean = pd.DataFrame({
        'Date': pd.to_datetime(df_raw['GAME_DATE']),
        'Season': df_raw['Season_Year'],  # season column added here
        'Is_Home': matchup.str.contains('vs.', regex=False).astype(int),
        'Opponent': matchup.str.split().str[-1],
        'Win/Lose': df_raw['WL'],
        'Points': df_raw['PTS'],
        'Rebounds': df_raw['REB'],
        'Assists': df_raw['AST'],
        'Blocks': df_raw['BLK'],
        'Steals': df_raw['STL'],
        'Minutes': df_raw['MIN'].apply(_parse_minutes),
    })

    # Sort and Rest Days
    df_clean = df_clean.sort_values(by='Date').reset_index(drop=True)
    # Rest days = calendar days since the previous game in the frame; the very
    # first game has no predecessor and is given 3 days.
    # NOTE(review): the gap is also computed across season boundaries, so a
    # season opener carries the whole off-season as "rest" - confirm intended.
    df_clean['Rest_Days'] = df_clean['Date'].diff().dt.days.fillna(3)
    df_clean['Date'] = df_clean['Date'].dt.strftime('%Y-%m-%d')

    return df_clean

# --- EXECUTION ---
try:
    df_all = fetch_wemby_career_data()
    # Overwrite the CSV that the modelling cell reads back in.
    df_all.to_csv('wemby_career_enriched.csv', index=False)
    print(f"Success! {len(df_all)} games loaded.")
    # Preview the three most recent games.
    print(df_all.loc[:, ['Date', 'Season', 'Opponent', 'Points']].tail(3))
except Exception as err:
    # Any failure (network, API, file write) is reported instead of raised.
    print(f"Error: {err}")

ðŸ”„ Connecting to NBA API...
   Downloading season 2023-24...
   Downloading season 2024-25...
   Downloading season 2025-26...
Success! 153 games loaded.
           Date  Season Opponent  Points
150  2026-01-31    2026      CHA      16
151  2026-02-01    2026      ORL      25
152  2026-02-04    2026      OKC      22


In [None]:
# --- REQUIRED IMPORTS ---
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# --- PART 2: MACHINE LEARNING PREDICTOR (FULLY AUTOMATED) ---
# Goal: Calculate fatigue automatically based on the latest API data.

# 1. Feature Engineering
df = pd.read_csv('wemby_career_enriched.csv')
df['Date'] = pd.to_datetime(df['Date']) # Ensure Date is datetime
# The rolling "form" features below are only meaningful on chronologically
# ordered rows, so enforce the order instead of trusting the CSV.
df = df.sort_values('Date', kind='stable').reset_index(drop=True)

# Encode Opponent (team abbreviation -> integer code)
le = LabelEncoder()
df['Opponent_Code'] = le.fit_transform(df['Opponent'])

# Calculate Rolling Form: mean of the 3 games *before* each game.
# closed='left' excludes the current row, so a game's own stats never leak
# into its features; the first 3 rows therefore have no form value (NaN).
cols = ['Points', 'Rebounds', 'Assists', 'Blocks', 'Steals']
for col in cols:
    df[f'Forme_{col}'] = df[col].rolling(window=3, closed='left').mean()

# 2. Train Random Forest Models
features = ['Opponent_Code', 'Is_Home', 'Rest_Days', 'Forme_Points', 'Forme_Rebounds', 'Forme_Blocks']
targets = ['Points', 'Rebounds', 'Assists', 'Blocks', 'Steals']

# Only drop rows that are unusable for the models (missing feature or target),
# not rows that merely have a NaN in some unrelated column.
df_ml = df.dropna(subset=features + targets).copy()
if df_ml.empty:
    raise ValueError("Not enough games to train: at least 4 games are required.")

models = {}
margins = {}

for target in targets:
    # oob_score=True keeps, for every training row, a prediction made only by
    # trees that did not see that row during fitting.
    model = RandomForestRegressor(n_estimators=100, random_state=42, oob_score=True)
    model.fit(df_ml[features], df_ml[target])
    models[target] = model
    # Mean absolute error on out-of-bag predictions. Scoring the forest on its
    # own training rows would give a far too small (over-optimistic) margin.
    preds = model.oob_prediction_
    margins[target] = float(np.mean(np.abs(preds - df_ml[target])))

# 3. The Smart Advisor Function
def wemby_advisor(opponent, is_home, match_date):
    """Print predicted stat lines for an upcoming game.

    Relies on the module-level objects built by the training code: `le`
    (fitted LabelEncoder), `df_ml` (training frame), `models`, `margins`
    and `targets`.

    Parameters
    ----------
    opponent : str
        Opponent label; must be one of `le.classes_`.
    is_home : bool
        True for a home game, False for an away game.
    match_date : date-like
        Date of the game to predict (anything `pd.to_datetime` accepts).

    Returns
    -------
    str or None
        An error message if `opponent` is unknown (nothing is printed in that
        case); otherwise None, after the prediction table has been printed.
    """
    if opponent not in le.classes_:
        return f"Error: Opponent '{opponent}' not found."

    # A. AUTOMATIC FATIGUE CALCULATION
    # Whole calendar days between the last game in the training data and the
    # requested date; normalize() drops any time-of-day component.
    last_game_date = pd.to_datetime(df_ml.iloc[-1]['Date']).normalize()
    target_date = pd.to_datetime(match_date).normalize()
    delta = (target_date - last_game_date).days

    if delta < 1:
        # Still produce a prediction, but make the questionable input visible.
        print(
            f"Warning: {target_date.date()} is not after the last recorded game "
            f"({last_game_date.date()}); assuming a back-to-back."
        )

    # One day (or less) since the last game counts as a back-to-back.
    rest_days = max(1, delta)
    if rest_days == 1:
        rest_msg = "1 Day (Back-to-Back detected)"
    else:
        rest_msg = f"{rest_days} Days of Rest"

    # B. Prepare Input Data
    # The training features use the mean of the 3 games *before* each game.
    # For the upcoming game that is the mean of the 3 most recent games played.
    # The last row's stored Forme_* value must not be reused: it excludes the
    # most recent game and would be stale by one game.
    form_window = 3  # must match the rolling window used for the Forme_* features
    recent_games = df_ml.tail(form_window)
    input_data = pd.DataFrame({
        'Opponent_Code': [le.transform([opponent])[0]],
        'Is_Home': [1 if is_home else 0],
        'Rest_Days': [rest_days],
        'Forme_Points': [recent_games['Points'].mean()],
        'Forme_Rebounds': [recent_games['Rebounds'].mean()],
        'Forme_Blocks': [recent_games['Blocks'].mean()]
    })

    # C. Display Results
    loc_str = "Home" if is_home else "Away"
    print(f"\nWEMBY ADVISOR vs {opponent} ({loc_str})")
    print(f"Date: {target_date.date()} | Fatigue: {rest_msg}")
    print("="*60)
    print(f"{'STAT':<10} | {'SAFE BET (Low Risk)':<25} | {'VALUE BET (Target)':<25}")
    print("-" * 60)

    for target in targets:
        pred = models[target].predict(input_data)[0]
        margin = margins[target]
        # Round down and never go below zero, since a negative line is
        # meaningless. int() alone truncates toward zero, which rounds
        # negative values the wrong way.
        safe_line = max(0, int(np.floor(pred - margin)))
        value_line = max(0, int(np.floor(pred)))
        print(f"{target:<10} | {f'Over {safe_line}.5':<25} | {f'Over {value_line}.5':<25}")
    print("="*60)

NameError: name 'LabelEncoder' is not defined

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import datetime

# --- PART 3: INTERACTIVE DASHBOARD (CLEAN VERSION) ---
# Goal: Minimalist GUI with automatic fatigue detection.

# 1. Create Widgets
# Opponent selector: every team label known to the fitted encoder, A-Z.
known_opponents = sorted(le.classes_)
opponent_dropdown = widgets.Dropdown(
    description='Opponent:',
    options=known_opponents,
    style=dict(description_width='initial'),
)

# Home/Away selector; the widget value is the boolean passed on as is_home.
location_radio = widgets.RadioButtons(
    description='Location:',
    options=[('Home', True), ('Away', False)],
    style=dict(description_width='initial'),
)

# Match date selector, pre-filled with tomorrow's date.
tomorrow = datetime.date.today() + datetime.timedelta(days=1)
date_picker = widgets.DatePicker(
    description='Match Date:',
    value=tomorrow,
    disabled=False,
)

# Button that triggers the prediction.
button_layout = widgets.Layout(width='50%', height='40px')
predict_button = widgets.Button(
    description='Generate Prediction',
    button_style='success',
    layout=button_layout,
)

# Area the prediction text is rendered into.
output_area = widgets.Output()

# 2. Logic
def on_button_click(b):
    """Button callback: run the advisor with the current widget values.

    `b` is the clicked button (unused). All output is written to
    `output_area`, which is cleared first.
    """
    with output_area:
        clear_output()
        if date_picker.value is None:
            print("Please select a valid date.")
            return

        # We only pass the 3 necessary arguments now
        message = wemby_advisor(
            opponent=opponent_dropdown.value,
            is_home=location_radio.value,
            match_date=date_picker.value
        )
        # wemby_advisor reports problems by returning a message rather than
        # printing it; show it so a failure is not silent.
        if message is not None:
            print(message)

# Wire the callback to the button.
predict_button.on_click(on_button_click)

# 3. Display
# Stack the input controls vertically; the output area is rendered below them.
print("\n--- WEMBY PREDICTOR DASHBOARD ---")
dashboard_controls = [opponent_dropdown, location_radio, date_picker, predict_button]
ui = widgets.VBox(children=dashboard_controls)
display(ui, output_area)