In [1]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder, scoreboardv2
from datetime import datetime, timedelta
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
from IPython.display import display

In [2]:
# today = datetime.now()
today = pd.Timestamp('2026-02-19')  # Fixed date for testing
tomorrow = today + timedelta(days=1)
tomorrow_str = tomorrow.strftime('%Y-%m-%d')

# Fetch the scoreboard (game headers + per-team line scores) for both days.
board_today = scoreboardv2.ScoreboardV2(game_date=today.strftime('%Y-%m-%d'), day_offset=0)
df_today = board_today.game_header.get_data_frame()
line_score_df_today = board_today.line_score.get_data_frame()

board_tomorrow = scoreboardv2.ScoreboardV2(game_date=tomorrow_str, day_offset=0)
df_tomorrow = board_tomorrow.game_header.get_data_frame()
line_score_df_tomorrow = board_tomorrow.line_score.get_data_frame()

# Historical games with the engineered features used later to build model inputs.
df_history = pd.read_parquet('../dataframes/nba_games_history.parquet')
df_history['GAME_DATE'] = pd.to_datetime(df_history['GAME_DATE'])
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


def _keep_predictable_games(header_df):
    """
    Drop scoreboard rows that cannot be fed to the model.

    Removes games with a missing home/visitor team ID and keeps only games
    whose GAME_ID starts with '002'.
    NOTE(review): '002' is presumably the regular-season game-ID prefix -- confirm.
    """
    header_df = header_df.dropna(subset=['HOME_TEAM_ID', 'VISITOR_TEAM_ID'])
    return header_df[header_df['GAME_ID'].astype(str).str.startswith('002')]


# Apply the same filter to both days (previously only today's games were
# restricted by GAME_ID prefix, so tomorrow's slate could contain other games).
df_today = _keep_predictable_games(df_today)
df_tomorrow = _keep_predictable_games(df_tomorrow)

df_upcoming = pd.concat([df_today, df_tomorrow])
line_score_upcoming = pd.concat([line_score_df_today, line_score_df_tomorrow])

# Attach the home and away team abbreviations from the line-score table.
_team_lookup = line_score_upcoming[['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION']]


def _attach_abbreviation(games, id_column, abbreviation_column):
    """Left-join the abbreviation of the team in `id_column` as `abbreviation_column`."""
    joined = pd.merge(games, _team_lookup,
                      left_on=['GAME_ID', id_column],
                      right_on=['GAME_ID', 'TEAM_ID'],
                      how='left')
    joined = joined.rename(columns={'TEAM_ABBREVIATION': abbreviation_column})
    # TEAM_ID only served as the join key on the line-score side.
    return joined.drop(columns=['TEAM_ID'])


# Home side first, so HOME_TEAM_ABBREVIATION precedes AWAY_TEAM_ABBREVIATION.
df_upcoming = _attach_abbreviation(df_upcoming, 'HOME_TEAM_ID', 'HOME_TEAM_ABBREVIATION')

# From here on the visitor is called "away".
df_upcoming = df_upcoming.rename(columns={'VISITOR_TEAM_ID': 'AWAY_TEAM_ID'})
df_upcoming = _attach_abbreviation(df_upcoming, 'AWAY_TEAM_ID', 'AWAY_TEAM_ABBREVIATION')

# Map API IDs to history IDs
# Team IDs are re-derived from the abbreviation so that upcoming games use
# exactly the IDs stored in df_history.
try:
    id_map = (
        df_history[['HOME_TEAM_ABBREVIATION', 'HOME_TEAM_ID']]
        .drop_duplicates()
        .set_index('HOME_TEAM_ABBREVIATION')['HOME_TEAM_ID']
        .to_dict()
    )

    def map_id(abbr):
        """Return the history team ID for an abbreviation, or None if unknown."""
        return id_map.get(abbr)

    # Series.map with a dict leaves NaN for abbreviations missing from id_map.
    _home_ids = df_upcoming['HOME_TEAM_ABBREVIATION'].map(id_map)
    _away_ids = df_upcoming['AWAY_TEAM_ABBREVIATION'].map(id_map)

except KeyError as e:
    # A required column is missing from df_history or df_upcoming.
    print(f"Error mapping team IDs: {e}")

else:
    # An unknown abbreviation previously surfaced as an uncaught error from
    # .astype(int); report those games and drop them instead.
    _unmapped = _home_ids.isna() | _away_ids.isna()
    if _unmapped.any():
        _skipped = df_upcoming.loc[
            _unmapped, ['GAME_ID', 'HOME_TEAM_ABBREVIATION', 'AWAY_TEAM_ABBREVIATION']
        ]
        print(f"Error mapping team IDs: no history ID for {len(_skipped)} game(s), skipping:")
        print(_skipped.to_string(index=False))

    df_upcoming['HOME_TEAM_ID'] = _home_ids
    df_upcoming['AWAY_TEAM_ID'] = _away_ids
    df_upcoming = df_upcoming.loc[~_unmapped].reset_index(drop=True)
    df_upcoming = df_upcoming.astype({'HOME_TEAM_ID': int, 'AWAY_TEAM_ID': int})

# Map matchups (Recreate encoding done in the training data)
# Code used for any matchup that has no encoding in df_history. The same value
# is used when the whole lookup fails, so a failure is never mistaken for a
# real encoded matchup.
UNKNOWN_MATCHUP = -1

try:
    temp_hist = df_history[['HOME_TEAM_ABBREVIATION', 'AWAY_TEAM_ABBREVIATION', 'MATCHUP']].drop_duplicates()
    temp_hist['MATCHUP_KEY'] = temp_hist['HOME_TEAM_ABBREVIATION'] + ' vs. ' + temp_hist['AWAY_TEAM_ABBREVIATION']
    matchup_map = temp_hist.set_index('MATCHUP_KEY')['MATCHUP'].to_dict()

    df_upcoming['MATCHUP_STR'] = df_upcoming['HOME_TEAM_ABBREVIATION'] + ' vs. ' + df_upcoming['AWAY_TEAM_ABBREVIATION']
    df_upcoming['MATCHUP'] = (
        df_upcoming['MATCHUP_STR'].map(matchup_map).fillna(UNKNOWN_MATCHUP).astype(int)
    )

except (KeyError, TypeError, ValueError) as e:
    # KeyError: missing column; TypeError: non-string abbreviations;
    # ValueError: MATCHUP values in the history that cannot be cast to int.
    print(f"ERROR encoding matchups: {e}")
    df_upcoming['MATCHUP'] = UNKNOWN_MATCHUP

# Parse the scoreboard date and expose it under the name used by df_history.
_parsed_dates = pd.to_datetime(df_upcoming['GAME_DATE_EST'])
df_upcoming = (
    df_upcoming
    .assign(GAME_DATE_EST=_parsed_dates)
    .rename(columns={'GAME_DATE_EST': 'GAME_DATE'})
)

def get_last_team_game(df_games, team_id, game_date):
    """
    Return the team's most recent game strictly before `game_date`.

    The result is a `(row, side)` pair where `row` is the matching row of
    `df_games` and `side` is 'home' or 'away' depending on which side the team
    played in that game. `(None, None)` is returned when the team has no
    earlier game. If the latest home and away games share a date, the home
    game is returned.
    """
    played_earlier = df_games['GAME_DATE'] < game_date

    def latest_on_side(id_column):
        # Latest earlier game in which the team appears in `id_column`.
        candidates = df_games.loc[(df_games[id_column] == team_id) & played_earlier]
        if candidates.empty:
            return None
        return candidates.sort_values('GAME_DATE').iloc[-1]

    latest_home = latest_on_side('HOME_TEAM_ID')
    latest_away = latest_on_side('AWAY_TEAM_ID')

    if latest_home is None:
        if latest_away is None:
            return None, None
        return latest_away, 'away'

    if latest_away is None or latest_home['GAME_DATE'] >= latest_away['GAME_DATE']:
        return latest_home, 'home'

    return latest_away, 'away'

def get_current_team_elo(df_history, k_factor=40, retention_rate=0.75, base_elo=1500):
    """
    Replay all games chronologically and return every team's ELO state.

    Games are processed in (GAME_DATE, GAME_ID) order. The first time a team
    appears in a new SEASON_ID its rating is pulled towards `base_elo`
    (`retention_rate` of the old rating is kept) before the game is scored.

    Parameters:
        df_history: DataFrame with GAME_DATE, GAME_ID, SEASON_ID, HOME_TEAM_ID,
            AWAY_TEAM_ID and HOME_WIN (1 when the home team won, else 0).
        k_factor: maximum rating change produced by a single game.
        retention_rate: share of the previous rating kept at a season change.
        base_elo: rating of an unseen team and the season-regression target.

    Returns:
        A tuple `(elo_dict, team_histories)`:
        - elo_dict maps team ID -> rating after the team's last game.
        - team_histories maps team ID -> list of the team's pre-game ratings in
          game order, followed by one final entry equal to elo_dict[team].
    """
    df = df_history.sort_values(['GAME_DATE', 'GAME_ID'])
    elo_dict = {}
    team_histories = {}
    last_season_dict = {}

    def pre_game_elo(team_id, season_id):
        # Rating going into the game, regressed once per team per new season.
        rating = elo_dict.get(team_id, base_elo)
        if last_season_dict.get(team_id) != season_id:
            rating = (rating * retention_rate) + (base_elo * (1 - retention_rate))
            last_season_dict[team_id] = season_id
        return rating

    # Iterating the needed columns directly avoids building a Series per row.
    games = zip(df['HOME_TEAM_ID'], df['AWAY_TEAM_ID'], df['SEASON_ID'], df['HOME_WIN'])
    for home_id, away_id, season_id, home_win in games:
        elo_h = pre_game_elo(home_id, season_id)
        elo_a = pre_game_elo(away_id, season_id)

        # Save pre-game ELOs
        team_histories.setdefault(home_id, []).append(elo_h)
        team_histories.setdefault(away_id, []).append(elo_a)

        # Expected home score from the rating gap, then the zero-sum update.
        prob_h = 1 / (1 + 10 ** ((elo_a - elo_h) / 400))
        elo_dict[home_id] = elo_h + k_factor * (home_win - prob_h)
        elo_dict[away_id] = elo_a + k_factor * ((1 - home_win) - (1 - prob_h))

    # Add final state: every team in elo_dict already has a history list.
    for t_id, rating in elo_dict.items():
        team_histories[t_id].append(rating)

    return elo_dict, team_histories

# Replay the full history once; build_feature_row reads these two module-level
# tables (current rating per team, and each team's rating history).
current_elos, team_elo_histories = get_current_team_elo(df_history)

# Per-team stat columns copied from the team's previous game, in output order.
_BOX_SCORE_COLS = (
    'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK',
    'TOV', 'PF',
)
# GAME_COUNT sits between these two groups and is the only stat that is adjusted.
_FORM_COLS_BEFORE_GAME_COUNT = ('WINS_LAST_5_GAMES', 'SEASON_RECORD_PCT')
_FORM_COLS_AFTER_GAME_COUNT = (
    'ROLLING_PTS_PER_GAME', 'ROLLING_PLUS_MINUS_PER_GAME', 'SEASON_FG_PCT',
    'SEASON_FG3_PCT', 'SEASON_FT_PCT', 'SEASON_REB_PER_GAME',
    'SEASON_TOV_PER_GAME',
)


def _team_stat_features(prefix, last_game, last_side, days_rest):
    """
    Build the ordered `<prefix>_*` stat features for one team.

    `last_game` is the team's previous game row and `last_side` ('home' or
    'away') says which side of that row holds the team's own numbers.
    """
    source = 'HOME' if last_side == 'home' else 'AWAY'

    def stat(col):
        return last_game[f'{source}_{col}']

    features = {f'{prefix}_{col}': stat(col) for col in _BOX_SCORE_COLS}
    features[f'{prefix}_DAYS_BETWEEN_GAMES'] = days_rest
    for col in _FORM_COLS_BEFORE_GAME_COUNT:
        features[f'{prefix}_{col}'] = stat(col)
    # The upcoming game is one more than the count stored on the last game.
    features[f'{prefix}_GAME_COUNT'] = stat('GAME_COUNT') + 1
    for col in _FORM_COLS_AFTER_GAME_COUNT:
        features[f'{prefix}_{col}'] = stat(col)
    # Back-to-back: the previous game was played exactly one day earlier.
    features[f'{prefix}_IS_B2B'] = 1 if days_rest == 1 else 0
    return features


def build_feature_row(up_row, df_games):
    """
    Build the model feature dict for one upcoming game.

    Each team's stats are taken from its most recent game before the upcoming
    game's date (whichever side it played on), combined with rest days and the
    module-level ELO tables `current_elos` / `team_elo_histories`.

    Parameters:
        up_row: row with HOME_TEAM_ID, AWAY_TEAM_ID, GAME_DATE, GAME_ID,
            MATCHUP and both team abbreviations.
        df_games: historical games searched for each team's previous game.

    Returns:
        dict mapping feature name -> value, or None when either team has no
        earlier game in `df_games`.
    """
    home_id = up_row['HOME_TEAM_ID']
    away_id = up_row['AWAY_TEAM_ID']
    game_date = up_row['GAME_DATE']

    home_game, home_side = get_last_team_game(df_games, home_id, game_date)
    away_game, away_side = get_last_team_game(df_games, away_id, game_date)

    # Skip if no history found
    if home_game is None or away_game is None:
        return None

    home_days_rest = (game_date - home_game['GAME_DATE']).days
    away_days_rest = (game_date - away_game['GAME_DATE']).days

    def team_name(last_game, last_side):
        # Look up the column for the side the team actually played on; the
        # result is None when the history has no such column.
        source = 'HOME' if last_side == 'home' else 'AWAY'
        return last_game.get(f'{source}_TEAM_NAME')

    # Get current ELO
    cur_elo_h = current_elos.get(home_id, 1500)
    cur_elo_a = current_elos.get(away_id, 1500)

    def get_momentum(t_id, current_val):
        # Rating change relative to the 5th-from-last history entry; 0 when
        # the team has fewer than 5 entries.
        # NOTE(review): the history ends with the current rating, so hist[-5]
        # is the pre-game rating of the 4th most recent game, not the 5th --
        # confirm this matches how the training features were built.
        hist = team_elo_histories.get(t_id, [])
        if len(hist) >= 5:
            return current_val - hist[-5]
        return 0

    mom_h = get_momentum(home_id, cur_elo_h)
    mom_a = get_momentum(away_id, cur_elo_a)

    row = {
        # The season is taken from the home team's previous game.
        'SEASON_ID': home_game['SEASON_ID'],
        'HOME_TEAM_ID': home_id,
        'HOME_TEAM_ABBREVIATION': up_row['HOME_TEAM_ABBREVIATION'],
        'HOME_TEAM_NAME': team_name(home_game, home_side),
        'GAME_ID': up_row['GAME_ID'],
        'GAME_DATE': game_date,
        'MATCHUP': up_row['MATCHUP'],

        # HOME_
        **_team_stat_features('HOME', home_game, home_side, home_days_rest),

        # AWAY_
        'AWAY_TEAM_ID': away_id,
        'AWAY_TEAM_ABBREVIATION': up_row['AWAY_TEAM_ABBREVIATION'],
        'AWAY_TEAM_NAME': team_name(away_game, away_side),
        **_team_stat_features('AWAY', away_game, away_side, away_days_rest),
        'REST_DIFF': home_days_rest - away_days_rest,

        # ELO
        'HOME_ELO': cur_elo_h,
        'AWAY_ELO': cur_elo_a,
        'ELO_DIFF': cur_elo_h - cur_elo_a,
        'HOME_ELO_MOMENTUM': mom_h,
        'AWAY_ELO_MOMENTUM': mom_a,
        'MOMENTUM_DIFF': mom_h - mom_a,
    }

    return row

In [3]:
# NOTE: joblib.load unpickles its input; only load model files you trust.
xgb_model = joblib.load('../xgb model/nba_model_xgb.pkl')
features_xgb = joblib.load('../xgb model/feature_list.pkl')

prediction_rows = []

print(f"Building feature rows for {len(df_upcoming)} upcoming games")

# Iterate over upcoming games and build feature rows
for _, upcoming_game in df_upcoming.iterrows():
    feat_row = build_feature_row(upcoming_game, df_history)
    if feat_row is not None:
        prediction_rows.append(feat_row)
    else:
        print(f"ERROR: no history for {upcoming_game['HOME_TEAM_ABBREVIATION']} vs {upcoming_game['AWAY_TEAM_ABBREVIATION']}")

if prediction_rows:
    # Select the model's feature columns in the order stored with the model.
    X_upcoming = pd.DataFrame(prediction_rows)[features_xgb]
    # Column 1 of predict_proba is used as the home-win probability.
    probs = xgb_model.predict_proba(X_upcoming)[:, 1]
    print("\n" + "="*85)
    print(f"{'DATE':<12} | {'MATCHUP':<15} | {'PREDICTED WINNER':<18} | {'HOME':<10} | {'AWAY':<10}")
    print("="*85)

    # One output line per game, in the same order as the feature rows.
    for feat_row, p_home in zip(prediction_rows, probs):
        home_team = feat_row['HOME_TEAM_ABBREVIATION']
        away_team = feat_row['AWAY_TEAM_ABBREVIATION']
        date_str = feat_row['GAME_DATE'].strftime('%d/%m')
        p_away = 1 - p_home

        # Predict winner and confidence (an exact 50/50 goes to the away team)
        if p_home > 0.5:
            winner, confidence = home_team, p_home
        else:
            winner, confidence = away_team, p_away

        # Pad first, then colour: the escape codes must not count towards the
        # column width, and the reset code is emitted only when a colour was
        # actually opened (it used to be printed on every row).
        winner_cell = f"{winner:<18}"
        if confidence > 0.65:
            winner_cell = f"\033[92m{winner_cell}\033[0m"

        matchup_cell = f"{home_team} vs {away_team}"
        print(f"{date_str:<12} | {matchup_cell:<15} | {winner_cell} | {p_home:<10.1%} | {p_away:.1%}")
    print("="*85)

else:
    print("No game processed")

Building feature rows for 19 upcoming games

DATE         | MATCHUP         | PREDICTED WINNER   | HOME       | AWAY      
19/02        | CHA vs HOU      | CHA               [0m | 51.0%      | 49.0%
19/02        | CLE vs BKN      | [92mCLE               [0m | 71.7%      | 28.3%
19/02        | PHI vs ATL      | PHI               [0m | 55.7%      | 44.3%
19/02        | WAS vs IND      | IND               [0m | 42.8%      | 57.2%
19/02        | NYK vs DET      | DET               [0m | 45.5%      | 54.5%
19/02        | CHI vs TOR      | TOR               [0m | 36.8%      | 63.2%
19/02        | SAS vs PHX      | SAS               [0m | 59.0%      | 41.0%
19/02        | GSW vs BOS      | BOS               [0m | 43.3%      | 56.7%
19/02        | SAC vs ORL      | [92mORL               [0m | 30.7%      | 69.3%
19/02        | LAC vs DEN      | LAC               [0m | 51.5%      | 48.5%
20/02        | CHA vs CLE      | CLE               [0m | 45.7%      | 54.3%
20/02        | WAS v