In [12]:
import pandas as pd
import numpy as np
import warnings
import json
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import time
from selenium.webdriver.common.by import By
from IPython.display import Javascript
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))
#from skopt import BayesSearchCV
import os

def download_current_data(date=None):
    # Define the base URL and parameters
    base_url = "https://www.bigdataball.com/wp-admin/admin-ajax.php?action=outofthebox-download"
    account_id = "dbid:AADL0JM6TbjOPoH-7_QmtAYk4iT4-vis0Tk"
    listtoken = "5a58bb7418a59d0ec0a5558a510e959d"

    # Get current date in the required format
    current_date = datetime.now()
    yesterday = current_date - timedelta(1)
    current_date = yesterday.strftime("%m-%d-%Y") if date == None else date
    filename = f"{current_date}-nba-season-team-feed.xlsx"
    outofthebox_path = f"%2F{filename}"

    # Construct the full URL
    full_url = f"{base_url}&OutoftheBoxpath={outofthebox_path}&lastpath=%2F&account_id={account_id}&listtoken={listtoken}&dl=1"

    # Directory to save the file
    save_dir = "./"
    save_path = os.path.join(save_dir, filename)
    print(save_path)

    # don't redownload if we already have it
    if os.path.exists(save_path):
        return filename

    # Use curl to download the file
    response = requests.get(full_url, stream=True)
    print(response.status_code)
    if response.status_code == 200:
        with open(save_path, 'wb') as f:
            f.write(response.content)
    return filename

column_mappings = {
    "COLS": ['GAME-ID', 'DATE', 'TEAM', '1Q', '2Q', '3Q', '4Q', 'F', 'MIN', 'FG', 'FGA', '3P', '3PA', 'FT', 'FTA', 'OR', 'DR', 'TOT', 'A', 'PF', 'ST', 'TO', 'BL', 'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF', 'TEAM_REST_DAYS', 'MAIN REF', 'CREW', 'OPENING ODDS', 'OPENING SPREAD', 'OPENING TOTAL', 'CLOSING_ODDS', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'MONEYLINE', 'HALFTIME'],
    "cleaned_cols": ['Dataset', 'GAME-ID', 'DATE', 'TEAM', 'VENUE', '1Q', '2Q', '3Q', '4Q', 'OT1', 'OT2', 'OT3', 'OT4', 'OT5', 'F', 'MIN', 'FG', 'FGA', '3P', '3PA', 'FT', 'FTA', 'OR', 'DR', 'TOT', 'A', 'PF', 'ST', 'TO', 'TO_TO', 'BL', 'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF', 'TEAM_REST_DAYS', 'STARTER_1', 'STARTER_2', 'STARTER_3', 'STARTER_4', 'STARTER_5', 'MAIN REF', 'CREW', 'OPENING ODDS', 'OPENING SPREAD', 'OPENING TOTAL', 'LINE_MOVEMENT_1', 'LINE_MOVEMENT_2', 'LINE_MOVEMENT_3', 'CLOSING_ODDS', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'MONEYLINE', 'HALFTIME', 'BOX_SCORE_URL', 'ODDS_URL', 'BIGDATABALL_DATASET', 'FULL_GAME_ODDS_URL', 'CREW_CHIEF', 'REFEREE_UMPIRE'],
    "t_cleaned_cols": ['BIGDATABALL_DATASET', 'GAME-ID', 'DATE', 'TEAM', 'VENUE', '1Q', '2Q', '3Q', '4Q', 'OT1', 'OT2', 'OT3', 'OT4', 'OT5', 'F', 'MIN', 'FG', 'FGA', '3P', '3PA', 'FT', 'FTA', 'OR', 'DR', 'TOT', 'A', 'PF', 'ST', 'TO', 'TO_TO', 'BL', 'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF', 'TEAM_REST_DAYS', 'STARTER_1', 'STARTER_2', 'STARTER_3', 'STARTER_4', 'STARTER_5', 'MAIN REF', 'CREW', 'OPENING_ODDS', 'OPENING_SPREAD', 'OPENING_TOTAL', 'LINE_MOVEMENT_1', 'LINE_MOVEMENT_2', 'LINE_MOVEMENT_3', 'CLOSING_ODDS', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'MONEYLINE', 'HALFTIME', 'BOX_SCORE_URL', 'FULL_GAME_ODDS_URL'],
    "TRAIN_COLS": ['Offensive_Rating', 'Defensive_Rating', 'Opp_Offensive_Rating', 'Opp_Defensive_Rating','Offensive_Var', 'Defensive_Var', 'Opp_Offensive_Var', 'Opp_Defensive_Var', 'Opp_Elo', 'Opp_Momentum', 'SPREAD_LINE_MOVEMENT_1', 'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3', 'TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2', 'TOTAL_LINE_MOVEMENT_3', 'CREW', 'Opp_Avg_3_game_DEFF', 'Opp_Avg_5_game_DEFF', 'Opp_Season_Avg_DEFF', 'Opp_Avg_3_game_OEFF', 'Opp_Avg_5_game_OEFF', 'Opp_Season_Avg_OEFF', 'Opp_Avg_3_game_PACE', 'Opp_Avg_5_game_PACE', 'Opp_Season_Avg_PACE', 'Opp_Avg_3_game_POSS', 'Opp_Avg_5_game_POSS', 'Opp_Season_Avg_POSS', 'Avg_3_game_DEFF', 'Avg_5_game_DEFF', 'Season_Avg_DEFF', 'Avg_3_game_OEFF', 'Avg_5_game_OEFF', 'Season_Avg_OEFF', 'Avg_3_game_PACE', 'Avg_5_game_PACE', 'Season_Avg_PACE', 'Avg_3_game_POSS', 'Avg_5_game_POSS', 'Season_Avg_POSS', 'Avg_3_game_OR', 'Avg_5_game_OR', 'Season_Avg_OR', 'Avg_3_game_3P', 'Avg_5_game_3P', 'Season_Avg_3P', 'Avg_3_game_3PA', 'Avg_5_game_3PA', 'Season_Avg_3PA', 'Avg_3_game_TO', 'Avg_5_game_TO', 'Season_Avg_TO', 'Avg_3_game_FT', 'Avg_5_game_FT', 'Season_Avg_FT', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'MONEYLINE', 'Avg_3_game_PTS', 'Avg_5_game_PTS', 'Season_Avg_PTS', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3', 'VENUE', 'TEAM', 'Opponent', 'Win_Loss_Diff', 'HOME TEAM WIN%', 'HOME TEAM POINTS DIFFERENTIAL', 'TOTAL POINTS PER GAME', 'CALLED FOULS PER GAME', 'FOUL% AGAINST ROAD TEAMS', 'FOUL% AGAINST HOME TEAMS', 'FOUL DIFFERENTIAL (Against Road Team) - (Against Home Team)', 'Elo_Rating', 'Momentum', 'MAIN REF', 'TEAM_REST_DAYS'],
    "today_mappings": ['Last_ML_1'],
    "TARGET": 'Result',
    "SEASON_MAP": {'NBA 2021-2022 Regular Season': 2022, 'NBA 2020-2021 Regular Season': 2021, 'NBA 2019-2020 Regular Season': 2020, 'NBA 2022-2023 Regular Season': 2023, 'NBA 2020 Playoffs': 2020, 'NBA 2021 Play-in': 2021, 'NBA 2021 Playoffs': 2021, 'NBA 2023 Play-In': 2023, 'NBA 2022 Play-In': 2022, 'NBA 2023 Playoffs': 2023, 'NBA 2022 Playoffs': 2022, 'NBA 2023-2024 Regular Season': 2024, 'NBA 2023 In-Season Tournament': 2024},
    "t_train_cols": ['Opp_Elo', 'Opp_Momentum', 'SPREAD_LINE_MOVEMENT_1', 'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3', 'TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2', 'TOTAL_LINE_MOVEMENT_3', 'CREW', 'Opp_Avg_3_game_DEFF', 'Opp_Avg_5_game_DEFF', 'Opp_Season_Avg_DEFF', 'Opp_Avg_3_game_OEFF', 'Opp_Avg_5_game_OEFF', 'Opp_Season_Avg_OEFF', 'Opp_Avg_3_game_PACE', 'Opp_Avg_5_game_PACE', 'Opp_Season_Avg_PACE', 'Opp_Avg_3_game_POSS', 'Opp_Avg_5_game_POSS', 'Opp_Season_Avg_POSS','Avg_3_game_DEFF', 'Avg_5_game_DEFF', 'Season_Avg_DEFF', 'Avg_3_game_OEFF', 'Avg_5_game_OEFF', 'Season_Avg_OEFF', 'Avg_3_game_PACE', 'Avg_5_game_PACE', 'Season_Avg_PACE', 'Avg_3_game_POSS', 'Avg_5_game_POSS', 'Season_Avg_POSS', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'MONEYLINE', 'Avg_3_game_PTS', 'Avg_5_game_PTS', 'Season_Avg_PTS', 'Streak', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3', 'VENUE', 'TEAM', 'Opponent', 'Win_Loss_Diff', 'HOME TEAM WIN%', 'HOME TEAM POINTS DIFFERENTIAL', 'TOTAL POINTS PER GAME', 'CALLED FOULS PER GAME', 'FOUL% AGAINST ROAD TEAMS', 'FOUL% AGAINST HOME TEAMS', 'FOUL DIFFERENTIAL (Against Road Team) - (Against Home Team)', 'Elo_Rating', 'Momentum', 'MAIN REF', 'TEAM_REST_DAYS'],
    "train_cols_final": ['Opp_Elo', 'Opp_Momentum', 'SPREAD_LINE_MOVEMENT_1', 'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3', 'TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2', 'TOTAL_LINE_MOVEMENT_3', 'CREW', 'Opp_Avg_3_game_DEFF', 'Opp_Avg_5_game_DEFF', 'Opp_Season_Avg_DEFF', 'Opp_Avg_3_game_OEFF', 'Opp_Avg_5_game_OEFF', 'Opp_Season_Avg_OEFF', 'Opp_Avg_3_game_PACE', 'Opp_Avg_5_game_PACE', 'Opp_Season_Avg_PACE', 'Opp_Avg_3_game_POSS', 'Opp_Avg_5_game_POSS', 'Opp_Season_Avg_POSS','Avg_3_game_DEFF', 'Avg_5_game_DEFF', 'Season_Avg_DEFF', 'Avg_3_game_OEFF', 'Avg_5_game_OEFF', 'Season_Avg_OEFF', 'Avg_3_game_PACE', 'Avg_5_game_PACE', 'Season_Avg_PACE', 'Avg_3_game_POSS', 'Avg_5_game_POSS', 'Season_Avg_POSS', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'MONEYLINE', 'Avg_3_game_PTS', 'Avg_5_game_PTS', 'Season_Avg_PTS', 'Streak', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3', 'VENUE', 'TEAM', 'Opponent', 'Win_Loss_Diff', 'HOME TEAM WIN%', 'HOME TEAM POINTS DIFFERENTIAL', 'TOTAL POINTS PER GAME', 'CALLED FOULS PER GAME', 'FOUL% AGAINST ROAD TEAMS', 'FOUL% AGAINST HOME TEAMS', 'FOUL DIFFERENTIAL (Against Road Team) - (Against Home Team)', 'Elo_Rating', 'Momentum', 'MAIN REF', 'TEAM_REST_DAYS']
}

team_map = {
    'Hawks': 'Atlanta',
    'Nets': 'Brooklyn',
    'Celtics': 'Boston',
    'Hornets': 'Charlotte',
    'Bulls': 'Chicago',
    'Cavaliers': 'Cleveland',
    'Mavericks': 'Dallas',
    'Nuggets': 'Denver',
    'Pistons': 'Detroit',
    'Warriors': 'Golden State',
    'Rockets': 'Houston',
    'Pacers': 'Indiana',
    'Clippers': 'LA Clippers',
    'Lakers': 'LA Lakers',
    'Grizzlies': 'Memphis',
    'Heat': 'Miami',
    'Bucks': 'Milwaukee',
    'Timberwolves': 'Minnesota',
    'Pelicans': 'New Orleans',
    'Knicks': 'New York',
    'Thunder': 'Oklahoma City',
    'Magic': 'Orlando',
    '76ers': 'Philadelphia',
    'Suns': 'Phoenix',
    'Trail Blazers': 'Portland',
    'Kings': 'Sacramento',
    'Spurs': 'San Antonio',
    'Raptors': 'Toronto',
    'Jazz': 'Utah',
    'Wizards': 'Washington'
}
# To save this as a JSON object, you can use the following code


with open('column_mappings.json', 'w') as file:
    json.dump(column_mappings, file)

with open('column_mappings.json', 'r') as file:
    column_mappings = json.load(file)

# Now, you can access your column lists like this
COLS = column_mappings['COLS']
cleaned_cols = column_mappings['cleaned_cols']
t_cleaned_cols = column_mappings['t_cleaned_cols']
TRAIN_COLS = column_mappings['TRAIN_COLS']
today_mappings = column_mappings['today_mappings']
TARGET = column_mappings['TARGET']
SEASON_MAP = column_mappings['SEASON_MAP']
t_train_cols = column_mappings['t_train_cols']
train_cols_final = column_mappings['train_cols_final']

warnings.filterwarnings('ignore')
DATA_ROOT = '../live_data'

<IPython.core.display.Javascript object>

In [None]:
def american_odds_to_probability(odds):
    if odds > 0:
        probability = 100 / (odds + 100)
    else:
        probability = -odds / (-odds + 100)
    return probability

def calculate_profit(odds, size):
    if odds > 0:
        profit = (odds / 100) * size
    else:
        profit = (100 / -(odds + 0.0000001)) * size
    return profit

def kelly_criterion(bankroll, probability, odds, temper=1):
    """
    Calculate the optimal bet size using the Kelly Criterion.

    :param bankroll: Total amount of money you have to bet with.
    :param probability: The probability of the bet winning (from 0 to 1).
    :param odds: The odds being offered on the bet (in decimal format).
    :return: The recommended bet size according to the Kelly Criterion.
    """
    # Convert American odds to decimal if necessary
    if odds > 0:
        odds = (odds / 100) + 1
    elif odds < 0:
        odds = (100 / -odds) + 1

    # Calculate the Kelly bet fraction
    b = odds - 1  # Decimal odds minus 1
    q = 1 - probability  # Probability of losing
    kelly_fraction = (b * probability - q) / b

    # Calculate the recommended bet
    recommended_bet = (temper * kelly_fraction) * bankroll

    return recommended_bet

def combine_parlay_odds(odds_list):
    total_multiplier = 1
    for odds in odds_list:
        if odds > 0:  # Positive odds
            total_multiplier *= (odds / 100) + 1
        else:  # Negative odds
            total_multiplier *= 1 - (100 / (odds + 0.0000001))

    # Calculate parlay odds
    if total_multiplier >= 2:
        parlay_odds = (total_multiplier - 1) * 100
    else:
        parlay_odds = -100 / ((total_multiplier - 1) + 0.00000001)
    return parlay_odds

def get_top_bottom_features_with_scores(pred_contributions, feature_names):
    # Pair feature names with their contributions
    feature_contributions = zip(feature_names, pred_contributions)

    # Sort by the absolute value of contributions but keep the original sign
    sorted_features = sorted(feature_contributions, key=lambda x: abs(x[1]), reverse=True)

    # Get top 5 and bottom 5 features based on absolute value
    top_5_features = sorted_features[:5]
    bottom_5_features = sorted_features[-5:]

    return top_5_features, bottom_5_features

    return round(parlay_odds)
def print_wrapper(func):
    ansi_reset = '\033[0m'
    ansi_black = '\033[90m'
    ansi_red = '\033[91m'
    ansi_green = '\033[92m'
    ansi_yellow = '\033[93m'
    ansi_blue = '\033[94m'
    ansi_pink = '\033[95m'
    ansi_teal = '\033[96m'
    ansi_gray = '\033[97m'
    ansi_warning = '\033[31;1;4m'
    ansi_error = '\033[31;100m'
    def wrapped_func(*args,**kwargs):
        new_args = args + tuple()
        new_kwargs = kwargs.copy()
        for kwarg, kwvalue in kwargs.items(): # Loop through the keyword arguments
            if kwarg == "color":
                if kwvalue == "black":
                    color = ansi_black
                elif kwvalue == "red":
                    color = ansi_red
                elif kwvalue == "green":
                    color = ansi_green
                elif kwvalue == "yellow":
                    color = ansi_yellow
                elif kwvalue == "blue":
                    color = ansi_blue
                elif kwvalue == "pink":
                    color = ansi_pink
                elif kwvalue == "teal":
                    color = ansi_teal
                elif kwvalue == "gray":
                    color = ansi_gray
                elif kwvalue == "warning":
                    color = ansi_warning
                elif kwvalue == "error":
                    color = ansi_error
                new_kwargs = kwargs.copy() # Make a copy of the keyword arguments dict
                del new_kwargs["color"] # Remove color from the keyword arguments dict
        try: # Is the variable color defined?
            color
        except NameError:
            pass
            # no color was specified
        else:
            new_args = ()
            for arg in args:
                new_args += (f"{color}{arg}{ansi_reset}",) # Apply the ANSI escape codes to each non-keyword argument
        return func(*new_args,**new_kwargs)
    return wrapped_func

print = print_wrapper(print) # Apply the wrapper to the print() function

def probability_to_american_odds(probability):
    if probability < 0 or probability > 1:
        raise ValueError("Probability must be between 0 and 1")

    if probability == 0.5:
        return 100  # Even odds

    if probability > 0.5:
        return int(-100 * (probability / (1 - probability)))
    else:
        return int(100 * ((1 - probability) / probability))

def odds_to_str(odds):
  if odds <= 0:
    return odds
  else:
    return f'+{odds}'
team_strengths = {}
team_momentum = {}

def update_off_def_ratings(row, ratings, momentum_scores):
    K = 20  # K-factor for Elo adjustment
    m = 1.2  # Momentum factor
    momentum_decay = 0.1  # Decay factor for momentum

    team = row['TEAM']
    opponent = row['Opponent']

    # Get team and opponent ratings
    team_offense = ratings[team]['offense']
    team_defense = ratings[team]['defense']
    opponent_offense = ratings[opponent]['offense']
    opponent_defense = ratings[opponent]['defense']

    tm, om = momentum_scores[team], momentum_scores[opponent]

    # Expected points scored and allowed
    expected_points_scored = (team_offense + opponent_defense) / 2
    expected_points_allowed = (opponent_offense + team_defense) / 2

    # Actual points scored and allowed
    actual_points_scored = row['actual_points_scored']
    actual_points_allowed = row['actual_points_allowed']

    # Update offensive rating
    ratings[team]['offense'] += K * (actual_points_scored - expected_points_scored) / 100
    ratings[opponent]['defense'] += K * (actual_points_scored - expected_points_scored) / 100

    # Update defensive rating
    ratings[team]['defense'] += K * (expected_points_allowed - actual_points_allowed) / 100
    ratings[opponent]['offense'] += K * (expected_points_allowed - actual_points_allowed) / 100

    # Update momentum
    team_elo_diff = abs(ratings[team]['offense'] - ratings[opponent]['offense'])
    if actual_points_scored > expected_points_scored:
        momentum_scores[team] += momentum_decay * (tm + team_elo_diff)
    else:
        momentum_scores[team] -= momentum_decay * (tm - (team_elo_diff / m))

    return ratings[team], ratings[opponent], momentum_scores[team], momentum_scores[opponent]

def update_bayesian_off_def(row):
    # Parameters
    sigma_prior = 200.0  # Initial standard deviation for ratings
    sigma_obs = 15.0     # Standard deviation of observed points
    momentum_decay = 0.9  # Decay factor for momentum

    team = row['TEAM']
    opponent = row['Opponent']

    # Initialize offensive and defensive ratings if not already done
    if team not in team_ratings:
        team_ratings[team] = {
            'offense': {'mu': 1500.0, 'sigma2': sigma_prior ** 2},
            'defense': {'mu': 1500.0, 'sigma2': sigma_prior ** 2},
        }
        team_momentum[team] = 0.0
    if opponent not in team_ratings:
        team_ratings[opponent] = {
            'offense': {'mu': 1500.0, 'sigma2': sigma_prior ** 2},
            'defense': {'mu': 1500.0, 'sigma2': sigma_prior ** 2},
        }
        team_momentum[opponent] = 0.0

    # Get offensive and defensive strengths
    team_off_mu, team_off_sigma2 = team_ratings[team]['offense']['mu'], team_ratings[team]['offense']['sigma2']
    team_def_mu, team_def_sigma2 = team_ratings[team]['defense']['mu'], team_ratings[team]['defense']['sigma2']
    opp_off_mu, opp_off_sigma2 = team_ratings[opponent]['offense']['mu'], team_ratings[opponent]['offense']['sigma2']
    opp_def_mu, opp_def_sigma2 = team_ratings[opponent]['defense']['mu'], team_ratings[opponent]['defense']['sigma2']

    # Observed points scored by each team
    y_team_scored = row['F']
    y_opp_scored = row['actual_points_allowed']

    # Offensive update: points scored vs opponent defense
    mu_team_scored_prior = (team_off_mu + opp_def_mu) / 2
    sigma2_team_scored_prior = (team_off_sigma2 + opp_def_sigma2 + sigma_obs ** 2) / 2

    mu_team_scored_post = (
        (mu_team_scored_prior / sigma2_team_scored_prior + y_team_scored / sigma_obs ** 2)
        / (1 / sigma2_team_scored_prior + 1 / sigma_obs ** 2)
    )
    sigma2_team_scored_post = 1 / (1 / sigma2_team_scored_prior + 1 / sigma_obs ** 2)

    delta_off = mu_team_scored_post - mu_team_scored_prior

    team_ratings[team]['offense']['mu'] += delta_off * (team_off_sigma2 / (team_off_sigma2 + opp_def_sigma2))
    team_ratings[opponent]['defense']['mu'] += delta_off * (opp_def_sigma2 / (team_off_sigma2 + opp_def_sigma2))

    # Defensive update: points allowed vs opponent offense
    mu_team_allowed_prior = (opp_off_mu + team_def_mu) / 2
    sigma2_team_allowed_prior = (opp_off_sigma2 + team_def_sigma2 + sigma_obs ** 2) / 2

    mu_team_allowed_post = (
        (mu_team_allowed_prior / sigma2_team_allowed_prior + y_opp_scored / sigma_obs ** 2)
        / (1 / sigma2_team_allowed_prior + 1 / sigma_obs ** 2)
    )
    sigma2_team_allowed_post = 1 / (1 / sigma2_team_allowed_prior + 1 / sigma_obs ** 2)

    delta_def = mu_team_allowed_post - mu_team_allowed_prior

    team_ratings[team]['defense']['mu'] += delta_def * (team_def_sigma2 / (team_def_sigma2 + opp_off_sigma2))
    team_ratings[opponent]['offense']['mu'] += delta_def * (opp_off_sigma2 / (team_def_sigma2 + opp_off_sigma2))

    # Update variances
    team_ratings[team]['offense']['sigma2'] = sigma2_team_scored_post / 2
    team_ratings[opponent]['defense']['sigma2'] = sigma2_team_scored_post / 2
    team_ratings[team]['defense']['sigma2'] = sigma2_team_allowed_post / 2
    team_ratings[opponent]['offense']['sigma2'] = sigma2_team_allowed_post / 2

    # Update momentum
    team_momentum[team] = momentum_decay * team_momentum[team] + abs(delta_off + delta_def)
    team_momentum[opponent] = momentum_decay * team_momentum[opponent] + abs(delta_off + delta_def)

    return (
        team_ratings[team]['offense'], team_ratings[team]['defense'], team_momentum[team],
        team_ratings[opponent]['offense'], team_ratings[opponent]['defense'], team_momentum[opponent]
    )



def update_bayesian_elo_momentum(row):
    df = infer_df

    # Parameters
    sigma_prior = 200.0  # Initial standard deviation of team strengths
    sigma_obs = 15.0     # Standard deviation of the observed point differential
    momentum_decay = 0.9  # Decay factor for momentum

    team = row['TEAM']
    opponent = df[(df['GAME-ID'] == row['GAME-ID']) & (df['TEAM'] != team)]['TEAM'].values[0]

    # Initialize team strengths if not already done
    if team not in team_strengths:
        team_strengths[team] = {'mu': 1500.0, 'sigma2': sigma_prior ** 2}
        team_momentum[team] = 0.0
    if opponent not in team_strengths:
        team_strengths[opponent] = {'mu': 1500.0, 'sigma2': sigma_prior ** 2}
        team_momentum[opponent] = 0.0

    # Get team strengths
    team_mu, team_sigma2 = team_strengths[team]['mu'], team_strengths[team]['sigma2']
    opponent_mu, opponent_sigma2 = team_strengths[opponent]['mu'], team_strengths[opponent]['sigma2']

    # Observed outcome (point differential)
    y = row['F'] - df[(df['GAME-ID'] == row['GAME-ID']) & (df['TEAM'] != team)]['F'].values[0]

    # Prior difference in strengths
    mu_diff_prior = team_mu - opponent_mu
    sigma2_diff_prior = team_sigma2 + opponent_sigma2 + sigma_obs ** 2

    # Bayesian update
    # Compute the posterior mean and variance of the difference in team strengths
    mu_diff_post = (mu_diff_prior / sigma2_diff_prior + y / sigma_obs ** 2) / (1 / sigma2_diff_prior + 1 / sigma_obs ** 2)
    sigma2_diff_post = 1 / (1 / sigma2_diff_prior + 1 / sigma_obs ** 2)

    # Compute the update amounts
    delta_mu = mu_diff_post - mu_diff_prior

    # Update team strengths (split the update between both teams)
    team_strengths[team]['mu'] += delta_mu * (team_sigma2 / (team_sigma2 + opponent_sigma2))
    team_strengths[opponent]['mu'] -= delta_mu * (opponent_sigma2 / (team_sigma2 + opponent_sigma2))

    # Update variances (simplified for demonstration purposes)
    team_strengths[team]['sigma2'] = sigma2_diff_post / 2
    team_strengths[opponent]['sigma2'] = sigma2_diff_post / 2

    # Update momentum (momentum is proportional to the change in team strength)
    team_momentum[team] = momentum_decay * team_momentum[team] + abs(delta_mu)
    team_momentum[opponent] = momentum_decay * team_momentum[opponent] + abs(delta_mu)

    return (team_strengths[team]['mu'], team_strengths[team]['sigma2'], team_momentum[team],
            team_strengths[opponent]['mu'], team_strengths[opponent]['sigma2'], team_momentum[opponent])


def update_elo_momentum(row):
    df = infer_df
    #do_train = train

    K = 20  # K-factor in Elo rating
    m = 1.2 # m factor in momentup
    momentum_decay = 0.1  # Decay factor for momentum

    team = row['TEAM']
    opponent = df[(df['GAME-ID'] == row['GAME-ID']) & (df['TEAM'] != team)]['TEAM'].values[0]
    team_elo, opponent_elo = elo_ratings[team], elo_ratings[opponent]

    #if do_train:
    tm, om =  momentum_scores[team], momentum_scores[opponent]

    # Calculate expected outcomes
    expected_team = 1 / (1 + 10 ** ((opponent_elo - team_elo) / 400))

    # Actual outcome
    actual_team = row['spread_result']

    # Update Elo ratings
    elo_ratings[team] += K * (actual_team - expected_team)
    #elo_ratings[opponent] += K * ((1 - actual_team) - (1 - expected_team))

    # Calculate Elo difference
    elo_diff = abs(opponent_elo - team_elo)

    # Update momentum
    momentum_scores[team] = momentum_decay * (momentum_scores[team] + elo_diff) if actual_team == 1 else momentum_decay * (momentum_scores[team] - (elo_diff/m))

    return elo_ratings[team], momentum_scores[team], elo_ratings[opponent], momentum_scores[opponent]

def parse_referee_data(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')[1:]  # Skipping the header row

    ref_data = {}
    for row in rows:
        columns = row.find_all('td')
        if len(columns) < 4:
            continue  # Skip rows that don't have enough columns

        game = columns[0].get_text(strip=True)
        crew_chief = columns[1].get_text(strip=True)
        referee = columns[2].get_text(strip=True)
        umpire = columns[3].get_text(strip=True)

        # Split the game into two teams
        teams = game.split(' @ ')
        if len(teams) != 2:
            continue  # Skip if format is not as expected
        city1 = teams[0] #_city_map.get(teams[0].split()[-1] + " " + teams[0].split()[-2], "Unknown")
        city2 = teams[1] #_city_map.get(teams[1].split()[-1] + " " + teams[1].split()[-2], "Unknown")

        ref_data[city1] = [crew_chief, referee, umpire]
        ref_data[city2] = [crew_chief, referee, umpire]

    return ref_data

def remove_ref_keys(data_dict):
    return {k: v for k, v in data_dict.items() if v[0] != 'REF'}

# Step 1: Result of the Game
def assign_results(group):
    # Copy referee and crew info
    group['MAIN REF'] = [group['MAIN REF'].iloc[0]] * 2
    group['CREW'] = [group['CREW'].iloc[0]] * 2

    # Spread-related calculations
    t1_spread_f = group.iloc[0]['F'] + group.iloc[0]['CLOSING_SPREAD']
    t2_spread_f = group.iloc[1]['F'] + group.iloc[1]['CLOSING_SPREAD']
    
    res = [t1_spread_f > group.iloc[1]['F'], t2_spread_f > group.iloc[0]['F']]
    group['spread_result'] = res
    group['ml_result'] = [group.iloc[0]['F'] > group.iloc[1]['F'], group.iloc[1]['F'] > group.iloc[0]['F']]
    group['q3_result'] = [ (group.iloc[0]['1Q'] + group.iloc[0]['2Q'] + group.iloc[0]['3Q'] >= 100),
                           (group.iloc[1]['1Q'] + group.iloc[1]['2Q'] + group.iloc[1]['3Q'] >= 100) ]
    group['total_result'] = [ (group.iloc[0]['F'] + group.iloc[1]['F'] >= group.iloc[0]['CLOSING_TOTAL']),
                              (group.iloc[0]['F'] + group.iloc[1]['F'] >= group.iloc[0]['CLOSING_TOTAL']) ]
    
    # Process line movement columns
    for lm in ['LINE_MOVEMENT_1', 'LINE_MOVEMENT_2', 'LINE_MOVEMENT_3']:
        group[lm] = group[lm].str.split(' ').str.get(0)\
                             .str.split('u').str.get(0)\
                             .str.split('o').str.get(0)
        group[lm] = pd.to_numeric(group[lm], errors='coerce')
        group[f'SPREAD_{lm}'] = group[lm][group[lm] < 100]
        group[f'TOTAL_{lm}'] = group[lm][group[lm] >= 100]
    
    # Update opponent stats using EMA features
    # For each stat, the opponent's feature is taken from the other row.
    stats = ['PTS', 'POSS', 'PACE', 'OEFF', 'DEFF', 'OR', '3P', '3PA', 'TO', 'FT']
    for stat in stats:
        # 3-game EMA
        group[f'Opp_Ema3_game_{stat}'] = [group.iloc[1][f'Ema3_game_{stat}'], group.iloc[0][f'Ema3_game_{stat}']]
        # 5-game EMA
        group[f'Opp_Ema5_game_{stat}'] = [group.iloc[1][f'Ema5_game_{stat}'], group.iloc[0][f'Ema5_game_{stat}']]
        # Season average (expanding mean)
        group[f'Opp_Season_Avg_{stat}'] = [group.iloc[1][f'Season_Avg_{stat}'], group.iloc[0][f'Season_Avg_{stat}']]
    
    return group



def assign_opp_elo(group):
    group['Opp_Elo'] = [group.iloc[1]['Elo_Rating'], group.iloc[0]['Elo_Rating']]
    group['Opp_Momentum'] = [group.iloc[1]['Momentum'], group.iloc[0]['Momentum']]
    return group

# Step 3: Win/Loss Streak
def calculate_streak(group):
    streak = 0
    streaks = []
    for result in group['Prev_Result']:
        if result == 1:
            streak = streak + 1 if streak > 0 else 1
        else:
            streak = streak - 1 if streak < 0 else -1
        streaks.append(streak)
    group['Streak'] = streaks
    return group

In [40]:
yesterday = (datetime.now() - timedelta(1)).strftime("%m-%d-%Y")
two_days_ago = (datetime.now() - timedelta(2)).strftime("%m-%d-%Y")
TODAY_FILE = download_current_data(date=yesterday)

./02-24-2025-nba-season-team-feed.xlsx


In [15]:
# Initialize headless WebDriver
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

# get todays date as YYYY-MM-DD
today = datetime.now().strftime('%Y-%m-%d')
driver.get(f'https://www.scoresandodds.com/nba?date={today}')

# Wait for page to load
time.sleep(3)

# Find the table by its class
tables = driver.find_elements(By.CLASS_NAME, 'event-card-table')

current_odds = {}
games = []
team = None
for table in tables:
    try:
      # Find all rows in the table
      rows = table.find_elements(By.CLASS_NAME, 'event-card-row')
      all_moves = [float(''.join(c for c in m.text if (c.isdigit() or c =='.' or c == '-'))) for m in table.find_elements(By.CSS_SELECTOR, '[data-tab*="#line-movements"] .data-value')]
      s_moves = []
      t_moves = []
      for m in all_moves:
        if abs(m) < 100:
          s_moves.append(m)
        else:
          t_moves.append(m)

      i = 0
      teams = []

      home_row = table.find_element(By.CSS_SELECTOR, '[data-side="home"]')
      away_row = table.find_element(By.CSS_SELECTOR, '[data-side="away"]')

      home_team = home_row.find_element(By.CSS_SELECTOR, '.team-name span').text
      away_team = away_row.find_element(By.CSS_SELECTOR, '.team-name span').text

      raw_h_ml = home_row.find_element(By.CSS_SELECTOR, '[data-field="current-moneyline"] .data-value').text
      raw_h_ml = -110 if raw_h_ml == 'even' else raw_h_ml
      h_ml = int(raw_h_ml)
      h_spread = float(''.join(c for c in home_row.find_element(By.CSS_SELECTOR, '[data-field="current-spread"] .data-value').text if (c.isdigit() or c == '.' or c == '-')))
      h_total = float(''.join(c for c in home_row.find_element(By.CSS_SELECTOR, '[data-field="current-total"] .data-value').text if (c.isdigit() or c == '.' or c == '-')))

      home = ['REF', h_ml, 'H', team_map[away_team], h_spread, h_total, s_moves[-3:], t_moves[-3:], 'CREW', 'UMPIRE']

      raw_a_ml = away_row.find_element(By.CSS_SELECTOR, '[data-field="current-moneyline"] .data-value').text
      raw_a_ml = -110 if raw_a_ml == 'even' else raw_a_ml
      a_ml = int(raw_a_ml)
      a_spread = float(''.join(c for c in away_row.find_element(By.CSS_SELECTOR, '[data-field="current-spread"] .data-value').text if (c.isdigit() or c == '.' or c == '-')))
      a_total = float(''.join(c for c in away_row.find_element(By.CSS_SELECTOR, '[data-field="current-total"] .data-value').text if (c.isdigit() or c == '.' or c == '-')))

      away = ['REF', a_ml, 'R', team_map[home_team], a_spread, a_total, s_moves[-3:], t_moves[-3:], 'CREW', 'UMPIRE']

      current_odds[team_map[home_team]] = home
      current_odds[team_map[away_team]] = away
    except:
        pass
driver.quit()
print('DONE')



DONE


In [16]:
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)
driver.get('https://official.nba.com/referee-assignments/')

# Wait for JavaScript to load
time.sleep(3)

# Get the HTML content of the page
html_content = driver.page_source

# Close the browser
driver.quit()

referee_data = parse_referee_data(html_content)

for city, refs in referee_data.items():
    try:
        if city == 'L.A. Lakers':
          city = 'LA Lakers'
        current_odds[city][0] = ' '.join(refs[0].split(' ')[:-1])
        current_odds[city][-2] = ' '.join(refs[1].split(' ')[:-1])
        current_odds[city][-1] = ' '.join(refs[2].split(' ')[:-1])
    except:
      pass

processed_data = remove_ref_keys(current_odds)
TODAY_MAP = processed_data
display(pd.DataFrame(TODAY_MAP))

Unnamed: 0,Toronto,Boston,Orlando,Cleveland,Memphis,Phoenix,Houston,Milwaukee,New Orleans,San Antonio,LA Lakers,Dallas,Golden State,Charlotte
0,Mitchell Ervin,Mitchell Ervin,David Guthrie,David Guthrie,Curtis Blair,Curtis Blair,Tony Brothers,Tony Brothers,Courtney Kirkland,Courtney Kirkland,John Goble,John Goble,James Williams,James Williams
1,390,-520,240,-298,-325,260,-166,140,-148,124,-395,310,-1350,800
2,H,R,H,R,H,R,H,R,H,R,H,R,H,R
3,Boston,Toronto,Cleveland,Orlando,Phoenix,Memphis,Milwaukee,Houston,San Antonio,New Orleans,Dallas,LA Lakers,Charlotte,Golden State
4,11.0,-11.0,7.5,-7.5,-7.5,7.5,-3.5,3.5,-3.0,3.0,-9.0,9.0,-17.0,17.0
5,225.0,225.0,222.5,222.5,245.5,245.5,227.0,227.0,240.5,240.5,233.5,233.5,220.0,220.0
6,"[-11.0, -11.0, -11.0]","[-11.0, -11.0, -11.0]","[-7.5, -7.5, -7.5]","[-7.5, -7.5, -7.5]","[-7.5, -8.0, -8.0]","[-7.5, -8.0, -8.0]","[-3.5, -3.5, -3.5]","[-3.5, -3.5, -3.5]","[-3.0, -3.0, -3.0]","[-3.0, -3.0, -3.0]","[-9.0, -9.0, -9.0]","[-9.0, -9.0, -9.0]","[-17.0, -17.0, -17.0]","[-17.0, -17.0, -17.0]"
7,"[225.0, 226.0, 225.0]","[225.0, 226.0, 225.0]","[222.5, 222.5, 222.5]","[222.5, 222.5, 222.5]","[245.5, 245.5, 245.5]","[245.5, 245.5, 245.5]","[227.0, 226.5, 226.0]","[227.0, 226.5, 226.0]","[240.5, 239.5, 239.5]","[240.5, 239.5, 239.5]","[233.5, 232.5, 232.5]","[233.5, 232.5, 232.5]","[220.0, 220.5, 221.0]","[220.0, 220.5, 221.0]"
8,Nick Buchert,Nick Buchert,Justin Van Duyne,Justin Van Duyne,Jacyn Goble,Jacyn Goble,Gediminas Petraitis,Gediminas Petraitis,Andy Nagy,Andy Nagy,Pat Fraher,Pat Fraher,Brent Barnaky,Brent Barnaky
9,Tyler Ricks,Tyler Ricks,Mousa Dagher,Mousa Dagher,Intae Hwang,Intae Hwang,Matt Myers,Matt Myers,Brandon Schwab,Brandon Schwab,Evan Scott,Evan Scott,Brandon Adair,Brandon Adair


In [45]:
team_df_2019 = pd.read_excel(f'{DATA_ROOT}/2018-2019_NBA_Box_Score_Team-Stats.xlsx')
team_df_2020 = pd.read_excel(f'{DATA_ROOT}/2019-2020_NBA_Box_Score_Team-Stats.xlsx')
team_df_2021 = pd.read_excel(f'{DATA_ROOT}/2020-2021_NBA_Box_Score_Team-Stats.xlsx')
team_df_2022 = pd.read_excel(f'{DATA_ROOT}/2021-2022_NBA_Box_Score_Team-Stats.xlsx')
team_df_2023 = pd.read_excel(f'{DATA_ROOT}/2022-2023_NBA_Box_Score_Team-Stats.xlsx')
team_df_2024 = pd.read_excel(f'{DATA_ROOT}/2023-2024_NBA_Box_Score_Team-Stats.xlsx')
tdf = pd.read_excel(TODAY_FILE)
refs_2019 = pd.read_csv('../historical_data/2018-2019.csv')
refs_2020 = pd.read_csv('../historical_data/2019-2020.csv')
refs_2021 = pd.read_csv('../historical_data/2020-2021.csv')
refs_2022 = pd.read_csv('../historical_data/2021_2022.csv')
refs_2023 = pd.read_csv('../historical_data/2022-2023.csv')
refs_2024 = pd.read_csv('../historical_data/2023-2024.csv')

refs = pd.concat([refs_2019, refs_2020, refs_2021, refs_2022, refs_2023, refs_2024])
raw_df = pd.concat([team_df_2019, team_df_2020, team_df_2021, team_df_2022, team_df_2023, team_df_2024])
raw_df.columns = cleaned_cols
tdf.columns = t_cleaned_cols

rename_map = {
    'OPENING_ODDS': 'OPENING ODDS',
    'FULL_GAME_ODDS_URL': 'ODDS_URL'
}

tdf.rename(columns=rename_map, inplace=True)

# Step 2: Add missing columns in df2 (if any)
missing_cols = set(cleaned_cols) - set(tdf.columns)
for col in missing_cols:
    tdf[col] = pd.NA


#Step 3: Concatenate the DataFrames
raw_df = pd.concat([raw_df, tdf], ignore_index=True)

""" Add additional columns to games df """
raw_df['Season'] = raw_df['BIGDATABALL_DATASET']
raw_df['DATE'] = raw_df['DATE'].astype('datetime64[ns]')
infer_df = raw_df.copy(deep=True)


In [46]:
# Convert DATE to datetime
raw_df['DATE'] = pd.to_datetime(raw_df['DATE'])

# --- STEP 1: Merge to Get Opponent Stats ---
stats = ['PTS', 'PACE', 'OEFF', 'OR', '3P', 'FT']
opp_cols = ['GAME-ID', 'TEAM'] + stats
df_opp = raw_df[opp_cols].rename(columns={
    'TEAM': 'OPP_TEAM',
    'PTS': 'OPP_PTS',
    'PACE': 'OPP_PACE',
    'OEFF': 'OPP_OEFF',
    'OR': 'OPP_OR',
    '3P': 'OPP_3P',
    'FT': 'OPP_FT'
})

raw_df = pd.merge(raw_df, df_opp, on='GAME-ID')
raw_df = raw_df[raw_df['TEAM'] != raw_df['OPP_TEAM']]

# --- STEP 2: Sort by TEAM and DATE ---
raw_df.sort_values(['TEAM', 'DATE'], inplace=True)

# --- STEP 3: Compute EMAs ---
def compute_emas(df, group_col, target_cols):
    for col in target_cols:
        # Extract a simple stat name (e.g. 'PTS' -> 'pts')
        stat_name = col.split("_")[1].lower()
        # EMA over last 3 games
        df[f'ema3_allowed_{stat_name}'] = df.groupby(group_col)[col].transform(
            lambda x: x.shift(1).ewm(span=3, adjust=False).mean()
        )
        # EMA over last 5 games
        df[f'ema5_allowed_{stat_name}'] = df.groupby(group_col)[col].transform(
            lambda x: x.shift(1).ewm(span=5, adjust=False).mean()
        )
        # "Season-long" EMA as an expanding mean (cumulative average)
        df[f'ema_season_allowed_{stat_name}'] = df.groupby(group_col)[col].transform(
            lambda x: x.shift(1).expanding(min_periods=1).mean()
        )
    return df

rolling_stats = ['OPP_PTS', 'OPP_PACE', 'OPP_OEFF', 'OPP_OR', 'OPP_3P', 'OPP_FT']
raw_df = compute_emas(raw_df, 'TEAM', rolling_stats)

# Sort by DATE for inspection
raw_df.sort_values(['DATE'], inplace=True)

# Display new defensive features
feature_cols = ['TEAM', 'DATE'] + \
    [f'ema3_allowed_{col.split("_")[1].lower()}' for col in rolling_stats] + \
    [f'ema5_allowed_{col.split("_")[1].lower()}' for col in rolling_stats] + \
    [f'ema_season_allowed_{col.split("_")[1].lower()}' for col in rolling_stats]

raw_df[feature_cols].tail()

Unnamed: 0,TEAM,DATE,ema3_allowed_pts,ema3_allowed_pace,ema3_allowed_oeff,ema3_allowed_or,ema3_allowed_3p,ema3_allowed_ft,ema5_allowed_pts,ema5_allowed_pace,ema5_allowed_oeff,ema5_allowed_or,ema5_allowed_3p,ema5_allowed_ft,ema_season_allowed_pts,ema_season_allowed_pace,ema_season_allowed_oeff,ema_season_allowed_or,ema_season_allowed_3p,ema_season_allowed_ft
33762,Detroit,2025-02-24,125.326502,107.093697,116.587077,9.666234,15.672288,21.83801,119.643321,105.001425,113.622276,9.737824,14.46876,20.912426,113.42315,98.508835,114.396325,10.024668,11.552182,18.508539
33765,Miami,2025-02-24,116.243923,94.736413,119.772632,10.099324,14.718368,16.706828,114.719263,94.621913,118.505273,10.403899,14.656779,15.758265,107.835821,96.645414,110.985692,9.640133,12.975124,16.665008
33761,LA Clippers,2025-02-24,122.248202,99.626817,121.271429,8.408998,14.595289,22.504262,119.740018,99.298195,118.916074,8.875612,15.184072,20.664772,110.851468,98.512287,112.136643,10.626943,12.041451,17.25734
33753,Chicago,2025-02-24,121.01522,101.610262,116.413803,9.506688,12.000618,19.846074,122.27953,101.679212,117.905039,10.334878,13.016596,19.193603,113.007533,98.892618,113.492596,9.80226,12.574388,17.419962
33750,Washington,2025-02-24,113.623626,100.582077,111.383589,11.590369,11.820996,18.543829,116.41017,100.603537,113.828867,11.12231,12.725797,18.600973,117.751402,101.039824,115.90313,11.014953,12.357009,18.751402


In [50]:
train_df = raw_df.copy(deep=True)

# Initialize Elo ratings and momentum scores
elo_ratings = {team: 1500 for team in train_df['TEAM'].unique()}
momentum_scores = {team: 0 for team in train_df['TEAM'].unique()}

# For each stat, compute:
# - Ema3_game: EMA with span=3 on shifted data (last 3 games)
# - Ema5_game: EMA with span=5 on shifted data (last 5 games)
# - Season_Avg: Expanding (cumulative) average on shifted data

def add_ema_features(df, col):
    df[f'Ema3_game_{col}'] = df.groupby(['TEAM','Season'])[col]\
                                .transform(lambda x: x.shift(1).ewm(span=3, adjust=False).mean())
    df[f'Ema5_game_{col}'] = df.groupby(['TEAM','Season'])[col]\
                                .transform(lambda x: x.shift(1).ewm(span=5, adjust=False).mean())
    df[f'Season_Avg_{col}'] = df.groupby(['TEAM','Season'])[col]\
                                .transform(lambda x: x.shift(1).expanding(min_periods=1).mean())
    return df

stats = ['PTS', 'POSS', 'PACE', 'OEFF', 'DEFF', 'OR', '3P', '3PA', 'TO', 'FT']

for stat in stats:
    train_df = add_ema_features(train_df, stat)

# Apply the function to each game group (assumes assign_results is defined)
train_df = train_df.groupby('GAME-ID').apply(assign_results)

# Reset index
train_df.reset_index(drop=True, inplace=True)

# Last 3 Games Moneylines
train_df['Last_ML_1'] = train_df.groupby(['TEAM', 'Season'])['MONEYLINE'].shift(1)
train_df['Last_ML_2'] = train_df.groupby(['TEAM', 'Season'])['MONEYLINE'].shift(2)
train_df['Last_ML_3'] = train_df.groupby(['TEAM', 'Season'])['MONEYLINE'].shift(3)

# Current Number of Wins - Losses
train_df['Wins'] = train_df.groupby(['TEAM', 'Season'])['ml_result'].cumsum()
train_df['Losses'] = train_df.groupby(['TEAM', 'Season'])['ml_result'].transform('count') - train_df['Wins']
train_df['Win_Loss_Diff'] = train_df['Wins'] - train_df['Losses']

# Current Opponent (assumes games always have 2 rows)
train_df['Opponent'] = train_df.groupby('GAME-ID')['TEAM']\
                               .shift(-1)\
                               .fillna(train_df.groupby('GAME-ID')['TEAM'].shift())

# Merge Ref Data
train_df = train_df.merge(refs.groupby('REFEREE').first(),
                          how='left', left_on='MAIN REF', right_on='REFEREE')

# Sort by date
train_df = train_df.sort_values('DATE')


KeyError: 'Avg_3_game_PTS'

In [48]:
train_df.reset_index(drop=True, inplace=True)
train_df = train_df.sort_values('DATE')
train_df['actual_points_allowed'] = train_df.groupby('GAME-ID')['F'].transform(lambda x: x[::-1].values)

In [49]:

# Apply the Elo / Momentum function
train_df[['Elo_Rating', 'Elo_Var', 'Momentum', 'Opp_Elo', 'Opp_Elo_Var', 'Opp_Momentum']] = train_df.apply(update_bayesian_elo_momentum, axis=1, result_type='expand')
train_df.reset_index(drop=True, inplace=True)
train_df = train_df.groupby('GAME-ID').apply(assign_opp_elo)
team_ratings = {}
team_momentum = {}
train_df.reset_index(drop=True, inplace=True)
# Apply the Elo / Momentum function
# Apply the function and expand the returned dictionaries into columns
expanded_cols = train_df.apply(update_bayesian_off_def, axis=1).apply(pd.Series)
# Assign specific keys (including offense, defense, and their variances) to the appropriate columns
train_df['Offensive_Rating'] = expanded_cols.apply(lambda x: x[0]['mu'], axis=1)
train_df['Offensive_Var'] = expanded_cols.apply(lambda x: x[0]['sigma2'], axis=1)
train_df['Defensive_Rating'] = expanded_cols.apply(lambda x: x[1]['mu'], axis=1)
train_df['Defensive_Var'] = expanded_cols.apply(lambda x: x[1]['sigma2'], axis=1)

train_df['Momentum'] = expanded_cols.apply(lambda x: x[2], axis=1)

train_df['Opp_Offensive_Rating'] = expanded_cols.apply(lambda x: x[3]['mu'], axis=1)
train_df['Opp_Offensive_Var'] = expanded_cols.apply(lambda x: x[3]['sigma2'], axis=1)
train_df['Opp_Defensive_Rating'] = expanded_cols.apply(lambda x: x[4]['mu'], axis=1)
train_df['Opp_Defensive_Var'] = expanded_cols.apply(lambda x: x[4]['sigma2'], axis=1)
train_df['Opp_Momentum'] = expanded_cols.apply(lambda x: x[5], axis=1)


In [None]:
# ema3_allowed_pts	ema3_allowed_pace	ema3_allowed_oeff	ema3_allowed_or	ema3_allowed_3p	ema3_allowed_ft	ema5_allowed_pts	ema5_allowed_pace	ema5_allowed_oeff	ema5_allowed_or	ema5_allowed_3p	ema5_allowed_ft	ema_season_allowed_pts	ema_season_allowed_pace	ema_season_allowed_oeff	ema_season_allowed_or	ema_season_allowed_3p	ema_season_allowed_ft

In [None]:
train_df[['DATE', 'TEAM', 'Opponent', 'Avg_3_game_PTS', 'ema3_allowed_pts']].sort_values('DATE').tail(10)

Unnamed: 0,DATE,TEAM,Opponent,Avg_3_game_PTS,last3_allowed_pts
15759,2025-02-24,Miami,Atlanta,115.333333,116.333333
15757,2025-02-24,Washington,Brooklyn,107.0,116.0
15756,2025-02-24,Brooklyn,Washington,100.666667,103.0
15754,2025-02-24,Philadelphia,Chicago,101.0,109.666667
15753,2025-02-24,Denver,Indiana,120.333333,119.666667
15752,2025-02-24,Indiana,Denver,130.0,118.0
15751,2025-02-24,LA Clippers,Detroit,113.666667,120.333333
15750,2025-02-24,Detroit,LA Clippers,133.666667,121.0
15758,2025-02-24,Atlanta,Miami,133.0,137.0
15755,2025-02-24,Chicago,Philadelphia,112.666667,120.666667


In [11]:
X = train_df.copy()

def offset_cols(group):
    cols = ['Elo_Rating', 'Momentum']
    group['prev_elo'] = group['Elo_Rating'].shift()
    group['prev_mom'] = group['Momentum'].shift()

    return group

def apply_prev_elo(group):
    cols = ['Elo_Rating', 'Momentum']
    group[cols] = group[cols].shift()

    return group

def assign_opp_elo_mom(group):
    group['Opp_Elo'] = [group.iloc[1]['Elo_Rating'], group.iloc[0]['Elo_Rating']]
    group['Opp_Momentum'] = [group.iloc[1]['Momentum'], group.iloc[0]['Momentum']]
    return group



X = X.groupby(['TEAM']).apply(offset_cols)
X = X.reset_index(drop=True)

X = X.groupby(['TEAM']).apply(apply_prev_elo)
X = X.reset_index(drop=True)

X = X.groupby(['GAME-ID']).apply(assign_opp_elo_mom)
X = X.reset_index(drop=True)

y = X['spread_result']
X = X[TRAIN_COLS + ['spread_result', 'ml_result', 'total_result', 'q3_result', 'DATE', 'PTS', 'POSS', 'OEFF', 'DEFF', 'PACE', 'GAME-ID', 'Elo_Var','Opp_Elo_Var']]
X[['MONEYLINE', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3']] = X[['MONEYLINE', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3',]].replace('even', '-100', regex=True)
X[['MONEYLINE', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3',]] = X[['MONEYLINE', 'Last_ML_1', 'Last_ML_2', 'Last_ML_3',]].fillna(0).astype(int)
X['MAIN REF'] = X['MAIN REF'].astype('category')
X['CREW'] = X['CREW'].astype('category')
X['TEAM_REST_DAYS'] = X['TEAM_REST_DAYS'].astype('category')
X['TEAM'] = X['TEAM'].astype('category')
X['Opponent'] = X['Opponent'].astype('category')
X['VENUE'] = (X['VENUE'] == 'H')*1

# get current date
today = datetime.now().strftime('%Y-%m-%d')

X.to_csv(f'{DATA_ROOT}/2024_2025_nba_team_full_{today}.csv', index=False)