In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import time
import pandas as pd
from IPython.display import display
import nba_inference_utils as niu
from ccb_model import BootstrapCalibratedClassifier

train_cols = ['Opp_Elo', 'Opp_Momentum', 'SPREAD_LINE_MOVEMENT_1', 
              'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3', 
                'TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2',
                'TOTAL_LINE_MOVEMENT_3', 'CREW', 'Opp_Avg_3_game_DEFF',
                'Opp_Avg_5_game_DEFF', 'Opp_Season_Avg_DEFF',
                'Opp_Avg_3_game_OEFF', 'Opp_Avg_5_game_OEFF',
                'Opp_Season_Avg_OEFF', 'Opp_Avg_3_game_PACE',
                'Opp_Avg_5_game_PACE', 'Opp_Season_Avg_PACE',
                'Opp_Avg_3_game_POSS', 'Opp_Avg_5_game_POSS',
                'Opp_Season_Avg_POSS', 'Avg_3_game_DEFF',
                'Avg_5_game_DEFF', 'Season_Avg_DEFF',
                'Avg_3_game_OEFF', 'Avg_5_game_OEFF',
                'Season_Avg_OEFF', 'Avg_3_game_PACE',
                'Avg_5_game_PACE', 'Season_Avg_PACE',
                'Avg_3_game_POSS', 'Avg_5_game_POSS',
                'Season_Avg_POSS', 'Avg_3_game_OR', 'Avg_5_game_OR',
                'Season_Avg_OR','Avg_3_game_3P', 'Avg_5_game_3P',
                'Season_Avg_3P','Avg_3_game_3PA', 'Avg_5_game_3PA',
                'Season_Avg_3PA','Avg_3_game_TO', 'Avg_5_game_TO',
                'Season_Avg_TO','Avg_3_game_FT', 'Avg_5_game_FT',
                'Season_Avg_FT','CLOSING_SPREAD',
                'CLOSING_TOTAL', 'MONEYLINE', 'Avg_3_game_PTS',
                'Avg_5_game_PTS', 'Season_Avg_PTS', 'Last_ML_1',
                'Last_ML_2', 'Last_ML_3', 'VENUE', 'TEAM', 'Opponent',
                'Win_Loss_Diff', 'HOME TEAM WIN%', 'HOME TEAM POINTS DIFFERENTIAL',
                'TOTAL POINTS PER GAME', 'CALLED FOULS PER GAME',
                'FOUL% AGAINST ROAD TEAMS', 'FOUL% AGAINST HOME TEAMS',
                'FOUL DIFFERENTIAL (Against Road Team) - (Against Home Team)',
                'Elo_Rating', 'Momentum', 'MAIN REF', 'TEAM_REST_DAYS',
                    'Offensive_Rating', 'Defensive_Rating',
              'Opp_Offensive_Rating', 'Opp_Defensive_Rating', 'two_week_totals', 'Elo_Var', 'Opp_Elo_Var']

today_map_features = ['TEAM', 'Opponent', 'MONEYLINE', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'Venue', 'Referee', 
           'ELO_Rating', 'Momentum',
             'HOME TEAM WIN%', 'HOME TEAM POINTS DIFFERENTIAL',
             'Opp_Elo', 'Opp_Momentum', 'CREW',
              'TOTAL POINTS PER GAME', 'CALLED FOULS PER GAME',
              'FOUL% AGAINST ROAD TEAMS', 'FOUL% AGAINST HOME TEAMS',
              'FOUL DIFFERENTIAL (Against Road Team) - (Against Home Team)',
              'SPREAD_LINE_MOVEMENT_1', 'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3',
              'TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2', 'TOTAL_LINE_MOVEMENT_3',  'Offensive_Rating', 'Defensive_Rating',
              'Opp_Offensive_Rating', 'Opp_Defensive_Rating', 'Elo_Var', 'Opp_Elo_Var']
# allow notebook to reload external modules


In [3]:
import joblib 

#total_model = BootstrapCalibratedClassifier(n_bootstrap_samples=5)
total_model = joblib.load('calibrated_total_model_ncaa.pkl')



### today's features
- refs
- lines + movement
- today's games

In [8]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from fuzzywuzzy import process

def extract_event_data(event_card):
    try:
        # Initialize a dictionary to store data
        event_data = {}
        
        # Extract game timestamp from data-time attribute (epoch time)
        data_time = event_card.get('data-time')
        if data_time:
            event_data['Timestamp'] = pd.to_datetime(int(data_time), unit='s')
        else:
            event_data['Timestamp'] = None
        
        # Extract date and time displayed on the card
        date_time_span = event_card.find('span', {'data-role': 'localtime'})
        if date_time_span:
            event_data['Date_Time_Display'] = date_time_span.get_text(strip=True)
        else:
            event_data['Date_Time_Display'] = None
        
        # Initialize dictionaries for away and home teams
        event_data['Away_Team'] = {}
        event_data['Home_Team'] = {}
        
        # Iterate through each side (away and home)
        for side in ['away', 'home']:
            team_data = event_card.find('tr', {'data-side': side})
            if team_data:
                # Team Name
                team_name_tag = team_data.find('span', class_='team-name').find('a')
                team_name = team_name_tag.get_text(strip=True) if team_name_tag else None
                event_data[f'{side.capitalize()}_Team_Name'] = team_name
                
                # Team Rotation
                rotation_tag = team_data.find('span', class_='team-rotation')
                rotation = rotation_tag.get_text(strip=True) if rotation_tag else None
                event_data[f'{side.capitalize()}_Rotation'] = rotation
                
                # Team Record
                record_tag = team_data.find('span', class_='team-record')
                record = record_tag.get_text(strip=True) if record_tag else None
                event_data[f'{side.capitalize()}_Record'] = record
                
                # Spread and Spread Odds
                spread_td = team_data.find('td', {'data-field': 'current-spread'})
                if spread_td:
                    spread = spread_td.find('span', class_='data-value').get_text(strip=True)
                    spread_odds = spread_td.find('small', class_='data-odds').get_text(strip=True)
                    event_data[f'{side.capitalize()}_Spread'] = spread
                    event_data[f'{side.capitalize()}_Spread_Odds'] = spread_odds
                else:
                    event_data[f'{side.capitalize()}_Spread'] = None
                    event_data[f'{side.capitalize()}_Spread_Odds'] = None
                
                # Total and Total Odds
                total_td = team_data.find('td', {'data-field': 'current-total'})
                if total_td:
                    total = total_td.find('span', class_='data-value').get_text(strip=True)
                    total_odds = total_td.find('small', class_='data-odds').get_text(strip=True)
                    event_data[f'{side.capitalize()}_Total'] = total
                    event_data[f'{side.capitalize()}_Total_Odds'] = total_odds
                else:
                    event_data[f'{side.capitalize()}_Total'] = None
                    event_data[f'{side.capitalize()}_Total_Odds'] = None
                
                # Moneyline
                moneyline_td = team_data.find('td', {'data-field': 'current-moneyline'})
                if moneyline_td:
                    moneyline = moneyline_td.find('span', class_='data-value').get_text(strip=True)
                    event_data[f'{side.capitalize()}_Moneyline'] = moneyline
                else:
                    event_data[f'{side.capitalize()}_Moneyline'] = None
            else:
                # If side data is missing
                event_data[f'{side.capitalize()}_Team_Name'] = None
                event_data[f'{side.capitalize()}_Rotation'] = None
                event_data[f'{side.capitalize()}_Record'] = None
                event_data[f'{side.capitalize()}_Spread'] = None
                event_data[f'{side.capitalize()}_Spread_Odds'] = None
                event_data[f'{side.capitalize()}_Total'] = None
                event_data[f'{side.capitalize()}_Total_Odds'] = None
                event_data[f'{side.capitalize()}_Moneyline'] = None
        
        # Extract Line Movements
        line_movements_td = event_card.find('td', class_='event-card-movements tablet')
        if line_movements_td:
            # There can be multiple line movements; we'll extract them as lists
            spread_movements = []
            total_movements = []
            moneyline_movements = []
            
            movement_divs = line_movements_td.find_all('div', {'data-role': 'openable'})
            for div in movement_divs:
                value = div.find('span', class_='data-value').get_text(strip=True)
                odds = div.find('small', class_='data-odds').get_text(strip=True)
                # Determine the type based on context or position
                # This requires assumption; adjust as necessary
                if 'spread' in div.get('data-field', '').lower():
                    spread_movements.append({'Spread': value, 'Spread_Odds': odds})
                elif 'total' in div.get('data-field', '').lower():
                    total_movements.append({'Total': value, 'Total_Odds': odds})
                elif 'moneyline' in div.get('data-field', '').lower():
                    moneyline_movements.append({'Moneyline': value, 'Moneyline_Odds': odds})
            
            # Alternatively, if the type cannot be determined, store all movements in one list
            # Here, we'll store them as generic movements
            event_data['Line_Movements'] = [{'Value': div.find('span', class_='data-value').get_text(strip=True),
                                            'Odds': div.find('small', class_='data-odds').get_text(strip=True)}
                                            for div in movement_divs]
        else:
            event_data['Line_Movements'] = []
    except:
        pass

    return event_data

def scrape_ncaab_games(url):
    # Initialize Selenium WebDriver with Chrome in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run in headless mode
    options.add_argument('--disable-gpu')  # Disable GPU acceleration
    options.add_argument('--no-sandbox')  # Bypass OS security model
    options.add_argument('--window-size=1920,1080')  # Set window size to load all elements

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    
    try:
        # Load the webpage
        driver.get(url)

        # Wait for the page to load completely
        # Adjust the sleep time as necessary or implement explicit waits
        time.sleep(5)  # Wait for 5 seconds

        # Get the page source
        html = driver.page_source
        
        # Parse the HTML with BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')

        # Find all event cards
        event_cards = soup.find_all('div', class_='event-card')

        # Initialize a list to hold all event data
        events_data = []

        # Iterate through each event card and extract data
        for event_card in event_cards:
            data = extract_event_data(event_card)
            events_data.append(data)

        # Convert the list of dictionaries to a DataFrame
        df = pd.DataFrame(events_data)

        return df

    finally:
        # Close the WebDriver
        driver.quit()

target_url = 'https://www.scoresandodds.com/ncaab'  # Replace with the actual URL
ncaab_df = scrape_ncaab_games(target_url)
#ncaab_df = ncaab_df[ncaab_df['Away_Team_Name'].notna()][['Away_Team_Name', 'Home_Team_Name', 'Away_Total']]




Unnamed: 0,Away_Team_Name,Away_Spread,Away_Spread_Odds,Away_Total,Away_Total_Odds,Away_Moneyline,Home_Team_Name,Home_Spread,Home_Spread_Odds,Home_Total,Home_Total_Odds,Home_Moneyline
0,Florida (6),,,,,,Kentucky (10),,,,,
1,,,,,,,Princeton,,,,,
2,Boston College,,,,,,Georgia Tech,,,,,
3,Hofstra,,,,,,Northeastern,,,,,
4,Toledo,,,,,,Western Michigan,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
136,Saint Mary's,-18,-110,o138,-112,-2400,Portland,+18,-110,u138,-108,+1200
137,UC Riverside,+9,-108,o142.5,-112,+390,UC Irvine,-9,-112,u142.5,-108,-520
138,Cal Poly,+6.5,-110,o153.5,-108,+235,Hawaii,-6.5,-110,u153.5,-112,-290
139,Nevada,,,,,,New Mexico,,,,,


In [25]:
AWAY_MAP = ncaab_df.dropna()[['Away_Team_Name', 'Away_Spread',
       'Away_Spread_Odds', 'Away_Total', 'Away_Total_Odds', 'Away_Moneyline', 'Home_Team_Name']]

AWAY_MAP.columns = ['TEAM', 'CLOSING_SPREAD', 'SPREAD_ODDS', 'CLOSING_TOTAL', 'TOTAL_ODDS', 'MONEYLINE', 'Opponent']
AWAY_MAP['Venue'] = 'Away'
HOME_MAP = ncaab_df.dropna()[['Home_Team_Name','Home_Spread',
       'Home_Spread_Odds', 'Home_Total', 'Home_Total_Odds', 'Home_Moneyline', 'Away_Team_Name']]
HOME_MAP.columns = ['TEAM', 'CLOSING_SPREAD', 'SPREAD_ODDS', 'CLOSING_TOTAL', 'TOTAL_ODDS', 'MONEYLINE', 'Opponent']
HOME_MAP['Venue'] = 'Home'
TODAY_MAP = pd.concat([AWAY_MAP, HOME_MAP], axis=0)
TODAY_MAP

Unnamed: 0,TEAM,CLOSING_SPREAD,SPREAD_ODDS,CLOSING_TOTAL,TOTAL_ODDS,MONEYLINE,Opponent,Venue
21,Ball State,+10,-112,o138,-110,+425,Kent State,Away
22,Towson,+6.5,-112,o139,-112,+240,Charleston,Away
23,VCU,-6,-108,o139,-112,-245,Loyola Chicago,Away
24,Duquesne,+5,-112,o132.5,-110,+164,Davidson,Away
25,Western Kentucky,-5,-112,o155,-112,-230,FIU,Away
...,...,...,...,...,...,...,...,...
134,Cal State Fullerton,+12.5,-110,u136,-110,+650,California-San Diego,Home
135,Bakersfield,-5,-115,u134.5,-112,-238,Long Beach State,Home
136,Portland,+18,-110,u138,-108,+1200,Saint Mary's,Home
137,UC Irvine,-9,-112,u142.5,-108,-520,UC Riverside,Home


In [49]:
today_teams_list = list(TODAY_MAP.TEAM.values)
TODAY_MAP = TODAY_MAP.reset_index(drop=True)

# Example cleaning function
def clean_team_name(name):
    if pd.isnull(name):
        return name
    name = name.lower().strip()
    name = name.replace('&', 'and')
    name = name.replace('-', ' ')
    # Remove extra spaces
    name = ' '.join(name.split())
    return name

def match_team(name, avg_teams, threshold=90):
    
    if pd.isnull(name):
        return None
    match, score, _ = process.extractOne(name, avg_teams)

    if score >= threshold:
        return match
    else:
        return None

def get_most_recent_rows(df, teams):
    most_recent_rows = []
    for team in teams[:5]:
        team_df = df['TEAM'].apply(lambda x: match_team(x, df))
        display(team_df)
        if not team_df.empty:
            most_recent_row = team_df.loc[team_df['DATE'].idxmax()]
            most_recent_rows.append(most_recent_row)
    return pd.DataFrame(most_recent_rows)

# Clean team names in both DataFrames
TODAY_MAP['TEAM'] = TODAY_MAP['TEAM'].apply(clean_team_name)
TODAY_MAP['Opponent'] = TODAY_MAP['Opponent'].apply(clean_team_name)
TODAY_MAP[['TEAM', 'Opponent', 'MONEYLINE', 'CLOSING_SPREAD', 'CLOSING_TOTAL']]

Unnamed: 0,TEAM,Opponent,MONEYLINE,CLOSING_SPREAD,CLOSING_TOTAL
0,ball state,kent state,+425,+10,o138
1,towson,charleston,+240,+6.5,o139
2,vcu,loyola chicago,-245,-6,o139
3,duquesne,davidson,+164,+5,o132.5
4,western kentucky,fiu,-230,-5,o155
...,...,...,...,...,...
229,cal state fullerton,california san diego,+650,+12.5,u136
230,bakersfield,long beach state,-238,-5,u134.5
231,portland,saint mary's,+1200,+18,u138
232,uc irvine,uc riverside,-520,-9,u142.5


### most recent rows from historical dataset
- ELO scores
- Momentum scores


In [56]:
from datetime import datetime
#today = datetime.now().strftime('%Y-%m-%d')
historical_data = pd.DataFrame()
i = 0
while historical_data.empty:
    yesterday = (datetime.now() - pd.DateOffset(i)).strftime('%Y-%m-%d')
    try:
        historical_data = pd.read_csv(f'2024_2025_ncaa_team_full_{yesterday}.csv')
        print(f'worked with date: {yesterday}')
    except FileNotFoundError:
        i += 1
        pass
#historical_data = pd.read_csv(f'2024_2025_nba_team_full_{yesterday}.csv')
most_recent_historical = historical_data #get_most_recent_rows(historical_data, today_teams_list)
#print(historical_data[['TEAM', 'DATE']])
most_recent_historical[['TEAM', 'DATE', 'Elo_Rating', 'Momentum']].tail()

worked with date: 2025-01-04


Unnamed: 0,TEAM,DATE,Elo_Rating,Momentum
16747,Washington Huskies,2024-11-13,1511.743895,187.545806
16748,Oklahoma Sooners,2024-11-27,1530.069741,218.841208
16749,Providence Friars,2024-11-27,1499.837168,132.877127
16750,North Dakota Fighting Hawks,2024-12-13,1490.954503,1908.524429
16751,UTSA Roadrunners,2024-12-13,1502.631188,627.16399


### yesterday's data to update internal features
- running averages
- update ELO + momentum

In [57]:
yesterday_df = pd.read_excel('01-03-2025-cbb-season-team-feed.xlsx')

#yesterday_df = pd.read_excel(yesterday_data)

# replace all white spaces in column names with _
yesterday_df.columns = yesterday_df.columns.str.replace(' ', '_')

# replace \n in column names with _
yesterday_df.columns = yesterday_df.columns.str.replace('\n', '_')
yesterday_df.columns = yesterday_df.columns.str.replace('__', '_')

yesterday_df['DATE'] = pd.to_datetime(yesterday_df['DATE'])
most_recent_historical_date = historical_data['DATE'].max()

yesterday_df['Opponent'] = yesterday_df.groupby('GAME-ID')['TEAM'].shift(-1).fillna(yesterday_df.groupby('GAME-ID')['TEAM'].shift())

yesterday_df = yesterday_df.groupby('GAME-ID').apply(niu.assign_results)
yesterday_df = yesterday_df.sort_values('DATE')
yesterday_df = yesterday_df.set_index('DATE')
yesterday_df['two_week_totals'] = (
    yesterday_df['total_result']
    .rolling('14D')
    .mean()
)

yesterday_df['one_week_totals'] = (
    yesterday_df['total_result']
    .rolling('7D')
    .mean()
)
yesterday_df.reset_index(inplace=True)
just_yesterday_df = yesterday_df[yesterday_df['DATE'] >= most_recent_historical_date]



yesterday_df[['DATE', 'TEAM', 'Opponent', 'PTS', 'GAME-ID']].tail()

  yesterday_df = yesterday_df.groupby('GAME-ID').apply(niu.assign_results)


Unnamed: 0,DATE,TEAM,Opponent,PTS,GAME-ID
5555,2025-01-03,Georgetown Hoyas,Xavier Musketeers,69,401719085
5556,2025-01-03,Xavier Musketeers,Georgetown Hoyas,63,401719085
5557,2025-01-03,Fairfield Stags,Merrimack Warriors,54,401706192
5558,2025-01-03,Le Moyne Dolphins,Long Island University Sharks,62,401721160
5559,2025-01-03,Iona Gaels,Marist Red Foxes,65,401706195


### Use most recent historical data as starting point to update ELO / Momentum

In [58]:
# most recent elo ratings and momentum scores from historical data
elo_ratings = most_recent_historical.set_index('TEAM')['Elo_Rating'].to_dict()
variances = most_recent_historical.set_index('TEAM')['Elo_Var'].to_dict()
momentum_scores = most_recent_historical.set_index('TEAM')['Momentum'].to_dict()

# elo ratings is a dict of team names to elo ratings, and variances is a dict of team names to elo variances
# combine them into a single dict with team as key, mu as elo rating, and sigma as elo variance
team_strengths = {team: {'mu': elo_ratings[team], 'sigma2': variances[team]} for team in elo_ratings}

# Iterate over the new data to update Elo and Momentum
for index, row in just_yesterday_df.iterrows():
    team_elo, _, team_momentum, opp_elo, _, opp_momentum = niu.update_bayesian_elo_momentum(row, just_yesterday_df, team_strengths, momentum_scores)
    # Update the dictionaries with the new Elo and momentum values
    elo_ratings[row['TEAM']] = team_elo
    momentum_scores[row['TEAM']] = team_momentum
    elo_ratings[row['Opponent']] = opp_elo
    momentum_scores[row['Opponent']] = opp_momentum
    variances[row['TEAM']] = team_strengths[row['TEAM']]['sigma2']
    variances[row['Opponent']] = team_strengths[row['Opponent']]['sigma2']

# Output the updated Elo ratings and momentum scores
print("Updated Elo Ratings:", elo_ratings)
print("Updated Momentum Scores:", momentum_scores)


Updated Elo Ratings: {'Eastern Washington Eagles': nan, 'Montana Grizzlies': nan, 'Idaho Vandals': 1496.6569078576958, 'Montana State Bobcats': 1489.7109213706124, 'Idaho State Bengals': 1504.1205325932183, 'Northern Arizona Lumberjacks': 1500.3276090934046, 'Northern Colorado Bears': nan, 'Weber State Wildcats': nan, 'Jacksonville State Gamecocks': nan, 'UTEP Miners': nan, 'Liberty Flames': nan, 'New Mexico State Aggies': nan, 'Portland State Vikings': nan, 'Sacramento State Hornets': nan, 'UC Irvine Anteaters': 1506.0747592024711, 'UC Riverside Highlanders': nan, 'Cal State Fullerton Titans': nan, 'Long Beach State Beach': nan, 'UC Davis Aggies': nan, 'UC Santa Barbara Gauchos': nan, 'Cal State Bakersfield Roadrunners': nan, 'UC San Diego Tritons': 1510.9469215685183, 'Cal Poly Mustangs': 1496.9647612204524, 'Cal State Northridge Matadors': nan, "Hawai'i Rainbow Warriors": nan, 'Louisiana Tech Bulldogs': nan, 'Sam Houston Bearkats': nan, 'Western Kentucky Hilltoppers': nan, 'Florida 

In [62]:
# most recent elo ratings and momentum scores from historical data
off_ratings = most_recent_historical.set_index('TEAM')['Offensive_Rating'].to_dict()
def_ratings = most_recent_historical.set_index('TEAM')['Defensive_Rating'].to_dict()
off_variances = most_recent_historical.set_index('TEAM')['Offensive_Var'].to_dict()
def_variances = most_recent_historical.set_index('TEAM')['Defensive_Var'].to_dict()
momentum_scores = most_recent_historical.set_index('TEAM')['Momentum'].to_dict()

# elo ratings is a dict of team names to elo ratings, and variances is a dict of team names to elo variances
# combine them into a single dict with team as key, mu as elo rating, and sigma as elo variance
team_strengths2 = {team: {'offense': {'mu': off_ratings.get(team, 1500), 'sigma2': off_variances.get(team, 1500)}, 
                         'defense': {'mu': def_ratings.get(team, 1500), 'sigma2': def_variances.get(team, 1500)}} for team in elo_ratings}

# Iterate over the new data to update Elo and Momentum
for index, row in just_yesterday_df.iterrows():
    team_elo, team_elo_d, team_momentum, opp_elo, opp_elo_d, opp_momentum = niu.update_bayesian_off_def(row, just_yesterday_df, team_strengths2, momentum_scores)
    # Update the dictionaries with the new Elo and momentum values
    try:
        team_strengths2[row['TEAM']]['mu'] = team_elo['mu']
        team_strengths2[row['TEAM']]['mu'] = team_elo_d['mu']
        team_strengths2[row['TEAM']]['sigma2'] = team_elo['sigma2']
        momentum_scores[row['TEAM']] = team_momentum
        team_strengths2[row['Opponent']]['mu'] = opp_elo['mu']
        team_strengths2[row['Opponent']]['mu'] = opp_elo_d['mu']
        team_strengths2[row['Opponent']]['sigma2'] = opp_elo['sigma2']
        momentum_scores[row['Opponent']] = opp_momentum
    except Exception as e:
        print(e)
        pass
# Output the updated Elo ratings and momentum scores
print("Updated Offensive Ratings:", off_ratings)
print("Updated Defensive Ratings:", def_ratings)
print("Updated Momentum Scores:", momentum_scores)


Updated Offensive Ratings: {'Eastern Washington Eagles': 57.496162365915445, 'Montana Grizzlies': -99.87477903758784, 'Idaho Vandals': 31.827567051764472, 'Montana State Bobcats': 52.91307148176856, 'Idaho State Bengals': -135.67235248767702, 'Northern Arizona Lumberjacks': -19.252804482561004, 'Northern Colorado Bears': -2.425427499061552, 'Weber State Wildcats': 28.873112711168616, 'Jacksonville State Gamecocks': -40.70539377321851, 'UTEP Miners': -56.75265106746333, 'Liberty Flames': -1.0503407541751173, 'New Mexico State Aggies': -14.007935460111169, 'Portland State Vikings': -53.67154040937268, 'Sacramento State Hornets': -4.357641573048404, 'UC Irvine Anteaters': 59.67887674317232, 'UC Riverside Highlanders': -75.88718902043288, 'Cal State Fullerton Titans': -19.525797530026207, 'Long Beach State Beach': 84.14465139576214, 'UC Davis Aggies': -16.837762759559425, 'UC Santa Barbara Gauchos': 28.77975347543238, 'Cal State Bakersfield Roadrunners': -10.887262305507882, 'UC San Diego 

### Update TODAY_MAP with new elo/momentum (opps too)

In [65]:
# convert index to TEAM column in TODAY_MAP

TODAY_MAP['ELO_Rating'] = TODAY_MAP['TEAM'].map(elo_ratings)
TODAY_MAP['Offensive_Rating'] = TODAY_MAP['TEAM'].map(team_strengths2).apply(lambda x: x['offense']['mu'] if isinstance(x, dict) else None)
TODAY_MAP['Defensive_Rating'] = TODAY_MAP['TEAM'].map(team_strengths2).apply(lambda x: x['defense']['mu'] if isinstance(x, dict) else None)
TODAY_MAP['Offensive_Var'] = TODAY_MAP['TEAM'].map(team_strengths2).apply(lambda x: x['offense']['sigma2'] if isinstance(x, dict) else None)
TODAY_MAP['Defensive_Var'] = TODAY_MAP['TEAM'].map(team_strengths2).apply(lambda x: x['defense']['sigma2'] if isinstance(x, dict) else None)
TODAY_MAP['Opp_Offensive_Var'] = TODAY_MAP['Opponent'].map(team_strengths2).apply(lambda x: x['offense']['sigma2'] if isinstance(x, dict) else None)
TODAY_MAP['Opp_Defensive_Var'] = TODAY_MAP['Opponent'].map(team_strengths2).apply(lambda x: x['defense']['sigma2'] if isinstance(x, dict) else None)
TODAY_MAP['Momentum'] = TODAY_MAP['TEAM'].map(momentum_scores)
TODAY_MAP['Opp_Elo'] = TODAY_MAP['Opponent'].map(elo_ratings)
TODAY_MAP['Elo_Var'] = TODAY_MAP['TEAM'].map(variances) 
TODAY_MAP['Opp_Elo_Var'] = TODAY_MAP['Opponent'].map(variances)
TODAY_MAP['Opp_Offensive_Rating'] = TODAY_MAP['Opponent'].map(team_strengths2).apply(lambda x: x['offense']['mu'] if isinstance(x, dict) else None)
TODAY_MAP['Opp_Defensive_Rating'] = TODAY_MAP['Opponent'].map(team_strengths2).apply(lambda x: x['defense']['mu'] if isinstance(x, dict) else None)
#TODAY_MAP[['SPREAD_LINE_MOVEMENT_1', 'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3']] = pd.DataFrame(TODAY_MAP['Spread_Movement'].to_list(), index=TODAY_MAP.index)
#TODAY_MAP[['TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2', 'TOTAL_LINE_MOVEMENT_3']] = pd.DataFrame(TODAY_MAP['Total_Movement'].to_list(), index=TODAY_MAP.index)
#TODAY_MAP[['OPP_SPREAD_MOVEMENT_1', 'OPP_SPREAD_MOVEMENT_2', 'OPP_SPREAD_MOVEMENT_3']] = TODAY_MAP['Opponent'].map(TODAY_MAP.set_index('TEAM')[['SPREAD_LINE_MOVEMENT_1', 'SPREAD_LINE_MOVEMENT_2', 'SPREAD_LINE_MOVEMENT_3']].to_dict('index')).apply(lambda x: pd.Series(x) if isinstance(x, dict) else pd.Series([None, None, None]))
#TODAY_MAP[['OPP_TOTAL_MOVEMENT_1', 'OPP_TOTAL_MOVEMENT_2', 'OPP_TOTAL_MOVEMENT_3']] = TODAY_MAP['Opponent'].map(TODAY_MAP.set_index('TEAM')[['TOTAL_LINE_MOVEMENT_1', 'TOTAL_LINE_MOVEMENT_2', 'TOTAL_LINE_MOVEMENT_3']].to_dict('index')).apply(lambda x: pd.Series(x) if isinstance(x, dict) else pd.Series([None, None, None]))

TODAY_MAP['Opp_Momentum'] = TODAY_MAP['Opponent'].map(momentum_scores)
TODAY_MAP[['TEAM', 'Opponent', 'Offensive_Rating', 'Defensive_Rating', 'MONEYLINE', 'CLOSING_SPREAD', 'CLOSING_TOTAL', 'ELO_Rating', 'Opp_Elo', 'Momentum', 'Opp_Momentum', ]]



Unnamed: 0,TEAM,Opponent,Offensive_Rating,Defensive_Rating,MONEYLINE,CLOSING_SPREAD,CLOSING_TOTAL,ELO_Rating,Opp_Elo,Momentum,Opp_Momentum
0,ball state,kent state,,,+425,+10,o138,,,,
1,towson,charleston,,,+240,+6.5,o139,,,,
2,vcu,loyola chicago,,,-245,-6,o139,,,,
3,duquesne,davidson,,,+164,+5,o132.5,,,,
4,western kentucky,fiu,,,-230,-5,o155,,,,
...,...,...,...,...,...,...,...,...,...,...,...
229,cal state fullerton,california san diego,,,+650,+12.5,u136,,,,
230,bakersfield,long beach state,,,-238,-5,u134.5,,,,
231,portland,saint mary's,,,+1200,+18,u138,,,,
232,uc irvine,uc riverside,,,-520,-9,u142.5,,,,


In [232]:
inf_features = list(total_model.feature_names_in_)
s_inf_features = list(spread_model.feature_names_in_)

### update rolling stats, collect features, and perform inference

In [233]:
# get all the rolling stats for today
most_recent_tdf = niu.get_rolling_stats(yesterday_df, today_teams_list)
today_features = TODAY_MAP[today_map_features + ['Offensive_Var', 'Defensive_Var', 'Opp_Offensive_Var', 'Opp_Defensive_Var']]

# merge most_recent_tdf with today_features on TEAM
infer_df = most_recent_tdf.merge(today_features, how='left', on='TEAM')
# convert categorical columns
infer_df['MAIN REF'] = infer_df['Referee'].astype('category')
infer_df['TEAM'] = infer_df['TEAM'].astype('category')
infer_df['CREW'] = infer_df['CREW'].astype('category')
infer_df['Opponent'] = infer_df['Opponent_y'].astype('category')
infer_df['TEAM_REST_DAYS'] = infer_df['TEAM_REST_DAYS'].astype('category')

# fix merged column names
infer_df['MONEYLINE'] = infer_df['MONEYLINE_y']
infer_df['VENUE'] = infer_df['Venue']
infer_df['CLOSING_SPREAD'] = infer_df['CLOSING_SPREAD_y']
infer_df['CLOSING_TOTAL'] = infer_df['CLOSING_TOTAL_y']
infer_df['Elo_Rating'] = infer_df['ELO_Rating']
# convert datatypes
infer_df['VENUE'] = (infer_df['VENUE'] == 'H')*1
infer_df[["MONEYLINE", "Last_ML_1", "Last_ML_2", "Last_ML_3"]] = (
        infer_df[["MONEYLINE", "Last_ML_1", "Last_ML_2", "Last_ML_3"]]
        .replace('even', '-100', regex=True)
        .fillna(0)
        .astype(int)
    )
temp_df = infer_df.dropna(subset=['TEAM', 'Opponent'])
temp_df[['TEAM', 'Opponent']].dropna()
# filter down to train cols
infer_df = infer_df[train_cols +['DATE', 'GAME-ID', 'Offensive_Var', 'Defensive_Var', 'Opp_Offensive_Var', 'Opp_Defensive_Var', 'one_week_totals']]
infer_df = infer_df[inf_features]
# s_infer_df = infer_df[s_inf_features]
# Get predictions from the ensemble models
#spread_probabilities = spread_model.predict_proba(s_infer_df)[:, 1]
total_probabilities = total_model.predict_proba(infer_df)[:, 1]
spread_probabilities = np.zeros(len(total_probabilities))
#ml_probabilities = ml_model.predict_proba(infer_df.drop(['DATE', 'GAME-ID', 'Momentum'], axis=1))[:, 1]
# dummy ml_probabilities the right shape
ml_probabilities = np.zeros(len(spread_probabilities))
# do totals

real_probabilities = {}
processed_games = set()
ps = {team: prob for team, prob in zip(temp_df['TEAM'], total_probabilities)}
for team, opp in zip(temp_df['TEAM'], temp_df['Opponent']):
    # Ensure we process each game only once
    game = tuple(sorted([team, opp]))
    if game not in processed_games:
        processed_games.add(game)
        # Sum probabilities of both teams and divide by 2
        prob_team = ps[team]
        prob_opp = ps[opp]
        average_prob = (prob_team + prob_opp) / 2
        # Assign the average probability to both teams
        real_probabilities[team] = average_prob
        real_probabilities[opp] = average_prob
    else:
        # If the game is already processed, assign the existing average probability
        average_prob = real_probabilities[team]
        
# Update the probabilities mapping with the averaged probabilities
ps = real_probabilities
spread_predictions = np.array([x > 0.5 for x in spread_probabilities])
ml_predictions = np.array([x > 0.5 for x in ml_probabilities])
total_predictions = np.array([x > 0.5 for x in total_probabilities])

infer_df['spread_prob'] = spread_probabilities
infer_df['ml_prob'] = ml_probabilities
infer_df['total_prob'] = total_probabilities

# get the results
today_results = infer_df[['TEAM', 'Opponent', 'MONEYLINE',
                        'CLOSING_SPREAD', 'CLOSING_TOTAL',
                        'spread_prob', 'ml_prob', 'total_prob']].dropna().reset_index(drop=True)

spread_ps = {team: prob for team, prob in zip(today_results['TEAM'].values, spread_probabilities)}
normed_spread_odds = {team: spread_ps[team]/(spread_ps[team] + spread_ps[opp]) for team, opp in zip(today_results['TEAM'], today_results['Opponent'])}
ml_ps = {team: prob for team, prob in zip(today_results['TEAM'].values, ml_probabilities)}
normed_ml_odds = {team: ml_ps[team]/(ml_ps[team] + ml_ps[opp]) for team, opp in zip(today_results['TEAM'], today_results['Opponent'])}
total_ps = ps #{team: prob for team, prob in zip(today_results['TEAM'].values, total_probabilities)}
normed_total_odds = ps #{team: total_ps[team]/(total_ps[team] + total_ps[opp]) for team, opp in zip(today_results['TEAM'], today_results['Opponent'])}

today_results['spread_prob_normed'] = today_results['TEAM'].map(normed_spread_odds)
today_results['ml_prob_normed'] = today_results['TEAM'].map(normed_ml_odds)
today_results['total_prob_normed'] = today_results['TEAM'].map(normed_total_odds)
today_results['total_prob'] = today_results['TEAM'].map(normed_total_odds)

In [234]:
# convert CLOSING_SPREAD to implied probability
today_results['CLOSING_SPREAD_LINE'] = -110
today_results['CLOSING_TOTAL_LINE'] = -110
today_results['spread_implied_prob'] = today_results['CLOSING_SPREAD_LINE'].apply(niu.odds_to_implied_prob)
today_results['total_implied_prob'] = today_results['CLOSING_TOTAL_LINE'].apply(niu.odds_to_implied_prob)
# add column to today_results for kelly criterion
today_results['spread_kelly'] = today_results.apply(lambda x: niu.kelly_criterion(150, x['spread_prob_normed'], -110, temper=0.5), axis=1)
today_results['total_kelly'] = today_results.apply(lambda x: niu.kelly_criterion(150, x['total_prob_normed'], -110, temper=0.5), axis=1)


### print spread predictions

In [235]:
print('NORMALIZED PREDICTED WINNERS\n')
display(today_results[(today_results['spread_prob_normed'] > 0.5) & (today_results['spread_prob'] > 0.5)] \
.sort_values('spread_prob', ascending=False)[['TEAM', 'Opponent','CLOSING_SPREAD',
                                                'spread_prob', 'spread_prob_normed', 'spread_kelly', 'spread_implied_prob']].drop_duplicates(subset=['TEAM']))

print('\nALL PREDICTED WINNERS\n')
today_results[(today_results['spread_prob_normed'] > 0.5)] \
.sort_values('spread_prob', ascending=False)[['TEAM', 'Opponent','CLOSING_SPREAD',
                                                'spread_prob', 'spread_prob_normed']].drop_duplicates(subset=['TEAM'])


NORMALIZED PREDICTED WINNERS



Unnamed: 0,TEAM,Opponent,CLOSING_SPREAD,spread_prob,spread_prob_normed,spread_kelly,spread_implied_prob



ALL PREDICTED WINNERS



Unnamed: 0,TEAM,Opponent,CLOSING_SPREAD,spread_prob,spread_prob_normed


### print ml predictions

In [236]:
print('NORMALIZED PREDICTED WINNERS\n')
display(today_results[(today_results['ml_prob_normed'] > 0.5) & (today_results['ml_prob'] > 0.5)] \
.sort_values('ml_prob', ascending=False)[['TEAM', 'Opponent','MONEYLINE',
                                                'ml_prob', 'ml_prob_normed']].drop_duplicates(subset=['TEAM']))

print('ALL PREDICTED WINNERS\n')
display(today_results[(today_results['ml_prob_normed'] > 0.5)] \
.sort_values('ml_prob', ascending=False)[['TEAM', 'Opponent','MONEYLINE',
                                                'ml_prob', 'ml_prob_normed']].drop_duplicates(subset=['TEAM']))


NORMALIZED PREDICTED WINNERS



Unnamed: 0,TEAM,Opponent,MONEYLINE,ml_prob,ml_prob_normed


ALL PREDICTED WINNERS



Unnamed: 0,TEAM,Opponent,MONEYLINE,ml_prob,ml_prob_normed


### print total predictions

In [237]:
import warnings
warnings.simplefilter(action='ignore')

print('PREDICTED OVERS')
over_df = today_results[(today_results['total_prob'] > 0.5)]
over_df["sorted_pair"] = over_df.apply(lambda x: tuple(sorted([x["TEAM"], x["Opponent"]])), axis=1)
over_df = over_df.drop_duplicates(subset="sorted_pair", keep="first")

total_frame = today_results.drop_duplicates(subset=['TEAM'])
display(over_df[(total_frame['total_prob'] > 0.5)] \
.sort_values('total_prob', ascending=False)[['TEAM', 'Opponent','CLOSING_TOTAL',
                                                'total_prob', 'total_kelly', 'total_prob_normed']])
under_df = total_frame[(total_frame['total_prob'] <= 0.5)]
under_df['total_prob'] = 1 - under_df['total_prob']
under_df["sorted_pair"] = under_df.apply(lambda x: tuple(sorted([x["TEAM"], x["Opponent"]])), axis=1)
under_df = under_df.drop_duplicates(subset="sorted_pair", keep="first")
under_df['total_kelly'] = under_df.apply(lambda x: niu.kelly_criterion(150, x['total_prob'], -110, temper=0.5), axis=1)
print('\nPREDICTED UNDERS\n')
display(under_df \
.sort_values('total_prob', ascending=False)[['TEAM', 'Opponent','CLOSING_TOTAL',
                                                'total_prob', 'total_kelly']])

PREDICTED OVERS


Unnamed: 0,TEAM,Opponent,CLOSING_TOTAL,total_prob,total_kelly,total_prob_normed
1,Minnesota,Detroit,217.5,0.989506,73.347243,0.989506
17,San Antonio,Denver,236.0,0.885697,56.997222,0.885697
4,Phoenix,Indiana,233.5,0.85334,51.900975,0.85334
0,New York,Chicago,233.5,0.747803,35.279018,0.747803
7,Utah,Miami,223.0,0.551107,4.299299,0.551107
5,LA Clippers,Atlanta,228.0,0.519139,-0.735559,0.519139
2,Golden State,Memphis,233.5,0.517572,-0.982479,0.517572



PREDICTED UNDERS



Unnamed: 0,TEAM,Opponent,CLOSING_TOTAL,total_prob,total_kelly
11,Philadelphia,Brooklyn,214.0,0.845544,50.673183
3,Portland,Milwaukee,228.5,0.554968,4.90753
