In [3]:
import pandas as pd
import numpy as np
import os
# import datetime and timedelta
from datetime import datetime, timedelta
import requests
import joblib
from utils import nba_inference_utils as niu
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas chained-assignment warnings raised by later cells.
warnings.simplefilter('ignore')

# Directories, relative to the notebook, holding the input data and the fitted models.
DATA_ROOT, MODEL_ROOT = '../live_data', '../models'

In [2]:
# Player data used only to recover the column list the models expect.
df = pd.read_csv(f'{DATA_ROOT}/nba_players_data_011925.csv')

# Columns that must not be fed to the models: identifiers, raw box-score
# columns and the three *_target columns.
non_feature_cols = {
    'DATE', 'PLAYER_FULL_NAME',
    'DR', 'TOT', 'PF', 'ST', 'TO', 'BL',
    'PTS', 'PTS_target', 'A_target', 'R_target',
}

# Keep the CSV's own column order for everything that is not excluded.
features = [name for name in df.columns if name not in non_feature_cols]

In [4]:
# Load the three fitted models (points, assists, rebounds) from MODEL_ROOT.
# joblib.load unpickles the file, so only point it at model files you trust.
pts_model, a_model, r_model = (
    joblib.load(f'{MODEL_ROOT}/{stem}_model.pkl') for stem in ('pts', 'a', 'r')
)

In [None]:
# Fetch the current player feed and read it next to the 2023-2024 box-score workbook.
yesterday_file = niu.download_current_players_data()
today_data = pd.read_excel(yesterday_file)
d6 = pd.read_excel(f'{DATA_ROOT}/NBA-2023-2024-Player-BoxScore-Dataset.xlsx')

# Stack both sources into one frame with a fresh 0..n-1 index.
infer_df = pd.concat([d6, today_data], ignore_index=True)

# Normalise column names: spaces and newlines become underscores, then each
# double underscore is replaced once by a single underscore.
infer_df.columns = (
    infer_df.columns
    .str.replace(' ', '_')
    .str.replace('\n', '_')
    .str.replace('__', '_')
)

# Parse dates and derive R as the sum of the OR and DR columns.
infer_df['DATE'] = pd.to_datetime(infer_df['DATE'])
infer_df['R'] = infer_df['OR'] + infer_df['DR']

./01-19-2025-nba-season-player-feed.xlsx
200


In [40]:
# Order the frame by player, then by date.
infer_df = infer_df.sort_values(by=['PLAYER-ID', 'DATE'])

# One row per player: the row holding that player's latest DATE.
latest_row_labels = infer_df.groupby('PLAYER-ID')['DATE'].idxmax()
recent_infer_df = infer_df.loc[latest_row_labels].reset_index(drop=True)

# Sum PTS per (game, opponent, own team, position), then order by opponent and GAME-ID.
# NOTE(review): the sums are taken over recent_infer_df (one row per player),
# not over every box-score row of each game -- confirm this is intended.
points_given_up = (
    recent_infer_df
    .groupby(['GAME-ID', 'OPPONENT_TEAM', 'OWN_TEAM', 'POSITION'])['PTS']
    .sum()
    .reset_index()
    .rename(columns={'PTS': 'PTS_Allowed'})
    .sort_values(by=['OPPONENT_TEAM', 'GAME-ID'])
)

# Rolling mean of PTS_Allowed within each (opponent, position) group.
# NOTE(review): the window is 5 rows although the column is named
# 'Rolling_Avg_Last_3'; rows are ordered by GAME-ID, not DATE -- confirm both
# against the training pipeline before changing either.
points_given_up['Rolling_Avg_Last_3'] = (
    points_given_up
    .groupby(['OPPONENT_TEAM', 'POSITION'])['PTS_Allowed']
    .transform(lambda allowed: allowed.rolling(window=5, min_periods=1).mean())
)

# One row per (GAME-ID, OWN_TEAM), one column per POSITION.
points_given_up_pivot = points_given_up.pivot(
    index=['GAME-ID', 'OWN_TEAM'],
    columns='POSITION',
    values=['Rolling_Avg_Last_3'],
)

# Collapse the two-level column index into '<value name>_<position>' labels.
points_given_up_pivot.columns = [
    f'{value_name}_{position}' for value_name, position in points_given_up_pivot.columns
]

# Turn the index back into columns, then fill gaps from the row above.
# NOTE(review): the fill runs down the whole frame, so a missing position value
# is copied from the previous row even when that row is a different game/team.
points_given_up_pivot = points_given_up_pivot.reset_index().ffill()

# Show the last rows for a visual check.
display(points_given_up_pivot.tail())


Unnamed: 0,GAME-ID,OWN_TEAM,Rolling_Avg_Last_3_C,Rolling_Avg_Last_3_C-F,Rolling_Avg_Last_3_F,Rolling_Avg_Last_3_F-C,Rolling_Avg_Last_3_F-G,Rolling_Avg_Last_3_G,Rolling_Avg_Last_3_G-F
277,52300111,Atlanta,1.5,2.0,21.75,29.0,2.0,29.6,8.0
278,52300131,Golden State,1.5,2.0,11.0,29.0,2.0,29.6,8.0
279,52300131,Sacramento,1.5,1.0,11.0,29.0,2.0,29.6,8.0
280,52300211,Sacramento,1.5,1.0,7.0,29.0,2.0,29.6,8.0
281,62400001,Milwaukee,1.5,1.0,7.0,29.0,2.0,16.4,8.0


In [41]:
# Merge keys plus every pivot column whose name contains 'Rolling_Avg_Last_3'.
rolling_avg_columns = ['GAME-ID', 'OWN_TEAM'] + list(
    points_given_up_pivot.filter(like='Rolling_Avg_Last_3').columns
)
rolling_avg_df = points_given_up_pivot[rolling_avg_columns]

# Attach the rolling averages to each player's most recent row.
recent_with_rolling = recent_infer_df.merge(
    rolling_avg_df, on=['GAME-ID', 'OWN_TEAM'], how='left'
)

# Box-score and identifier columns to keep, followed by the rolling-average columns.
box_score_cols = [
    'GAME-ID', 'PLAYER_FULL_NAME', 'PLAYER-ID', 'DATE', 'PTS', 'USAGE_RATE_(%)',
    '3P', '3PA', 'FG', 'FGA', 'A', 'R', 'FT', 'FTA', 'OWN_TEAM', 'OPPONENT_TEAM',
    'MIN', 'DR', 'TOT', 'PF', 'ST', 'TO', 'BL', 'VENUE_(R/H)', 'STARTER_(Y/N)', 'POSITION',
]
infer_subset_cols = box_score_cols + [
    name for name in recent_with_rolling.columns if 'Rolling_Avg_Last_3' in name
]

# From here on infer_df is the per-player frame, not the full game history.
infer_df = recent_with_rolling[infer_subset_cols]

# Show the first rows for a visual check.
display(infer_df.head())


Unnamed: 0,GAME-ID,PLAYER_FULL_NAME,PLAYER-ID,DATE,PTS,USAGE_RATE_(%),3P,3PA,FG,FGA,...,VENUE_(R/H),STARTER_(Y/N),POSITION,Rolling_Avg_Last_3_C,Rolling_Avg_Last_3_C-F,Rolling_Avg_Last_3_F,Rolling_Avg_Last_3_F-C,Rolling_Avg_Last_3_F-G,Rolling_Avg_Last_3_G,Rolling_Avg_Last_3_G-F
0,22400596,LeBron James,2544,2025-01-19,25,38.096026,2,6,9,20,...,,Y,F,6.5,4.0,13.5,12.5,25.0,11.666667,13.0
1,22400592,Chris Paul,101108,2025-01-19,10,9.90077,3,6,3,6,...,,Y,G,16.5,2.0,12.0,15.25,8.0,9.4,11.5
2,22400478,Kyle Lowry,200768,2025-01-04,5,5.321724,1,1,2,2,...,,N,G,0.0,28.0,13.333333,0.0,14.0,2.0,6.0
3,42300176,P.J. Tucker,200782,2024-05-03,8,18.105823,2,3,3,5,...,R,Y,F,15.5,4.0,5.5,29.0,10.666667,25.8,8.0
4,22400586,Kevin Durant,201142,2025-01-18,36,38.407776,2,8,13,27,...,,Y,F,13.5,6.0,16.0,4.0,31.0,15.0,18.0


In [42]:
# Order rows by player, then by date, before taking rolling windows.
infer_df = infer_df.sort_values(by=['PLAYER-ID', 'DATE'])

stats = ['PTS', 'USAGE_RATE_(%)', '3P', '3PA', 'FG', 'FGA', 'A', 'R', 'FT', 'FTA']

# For every stat add '<stat>_3G_avg' and '<stat>_5G_avg': the per-player rolling
# mean over up to 3 and up to 5 rows.
# NOTE(review): infer_df was rebuilt from recent_infer_df (one row per player)
# in the merge cell, so each window here sees a single row and the "average"
# equals that row's value -- confirm this matches how the models were trained.
for stat in stats:
    for window in (3, 5):
        infer_df[f'{stat}_{window}G_avg'] = (
            infer_df
            .groupby('PLAYER-ID')[stat]
            .transform(lambda values, w=window: values.rolling(w, min_periods=1).mean())
        )


In [43]:

# Build the model input from the per-player frame using the `features` list
# derived from the training-data columns. The explicit .copy() makes X_infer an
# independent frame, so the dtype conversion below never writes into a slice of
# infer_df.
categorical_cols = ['OWN_TEAM', 'OPPONENT_TEAM', 'VENUE_(R/H)', 'STARTER_(Y/N)', 'POSITION']
X_infer = infer_df[features].copy()

# NOTE(review): categories are inferred from the inference rows only; confirm
# the models do not depend on the category codes seen at training time.
X_infer = X_infer.astype({col: 'category' for col in categorical_cols})

# The predictions are plain arrays and are attached to recent_infer_df by
# position, so both frames must list the same players in the same order.
# Fail loudly instead of silently attributing predictions to the wrong player.
assert np.array_equal(
    infer_df['PLAYER-ID'].to_numpy(), recent_infer_df['PLAYER-ID'].to_numpy()
), 'infer_df and recent_infer_df are not row-aligned on PLAYER-ID'

# NOTE(review): only the points model is fed without GAME-ID; the assists and
# rebounds models receive the full feature set -- confirm against training.
recent_infer_df['PTS_pred'] = pts_model.predict(X_infer.drop(columns=['GAME-ID']))
recent_infer_df['A_pred'] = a_model.predict(X_infer)
recent_infer_df['R_pred'] = r_model.predict(X_infer)

# Identifier columns plus the three predictions, as an independent frame so
# later cells can modify it without touching recent_infer_df.
player_predictions = recent_infer_df[
    ['GAME-ID', 'PLAYER-ID', 'PLAYER_FULL_NAME', 'OWN_TEAM', 'OPPONENT_TEAM',
     'PTS_pred', 'A_pred', 'R_pred']
].copy()



In [44]:
# Request the daily-lineups JSON for today's date (local clock, YYYYMMDD).
today = datetime.today().strftime('%Y%m%d')
response = requests.get(
    f'http://stats.nba.com/js/data/leaders/00_daily_lineups_{today}.json',
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Surface HTTP errors here rather than as a JSON decoding failure in the next cell.
response.raise_for_status()

In [45]:
# Collect every listed player name, game by game, home team first and then away.
starters = [
    player['playerName']
    for game in response.json()['games']
    for side in ('homeTeam', 'awayTeam')
    for player in game[side]['players']
]

len(starters)

106

In [46]:
# Keep predictions for players named in today's lineups, highest predicted points first.
in_todays_lineups = player_predictions['PLAYER_FULL_NAME'].isin(starters)
today_preds = player_predictions[in_todays_lineups].sort_values(by=['PTS_pred'], ascending=False)

In [47]:


# Headless Chrome configuration for the scrape.
options = Options()
options.add_argument('--headless')  # no visible browser window
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Page listing the player prop lines.
url = 'https://www.scoresandodds.com/nba/props'

# One dict per (player, over/under entry); the keys become the DataFrame columns.
player_data = []

driver = webdriver.Chrome(options=options)
try:
    driver.get(url)

    # Fixed pause to let the page render; adjust to the page's load time.
    time.sleep(5)

    # Each prop is an 'li.border' directly under 'ul.table-list'.
    prop_elements = driver.find_elements(By.CSS_SELECTOR, 'ul.table-list > li.border')

    for prop in prop_elements:
        player_name = prop.get_attribute('data-name')
        projected_score = prop.get_attribute('data-proj')
        delta = prop.get_attribute('data-delta')

        # Every 'best-odds-container' holds one line together with its odds.
        odds_entries = prop.find_elements(By.CLASS_NAME, 'best-odds-container')

        for odds in odds_entries:
            line_text = odds.find_element(By.CLASS_NAME, 'data-moneyline').text
            odds_text = odds.find_element(By.CLASS_NAME, 'data-odds').text

            # The first character is the 'o'/'u' marker and the rest is the line.
            # Test the prefix itself rather than searching the whole string.
            over_under = 'Over' if line_text.startswith('o') else 'Under'
            line_value = line_text[1:]

            player_data.append({
                'Player': player_name,
                'Projected_Score': projected_score,
                'Delta': delta,
                'Line_Type': over_under,
                'Line_Value': line_value,
                'Odds': odds_text
            })
finally:
    # Always release the browser, even when a lookup above raises.
    driver.quit()

# Explicit columns keep the schema stable when nothing was scraped, so the
# de-duplication below cannot fail on a missing 'Player' column.
prop_columns = ['Player', 'Projected_Score', 'Delta', 'Line_Type', 'Line_Value', 'Odds']

# NOTE: this rebinds `df`, replacing the player-data frame loaded near the top
# of the notebook.
df = pd.DataFrame(player_data, columns=prop_columns)

# Keep only the first scraped row per player.
df = df.drop_duplicates(subset=['Player'])

df.tail()


Unnamed: 0,Player,Projected_Score,Delta,Line_Type,Line_Value,Odds
196,al horford,6.55,1.05,Over,5.5,-130
198,isaiah stewart,6.03,0.53,Over,5.5,-109
200,sam hauser,6.53,1.03,Over,5.5,-110
202,maxi kleber,2.84,-1.66,Over,4.5,even
204,nicolas batum,3.49,-0.01,Over,3.5,+130


In [48]:
# `df` is the scraped prop-line frame; `player_predictions` holds the model output.

# Join on lower-cased player names. The lower-cased predictions live in a new
# frame so `player_predictions` keeps its original casing and the earlier
# starters filter can be re-run without silently matching nothing.
df['Player'] = df['Player'].str.lower()
predictions_lower = player_predictions.assign(
    PLAYER_FULL_NAME=player_predictions['PLAYER_FULL_NAME'].str.lower()
)

# Left join keeps every predicted player; players without a scraped line get NaN.
# The redundant 'Player' key column is dropped after the join.
merged_df = (
    predictions_lower
    .merge(df, left_on='PLAYER_FULL_NAME', right_on='Player', how='left')
    .drop(columns=['Player'])
)

# Scraped values are strings. Coerce instead of casting so an empty or
# non-numeric entry becomes NaN rather than aborting the cell.
merged_df['Line_Value'] = pd.to_numeric(merged_df['Line_Value'], errors='coerce')
projected_score = pd.to_numeric(merged_df['Projected_Score'], errors='coerce')

# Delta  : model points prediction minus the line (overwrites the scraped 'Delta').
# Delta2 : the site's projected score minus the line.
merged_df['Delta'] = merged_df['PTS_pred'] - merged_df['Line_Value']
merged_df['Delta2'] = projected_score - merged_df['Line_Value']



In [49]:
player_cols = ['GAME-ID', 'PLAYER-ID', 'PLAYER_FULL_NAME', 'OWN_TEAM', 'PTS_pred', 'Projected_Score', 'Line_Value', 'Delta', 'Delta2']

# Row filter shared by every team: both deltas positive and the line inside one
# of four closed bands.
# NOTE(review): the bands are 11-15, 16-20, 21-25 and 26-30 inclusive, so lines
# that fall between them (e.g. 15.5) are excluded -- confirm the gaps are intended.
prop_lines = merged_df['Line_Value']
in_line_band = (
    prop_lines.between(11, 15)
    | prop_lines.between(16, 20)
    | prop_lines.between(21, 25)
    | prop_lines.between(26, 30)
)
qualifies = (merged_df['Delta'] > 0) & (merged_df['Delta2'] > 0) & in_line_band

for team in merged_df.OWN_TEAM.unique():
    on_team = merged_df['OWN_TEAM'] == team

    # Stay silent for teams that have no fully populated row at all.
    if merged_df[on_team].dropna().empty:
        continue

    plays = merged_df[on_team & qualifies]
    if plays.empty:
        print(f'No plays for {team}')
        print()
    else:
        print(team)
        display(plays.sort_values(by=['Delta'], ascending=False)[player_cols].dropna())
        print()

LA Clippers


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
19,22400596,201935,james harden,LA Clippers,22.639833,25.44,21.5,1.139833,3.94



Phoenix


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
109,22400586,1626164,devin booker,Phoenix,30.2264,27.89,26.5,3.7264,1.39



Boston


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
173,22400587,1628401,derrick white,Boston,20.130219,14.16,13.5,6.630219,0.66
100,22400577,204001,kristaps porzingis,Boston,17.758562,19.5,17.5,0.258562,2.0



Minnesota


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
71,22400590,203497,rudy gobert,Minnesota,13.725427,11.89,11.5,2.225427,0.39
318,22400590,1630183,jaden mcdaniels,Minnesota,12.33332,12.14,11.5,0.83332,0.64



Houston


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
536,22400576,1641708,amen thompson,Houston,16.832413,14.91,14.5,2.332413,0.41



Memphis


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
339,22400582,1630217,desmond bane,Memphis,20.806425,17.68,17.5,3.306425,0.18



No plays for Golden State

Charlotte


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
463,22400584,1631109,mark williams,Charlotte,19.075855,18.6,16.5,2.575855,2.1



No plays for Utah

Atlanta


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
441,22400587,1630700,dyson daniels,Atlanta,17.661322,12.41,11.5,6.161322,0.91



Dallas


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
226,22400583,1629023,p.j. washington,Dallas,13.764725,15.23,13.5,0.264725,1.73



Cleveland


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
262,22400590,1629636,darius garland,Cleveland,24.790741,22.79,21.5,3.290741,1.29



Chicago


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
77,22400597,203897,zach lavine,Chicago,26.373491,24.17,23.5,2.873491,0.67
42,22400597,202696,nikola vucevic,Chicago,18.527771,18.62,18.5,0.027771,0.12



Detroit


Unnamed: 0,GAME-ID,PLAYER-ID,PLAYER_FULL_NAME,OWN_TEAM,PTS_pred,Projected_Score,Line_Value,Delta,Delta2
43,22400586,202699,tobias harris,Detroit,16.21908,13.14,12.5,3.71908,0.64



No plays for New York

No plays for New Orleans

