In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
import duckdb
import os

# Working directory with every backslash turned into a forward slash, so the
# prefix test below is written against a single separator style.
cwd = "/".join(os.path.abspath(os.getcwd()).split("\\"))

# A working directory under this Windows profile means a local run;
# every other location is treated as a cloud run.
RUN_LOCATION = "local" if cwd.startswith("C:/Users/Rodolfo/") else "cloud"

# Hour offset per run location (looked up later when deriving today's date).
time_offset = dict(local=3, cloud=-5)

print("Current working dir:", cwd)
print("RUN_LOCATION =", RUN_LOCATION)

Current working dir: C:/Users/Rodolfo/Jupyter_files/FantasyBasketball/notebooks
RUN_LOCATION = local


In [2]:
# Notebook-wide display settings: never truncate columns or cell contents.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
# NOTE(review): this silences *every* warning (including pandas deprecation and
# chained-assignment warnings); consider narrowing the filter.
warnings.filterwarnings("ignore")

# "Today" as a YYYY-MM-DD string: wall-clock time shifted by the per-location
# offset and then by a further -3 hours before the date part is taken.
now = str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION]) + timedelta(hours=-3)).date())
print("Today's date:", now)

# In-memory DuckDB connection, used for SQL aggregations over DataFrames.
con = duckdb.connect(database=":memory:")
categories = ['PTS', 'AST', 'REB', 'PR', 'PA', 'RA', 'PRA', 'TPM', 'STL', 'BLK', 'STL_BLK']

# Load one game log per season folder and stack them into a single frame.
# Folders are visited in sorted order (os.listdir order is arbitrary), entries
# whose name is not a season number are skipped instead of crashing int(), and
# the frames are concatenated once rather than re-copied on every iteration.
folders = sorted(os.listdir('../tables/'))
season_frames = []
for yr in folders:
    if not yr.isdigit():
        continue  # stray file/folder (e.g. .DS_Store, .ipynb_checkpoints)
    df_temp = pd.read_csv(f"../tables/{yr}/season_gamelogs.csv")
    df_temp.insert(0, 'Season', int(yr))
    season_frames.append(df_temp)
if not season_frames:
    raise FileNotFoundError("No season folders with season_gamelogs.csv found under ../tables/")
df_gmlog = pd.concat(season_frames)
df_gmlog['Date'] = pd.to_datetime(df_gmlog.Date)
df_gmlog = df_gmlog.rename(columns={"TRB": "REB", "3PM": "TPM", "3PA": "TPA"})

# Combination categories derived from the base box-score columns.
df_gmlog['STL_BLK'] = df_gmlog.STL + df_gmlog.BLK
df_gmlog['PR'] = df_gmlog.PTS + df_gmlog.REB
df_gmlog['PA'] = df_gmlog.PTS + df_gmlog.AST
df_gmlog['RA'] = df_gmlog.REB + df_gmlog.AST
df_gmlog['PRA'] = df_gmlog.PTS + df_gmlog.REB + df_gmlog.AST

Today's date: 2026-01-20


In [3]:
# Run the shared helper notebook so the names it defines are available here.
# NOTE(review): YEAR and partition_save_df are used in later cells but are not
# defined anywhere in this notebook, so they presumably come from
# common_utils.ipynb — confirm.
%run ./common_utils.ipynb

In [4]:
# Load the season schedule and keep only the games dated `now`.
df_gms = pd.read_csv(f"../tables/{YEAR}/nba_schedule.csv")
df_gms['Date'] = pd.to_datetime(df_gms.Date)
# .copy() so the id columns below are added to an independent frame instead of
# being assigned onto a filtered slice of df_gms (chained assignment).
df_gms_td = df_gms[(df_gms.Date == now)].copy()
# Each game gets both orderings of its id: AWAY_HOME and HOME_AWAY.
df_gms_td['gm_id'] = df_gms_td.AwayABV + "_" + df_gms_td.HomeABV
df_gms_td['gm_id2'] = df_gms_td.HomeABV + "_" + df_gms_td.AwayABV
gms_today = df_gms_td.gm_id.tolist() + df_gms_td.gm_id2.tolist()
display(df_gms_td.drop(['gm_id', 'gm_id2'], axis=1))

Unnamed: 0,Date,StartTime_ET,AwayTeam,AwayABV,AwayPTS,HomeTeam,HomeABV,HomePTS,is_OT,Arena,AwayB2B,HomeB2B,cup_gm,pstszn_gm,rtrvd
642,2026-01-20,7:00p,Phoenix Suns,PHO,,Philadelphia 76ers,PHI,,0,Xfinity Mobile Arena,1,1,0,0,0
643,2026-01-20,8:00p,Los Angeles Clippers,LAC,,Chicago Bulls,CHI,,0,United Center,1,0,0,0,0
644,2026-01-20,8:00p,San Antonio Spurs,SAS,,Houston Rockets,HOU,,0,Toyota Center,1,0,0,0,0
645,2026-01-20,9:00p,Minnesota Timberwolves,MIN,,Utah Jazz,UTA,,0,Delta Center,0,1,0,0,0
646,2026-01-20,10:00p,Los Angeles Lakers,LAL,,Denver Nuggets,DEN,,0,Ball Arena,0,0,0,0,0
647,2026-01-20,10:00p,Toronto Raptors,TOR,,Golden State Warriors,GSW,,0,Chase Center,0,1,0,0,0
648,2026-01-20,10:00p,Miami Heat,MIA,,Sacramento Kings,SAC,,0,Golden 1 Center,1,0,0,0,0


In [5]:
def gather_stats(YEAR, stat, pos, df_gmlog, df_gms, now):
    """Build the matchup table for one stat category and one position.

    For every player at ``pos`` whose team plays on ``now``, combine the
    player's own production in ``stat`` (season mean, rolling 3/5/10-game
    means, rank) with what the day's opponent has allowed to that position
    (same set of measures), and drop players listed as 'Out'.

    Parameters
    ----------
    YEAR : season value matched against ``df_gmlog.Season``; also used in the
        injuries CSV path.
    stat : name of a numeric column of ``df_gmlog`` (e.g. 'PTS', 'PRA').
    pos : position label matched against ``df_gmlog.Pos``.
    df_gmlog : game-log frame; the columns read here are Active, Season, Pos,
        Player, Team, Opp, Date, MP and ``stat``.
    df_gms : schedule frame with Date, AwayABV, HomeABV, AwayB2B, HomeB2B.
    now : date (string or timestamp) selecting the games and injury rows.

    Returns
    -------
    pandas.DataFrame with columns Team, B2B, Player, Pos, Opp,
    Off_{stat}, Off_L3/L5/L10_{stat}, Off_{stat}_Rk,
    Def_{stat}, Def_L3/L5/L10_{stat}, Def_{stat}_Rk,
    ordered by (Def rank - Off rank) descending.

    Notes
    -----
    Uses the notebook-level DuckDB connection ``con`` and reads
    ``../tables/{YEAR}/injuries.csv`` on every call.
    """
    print(pos, '- position')

    # One row per team playing on `now`. Each side is built straight from the
    # schedule so the home team receives HomeB2B; previously HomeB2B had been
    # dropped before the home rows were derived, so home teams silently
    # inherited the away team's back-to-back flag.
    games_today = df_gms[(df_gms.Date == now)]
    away_side = games_today[['AwayABV', 'HomeABV', 'AwayB2B']].rename(
        columns={"AwayABV": "Team", "HomeABV": "Opp", "AwayB2B": "B2B"})
    home_side = games_today[['HomeABV', 'AwayABV', 'HomeB2B']].rename(
        columns={"HomeABV": "Team", "AwayABV": "Opp", "HomeB2B": "B2B"})
    df_mtch = pd.concat([away_side, home_side])

    # Rows in scope for this call; the defensive view additionally requires
    # at least 18 minutes played.
    in_scope = (df_gmlog.Active == 1) & (df_gmlog.Season == YEAR) & (df_gmlog.Pos == pos)
    in_scope_def = in_scope & (df_gmlog.MP >= 18)

    df = df_gmlog[in_scope].copy().sort_values(['Player', 'Date'])
    final_cols = ['Team', 'B2B', 'Player', 'Pos', 'Opp']

    # Offensive Player derivations: season mean plus rolling means over the
    # last N games (rows are date-ordered within each player).
    df[f'Off_{stat}'] = (
        df.groupby(['Player'])[stat]
          .transform('mean')
    )
    final_cols.append(f'Off_{stat}')

    for N in [3, 5, 10]:
        df[f'Off_L{N}_{stat}'] = (
            df.groupby('Player')[stat]
              .transform(lambda x: x.rolling(window=N, min_periods=1).mean())
        )
        final_cols.append(f'Off_L{N}_{stat}')

    # Collapse to one row per player (groupby.last takes the last non-null
    # value of every column).
    df = df.groupby(['Player']).last().reset_index()

    # Offensive rank: 1 = highest per-(Team, Player) average.
    # The SQL text refers to the local DataFrame by its variable name, so
    # `df_rk`, `df2` and `df2_rk` must keep these exact names.
    df_rk = df_gmlog[in_scope]
    df_rk = con.execute(f"""SELECT Team, Player, AVG({stat}) as {stat} FROM df_rk GROUP BY Team, Player ORDER BY {stat} DESC""").fetchdf()
    df_rk[f'Off_{stat}_Rk'] = df_rk[stat].rank(method='dense', ascending=False).astype(int)
    df = df.merge(df_rk[['Team', 'Player', f'Off_{stat}_Rk']], on=['Team', 'Player'])
    final_cols.append(f'Off_{stat}_Rk')

    # Defensive Opponents derivations: per-game totals allowed by each
    # opponent to this position, then the same mean / rolling-mean measures.
    df2 = df_gmlog[in_scope_def]
    df2 = con.execute(f"""SELECT Date, Opp, SUM({stat}) as {stat} FROM df2 GROUP BY Date, Opp ORDER BY Opp, Date""").fetchdf()

    df2[f'Def_{stat}'] = (
        df2.groupby(['Opp'])[stat]
          .transform('mean')
    )
    final_cols.append(f'Def_{stat}')

    for N in [3, 5, 10]:
        df2[f'Def_L{N}_{stat}'] = (
            df2.groupby(['Opp'])[stat]
              .transform(lambda x: x.rolling(window=N, min_periods=1).mean())
        )
        final_cols.append(f'Def_L{N}_{stat}')
    df2 = df2.groupby(['Opp']).last().reset_index()

    # Defensive rank: 1 = lowest average game total allowed.
    df2_rk = df_gmlog[in_scope_def]
    df2_rk = con.execute(f"""SELECT Opp, AVG(game_total) AS {stat} FROM (
                                SELECT Date, Opp, SUM({stat}) AS game_total
                                FROM df2_rk
                                GROUP BY Date, Opp) 
                            GROUP BY Opp
                            ORDER BY {stat}""").fetchdf()
    df2_rk[f'Def_{stat}_Rk'] = df2_rk[stat].rank(method='dense', ascending=True).astype(int)
    df2 = df2.merge(df2_rk[['Opp', f'Def_{stat}_Rk']], on=['Opp'])
    final_cols.append(f'Def_{stat}_Rk')

    # Attach today's opponent to each player, then that opponent's defensive
    # numbers (left join: players are kept even without defensive data).
    df3 = df[['Team', 'Player', 'Pos', f'Off_{stat}', f'Off_L3_{stat}', f'Off_L5_{stat}', f'Off_L10_{stat}', f'Off_{stat}_Rk']].merge(df_mtch, on='Team')
    df3 = df3.merge(df2[['Opp', f'Def_{stat}', f'Def_L3_{stat}', f'Def_L5_{stat}', f'Def_L10_{stat}', f'Def_{stat}_Rk']], on='Opp', how='left')

    # Largest (defensive rank - offensive rank) first.
    df3['Rk_Diff'] = df3[f'Def_{stat}_Rk'] - df3[f'Off_{stat}_Rk']
    df3 = df3.sort_values('Rk_Diff', ascending=False)

    # Drop players whose injury status for `now` is 'Out'; players without an
    # injury row have a NaN Status and are kept.
    df_injuries = pd.read_csv(f"../tables/{YEAR}/injuries.csv")
    df_injuries['Date'] = pd.to_datetime(df_injuries.Date)
    df_injuries = df_injuries[df_injuries.Date == now]
    df3 = df3.merge(df_injuries[['Team', 'Player', 'Status']], on=['Team', 'Player'], how='left')
    df3 = df3[df3.Status != 'Out'][final_cols]

    return df3

In [7]:
def main(YEAR, df_gmlog, now, reload=False):
    """Compute the matchup stats for every category/position and save them.

    Parameters
    ----------
    YEAR : season identifier used in the ``../tables/{YEAR}/`` paths and
        passed through to ``gather_stats``.
    df_gmlog : game-log frame passed through to ``gather_stats``.
    now : date (YYYY-MM-DD string) the stats are computed and saved for.
    reload : when truthy, restrict ``df_gmlog`` to rows dated on or before
        ``now`` first (used when back-filling historical dates).

    Side effects: prints progress, displays the result and hands it to
    ``partition_save_df`` with the path ``../tables/{YEAR}/parlay_stats.csv``.
    """
    if reload:
        # NOTE(review): `<=` keeps games played on `now` itself, which would
        # not have been available before tip-off on that date — confirm
        # whether `<` is intended for back-fills.
        df_gmlog = df_gmlog[df_gmlog.Date <= now]

    df_gms = pd.read_csv(f"../tables/{YEAR}/nba_schedule.csv")
    df_gms['Date'] = pd.to_datetime(df_gms.Date)

    # Collect every (stat, position) frame and concatenate once instead of
    # re-copying the accumulated frame on each iteration.
    stat_frames = []
    for stat in categories:
        print(f"==={stat}===")
        for position in ['PG', 'SG', 'SF', 'PF', 'C']:
            stat_frames.append(gather_stats(YEAR, stat, position, df_gmlog, df_gms, now))
    df_stats = pd.concat(stat_frames, ignore_index=True)

    # Each player has one row per stat category, with the other categories'
    # columns empty. Fill within the player in both directions so all rows
    # carry all values, then collapse the now-identical rows.
    fill_cols = df_stats.columns.difference(['Date', 'Team', 'Player'])
    player_frames = []
    for player in df_stats.Player.unique():
        # .copy(): assign onto an independent frame, not a slice of df_stats.
        df_temp = df_stats[(df_stats.Player == player)].copy()
        for col in fill_cols:
            # bfill() replaces the deprecated Series.backfill().
            df_temp[col] = df_temp[col].ffill().bfill()
        player_frames.append(df_temp.drop_duplicates())
    # pd.concat rejects an empty list; keep the empty-frame result for days
    # on which no player qualifies.
    df_save = pd.concat(player_frames) if player_frames else pd.DataFrame()

    print('Saving for Date:', now)
    df_save.insert(0, 'Date', pd.to_datetime(now))
    display(df_save)
    partition_save_df(df_save, f"../tables/{YEAR}/parlay_stats.csv")
main(YEAR, df_gmlog, now)

In [10]:
def reload_data(season, end_date=None):
    """Re-run ``main`` in reload mode for each scheduled date of a season.

    Parameters
    ----------
    season : season identifier used in the ``../tables/{season}/`` path and
        passed to ``main``.
    end_date : optional last date to process (anything ``pd.Timestamp``
        accepts). Defaults to the notebook-level ``now``, so dates that have
        not happened yet are no longer processed.

    Reads the notebook-level ``df_gmlog`` and ``now``; prints each date as it
    is processed.
    """
    schd = pd.read_csv(f"../tables/{season}/nba_schedule.csv")
    schd['Date'] = pd.to_datetime(schd.Date)

    cutoff = pd.Timestamp(now if end_date is None else end_date)
    # Iterating the Series yields Timestamps; unique().tolist() can yield raw
    # integers when the unique values come back as a NumPy datetime64 array,
    # which breaks .strftime below.
    game_dates = schd.loc[schd.Date <= cutoff, 'Date'].drop_duplicates().sort_values()
    for date in game_dates:
        dt_str = date.strftime("%Y-%m-%d")
        print(dt_str)
        main(season, df_gmlog, dt_str, True)