In [7]:
import pandas as pd
import numpy as np
import time
from datetime import datetime
import warnings
import duckdb
import os
from bs4 import BeautifulSoup
import requests
import unicodedata
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

# Show every column when a DataFrame is displayed and silence all warnings.
pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

# Season folder used in the "../tables/{year}/" paths read and written below.
year = 2025
# Today's date as a "YYYY-MM-DD" string; main() compares schedule dates to it.
now = str(datetime.now().date())
print("Today's date:", now)

# The run location is inferred from the working directory: anything outside
# the developer's Windows profile is treated as "cloud".
cwd = os.path.abspath(os.getcwd()).replace("\\", "/")
if cwd.startswith("C:/Users/Rodolfo/"):
    RUN_LOCATION = "local"
else:
    RUN_LOCATION = "cloud"
    # The module is named `cloudscraper` (main() calls
    # cloudscraper.create_scraper()); it is only needed off the local machine.
    import cloudscraper

print("Current working dir:", cwd)
print("RUN_LOCATION =", RUN_LOCATION)

Today's date: 2025-11-20
Current working dir: C:/Users/Rodolfo/Jupyter_files/FantasyBasketball/notebooks
RUN_LOCATION = local


In [8]:
def email(error, game_id):
    """Send an error notification e-mail through Gmail's SMTP-over-SSL endpoint.

    Args:
        error: The exception (or message) to report; it is formatted into the
            mail body with ``str()`` semantics via an f-string.
        game_id: Identifier of the game being processed when the error occurred.

    Returns:
        None.

    The Gmail app password is read from the ``GMAIL_APP_PASSWORD`` environment
    variable so that it never lives in the notebook. If the variable is unset
    or empty, a message is printed and no mail is sent, so a missing credential
    does not hide the error being reported.
    """
    # Email details
    sender_email = "rodolfoe7157@gmail.com"
    receiver_email = "rodolfoe7157@gmail.com"
    # NOTE(review): a password was previously committed in this cell; it should
    # be revoked and a new app password supplied via the environment.
    password = os.environ.get("GMAIL_APP_PASSWORD")
    if not password:
        print("GMAIL_APP_PASSWORD is not set; error email not sent.")
        return

    subject = "NBA gamelog_rtrvr error"
    body = f"game_id: {game_id}\nERROR: {error}"

    # Create message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    # Connect to Gmail SMTP server and send
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, password)
        server.send_message(msg)

    print("Email sent successfully!")

In [9]:
def remove_accents(text):
    """Return ``text`` with accents stripped; non-string values pass through.

    The string is decomposed with NFKD normalization so that accented letters
    become a base letter plus combining marks, then every character that is
    not ASCII is discarded.
    """
    if isinstance(text, str):
        decomposed = unicodedata.normalize('NFKD', text)
        ascii_only = decomposed.encode('ascii', errors='ignore')
        return ascii_only.decode('ascii')
    return text

In [10]:
def _repair_mojibake(name):
    """Undo UTF-8-read-as-Latin-1 garbling in a name, leaving other values alone."""
    if not isinstance(name, str):
        return name
    try:
        return name.encode("latin1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # The text was not garbled in that way (or cannot be repaired): keep it.
        return name


def retrieve_boxscore(con, soup, gm_type, date, home_team, away_team):
    """Build the per-player basic box score of one game segment.

    Args:
        con: DuckDB connection used to join the player-position lookup.
        soup: Parsed box-score page; ``soup.find("table", {"id": ...})`` is
            used to locate each team's table.
        gm_type: Segment key that appears in the table ids
            (``box-<TEAM>-<gm_type>-basic``).
        date: Game date as a ``YYYYMMDD`` string.
        home_team: Home team abbreviation as used in the table id.
        away_team: Away team abbreviation as used in the table id.

    Returns:
        DataFrame with one row per listed player of both teams and the columns
        game_id, Date, Team, Team_type, Opp, Opp_type, Player, Pos, Active, MP
        (decimal minutes) followed by the box-score statistics.

    Raises:
        ValueError: If either team's table is not present in ``soup``.
    """
    # Local import: recent pandas versions deprecate passing literal HTML text
    # to read_html, so the markup is wrapped in a file-like object.
    from io import StringIO

    dfs = []
    for team in (home_team, away_team):
        table_id = f"box-{team}-{gm_type}-basic"
        tbl = soup.find("table", {"id": table_id})
        if tbl is None:
            raise ValueError(f"Box score table '{table_id}' not found in page")
        dfs.append(pd.read_html(StringIO(str(tbl)), header=1)[0])

    home = dfs[0]
    home['Team'] = home_team
    home['Team_type'] = 'Home'
    home['Opp'] = away_team
    home['Opp_type'] = 'Away'

    away = dfs[1]
    away['Team'] = away_team
    away['Team_type'] = 'Away'
    away['Opp'] = home_team
    away['Opp_type'] = 'Home'

    df = pd.concat([home, away])
    # Drop the separator/summary rows, then collapse every "did not play"
    # style marker to the single token "DNP".
    df = df[~(df.Starters.isin(['Team Totals', 'Reserves']))].fillna(0)\
            .replace(["Did Not Play", "Did Not Dress", "Not With Team", "Player Suspended"], "DNP")\
            .rename(columns={"Starters": "Player", "3P": "3PM"})
    df["Player"] = df["Player"].apply(_repair_mojibake)
    df['Player'] = df['Player'].apply(remove_accents)
    df['Date'] = pd.to_datetime(date, format="%Y%m%d")
    df['Active'] = np.where(df['MP'] == 'DNP', 0, 1)

    # Convert "MM:SS" playing time to decimal minutes; DNP rows become 0.
    df['MP'] = np.where(df.MP == 'DNP', '0:0', df.MP)
    df['mins'] = df.MP.str.split(":").str[0]
    df['secs'] = df.MP.str.split(":").str[1]
    df = df.fillna({'mins': 0, 'secs': 0})
    df['MP'] = df.mins.astype(int) + round(df.secs.astype(int) / 60, 2)

    # Any remaining "DNP" marker in a statistic column counts as zero.
    for col in df.columns.difference(['Date', 'Player', 'Team', 'Team_type', 'Opp', 'Opp_type']):
        df[col] = np.where(df[col] == 'DNP', 0, df[col])

    for col in ['MP', 'FG%', '3P%', 'FT%', 'GmSc']:
        df[col] = df[col].astype(float)

    for col in df.columns.difference(['Date', 'Player', 'MP', 'Team', 'Team_type', 'Opp', 'Opp_type',
                                      'FG%', '3P%', 'FT%', 'GmSc', '+/-']):
        df[col] = df[col].astype(int)

    # The SQL below refers to the local DataFrames `df` and `df_pos` by name,
    # so these two variables must keep exactly these names.
    df_pos = pd.read_csv(f"../tables/{year}/plyr_pos_xref.csv")
    df = con.execute("""SELECT * FROM df LEFT JOIN df_pos ON df.Player = df_pos.Player AND df.Team = df_pos.Team""").fetchdf()

    df['game_id'] = f"{date}_{away_team}_{home_team}"
    df = df[['game_id', 'Date', 'Team', 'Team_type', 'Opp', 'Opp_type', 'Player', 'Pos', 'Active', 'MP',
             'FG', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FT', 'FTA',
             'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
             'GmSc', '+/-']]

    return df

In [11]:
def fantasy_scoring(df):
    """Add double-double, triple-double and fantasy-point columns to a box score.

    Args:
        df: DataFrame containing at least the columns PTS, TRB, AST, STL, BLK,
            TOV and 3PM.

    Returns:
        A new DataFrame with the input columns plus:
          * DD   - 1 when at least two of PTS/TRB/AST/STL/BLK are >= 10.
          * TD   - 1 when at least three of them are >= 10.
          * Fpts - 0.5*PTS + AST + TRB + 2*STL + 2*BLK - TOV + 0.5*3PM
                   + 1*DD + 2*TD + 2 for a 40+ point game
                   + a further 2 for a 50+ point game.

    The input frame is not modified.

    NOTE(review): the 50-point bonus previously used a 40-point threshold, so
    rows scored before this fix may carry 2 extra points for 40-49 point games.
    """
    # Work on a copy so the caller's frame does not gain helper columns.
    df = df.copy()

    # Columns we care about
    stat_cols = ['PTS', 'TRB', 'AST', 'STL', 'BLK']

    # Count how many stats are >= 10 for each player
    double_count = (df[stat_cols] >= 10).sum(axis=1)

    # Determine double-double or triple-double
    df['DD'] = (double_count >= 2).astype(int)
    df['TD'] = (double_count >= 3).astype(int)

    # Scoring-milestone bonuses; a 50-point game earns both.
    bonus_40 = (df['PTS'] >= 40).astype(int)
    bonus_50 = (df['PTS'] >= 50).astype(int)

    df['Fpts'] = (df.PTS * 0.5) + (df.AST * 1) + (df.TRB * 1) + (df.STL * 2) + (df.BLK * 2) + (df.TOV * -1) + (df['3PM'] * 0.5)\
                   + (df.DD * 1) + (df.TD * 2) + (bonus_40 * 2) + (bonus_50 * 2)

    return df

In [12]:
def main():
    """Fetch box scores for every pending scheduled game and append them to the logs.

    Reads ``../tables/{year}/nba_schedule.csv``, selects games with
    ``rtrvd == 0`` dated before today, and for each one downloads the
    box-score page. For the full game and each quarter/half it builds the
    player log, scores it, and appends it to the matching
    ``*season_gamelogs.csv`` file. After all segments of a game are written
    the game is flagged ``rtrvd = 1`` and the schedule file is saved.

    Raises:
        Exception: Any error raised while processing a segment is re-raised
            unchanged after an attempt to send a notification e-mail.

    NOTE(review): if a later segment fails after earlier segments were already
    appended, the game stays unflagged and a re-run appends those earlier
    segments again - confirm whether de-duplication by game_id is wanted.
    """
    schedule_path = f"../tables/{year}/nba_schedule.csv"
    con = duckdb.connect(database=":memory:")
    try:
        df = pd.read_csv(schedule_path)
        df['Date'] = pd.to_datetime(df.Date).astype(str)
        # Dates are "YYYY-MM-DD" strings, so string comparison orders them correctly.
        filtrd_df = df[(df.rtrvd == 0) & (df.Date < now)].reset_index(drop=True)
        display(filtrd_df)

        for row in range(filtrd_df.shape[0]):
            game = filtrd_df.loc[row]
            date = game.loc['Date'].replace("-", "")
            home_team = game.loc['HomeABV']
            away_team = game.loc['AwayABV']
            game_id = f"{date}_{away_team}_{home_team}"
            url = f"https://www.basketball-reference.com/boxscores/{date}0{home_team}.html"
            print(url)

            # Pause between page requests.
            time.sleep(6)
            if RUN_LOCATION == 'local':
                html = requests.get(url).text
            else:
                # Imported here so this branch does not depend on the
                # module-level import having bound the right name.
                import cloudscraper
                scraper = cloudscraper.create_scraper()
                html = scraper.get(url).text
            soup = BeautifulSoup(html, "lxml")

            for gm_type in ['game', 'q1', 'q2', 'q3', 'q4', 'h1', 'h2']:
                if gm_type == 'game':
                    file_name = 'season_gamelogs.csv'
                else:
                    file_name = f'{gm_type}_season_gamelogs.csv'
                log_path = f"../tables/{year}/{file_name}"

                try:
                    print(f'{gm_type.capitalize()}_Log of {date}_{away_team}_{home_team}')
                    df_gm = retrieve_boxscore(con, soup, gm_type, date, home_team, away_team)
                    df_gm = fantasy_scoring(df_gm)

                    try:
                        df_cnct = pd.read_csv(log_path)
                    except (FileNotFoundError, pd.errors.EmptyDataError):
                        # Only a missing or empty log starts from scratch; any
                        # other read failure must not lead to overwriting the
                        # existing file with just this game.
                        df_cnct = pd.DataFrame()

                    df_cnct = pd.concat([df_cnct, df_gm])
                    df_cnct['Date'] = pd.to_datetime(df_cnct['Date'], format='mixed', errors='coerce')
                    df_cnct['Date'] = df_cnct['Date'].dt.strftime("%m/%d/%Y")
                    df_cnct.to_csv(log_path, index=False)

                    if gm_type == 'game':
                        display(df_gm)

                except Exception as e:
                    try:
                        email(e, game_id)
                    except Exception as mail_error:
                        # A failed notification must not replace the real error.
                        print(f"Could not send error email: {mail_error}")
                    # Re-raise the original exception with its type and traceback.
                    raise

            df_save_indx = df[(df.Date == game.loc['Date']) & (df.AwayABV == away_team) & (df.HomeABV == home_team)].index
            df.loc[df_save_indx, 'rtrvd'] = 1
            df.to_csv(schedule_path, index=False)
    finally:
        # Release the DuckDB connection even when a game fails.
        con.close()


main()

Unnamed: 0,Date,StartTime_ET,AwayTeam,AwayABV,HomeTeam,HomeABV,Arena,AwayB2B,HomeB2B,rtrvd
0,2025-11-19,7:00p,Houston Rockets,HOU,Cleveland Cavaliers,CLE,Rocket Arena,0,0,0
1,2025-11-19,7:00p,Charlotte Hornets,CHO,Indiana Pacers,IND,Gainbridge Fieldhouse,0,0,0
2,2025-11-19,7:00p,Toronto Raptors,TOR,Philadelphia 76ers,PHI,Xfinity Mobile Arena,0,0,0
3,2025-11-19,7:30p,Golden State Warriors,GSW,Miami Heat,MIA,Kaseya Center,1,0,0
4,2025-11-19,8:00p,Washington Wizards,WAS,Minnesota Timberwolves,MIN,Target Center,0,0,0
5,2025-11-19,8:00p,Denver Nuggets,DEN,New Orleans Pelicans,NOP,Smoothie King Center,0,0,0
6,2025-11-19,8:00p,Sacramento Kings,SAC,Oklahoma City Thunder,OKC,Paycom Center,0,0,0
7,2025-11-19,9:30p,New York Knicks,NYK,Dallas Mavericks,DAL,American Airlines Center,0,0,0
8,2025-11-19,10:00p,Chicago Bulls,CHI,Portland Trail Blazers,POR,Moda Center,0,1,0


https://www.basketball-reference.com/boxscores/202511190CLE.html
Game_Log of 20251119_HOU_CLE


Unnamed: 0,game_id,Date,Team,Team_type,Opp,Opp_type,Player,Pos,Active,MP,FG,FGA,FG%,3PM,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-,DD,TD,Fpts
0,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Craig Porter Jr.,PG,1,25.78,4,6,0.667,1,2,0.5,0,0,0.0,1,6,7,5,0,2,1,2,9,12.0,-3,0,0,20.0
1,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Evan Mobley,PF,1,35.68,7,13,0.538,0,3,0.0,4,10,0.4,1,5,6,0,0,0,3,2,18,7.7,-10,0,0,12.0
2,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Dean Wade,PF,1,26.72,1,5,0.2,1,4,0.25,1,4,0.25,1,6,7,3,0,2,0,3,4,4.5,0,0,0,16.5
3,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Donovan Mitchell,SG,1,34.1,8,17,0.471,3,9,0.333,2,2,1.0,1,2,3,2,2,0,4,2,21,12.2,-18,0,0,17.0
4,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Jarrett Allen,C,1,28.27,4,9,0.444,0,0,0.0,0,0,0.0,2,5,7,3,1,0,1,3,8,7.1,-9,0,0,15.0
5,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Tyrese Proctor,PG,1,10.38,0,1,0.0,0,0,0.0,2,2,1.0,0,1,1,3,0,0,0,0,2,3.7,-3,0,0,5.0
6,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Larry Nance Jr.,PF,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0
7,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,De'Andre Hunter,SF,1,30.53,6,11,0.545,5,8,0.625,8,8,1.0,2,2,4,2,0,0,3,3,25,18.9,-6,0,0,18.0
8,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Lonzo Ball,PG,1,25.0,2,8,0.25,2,8,0.25,1,2,0.5,0,2,2,2,1,1,0,1,7,5.1,1,0,0,12.5
9,20251119_HOU_CLE,2025-11-19,CLE,Home,HOU,Away,Thomas Bryant,C,1,3.88,0,1,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,1,0,-1.1,-1,0,0,0.0


Q1_Log of 20251119_HOU_CLE
Q2_Log of 20251119_HOU_CLE
Q3_Log of 20251119_HOU_CLE
Q4_Log of 20251119_HOU_CLE
H1_Log of 20251119_HOU_CLE
H2_Log of 20251119_HOU_CLE
https://www.basketball-reference.com/boxscores/202511190IND.html


KeyboardInterrupt: 