In [1]:
import requests
from bs4 import BeautifulSoup
from nbainjuries import injury
import duckdb
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import os
import unicodedata
import warnings

# Show every row when a DataFrame is displayed and silence all warnings.
pd.set_option('display.max_rows', None)
warnings.filterwarnings("ignore")

# Normalise the working directory to forward slashes so the prefix test
# below also matches Windows-style paths.
cwd = os.path.abspath(os.getcwd()).replace("\\", "/")

# A working directory under this user folder is treated as "local";
# anything else is treated as "cloud".
RUN_LOCATION = "local" if cwd.startswith("C:/Users/Rodolfo/") else "cloud"

# Hours added to datetime.now() by the cells below, keyed by RUN_LOCATION.
time_offset = {"local": 3, "cloud": -5}

print("Current working dir:", cwd)
print("RUN_LOCATION =", RUN_LOCATION)

Current working dir: C:/Users/Rodolfo/Jupyter_files/FantasyBasketball/notebooks
RUN_LOCATION = local


In [2]:
%run ./common_utils.ipynb

# Get Injuries Report

In [7]:
def get_injuries():
    """Fetch the injury report, keep today's non-available players, then save and display them.

    Steps:
      1. Request the report for ``datetime.now()`` shifted by
         ``time_offset[RUN_LOCATION]`` hours.
      2. Rename the report columns and turn "Last, First" player names into
         "First Last".
      3. Join against ``../src/team_info_xref.csv`` to replace the team name
         with its ``ABV`` abbreviation, keeping only rows dated today whose
         status is not ``'Available'``.
      4. Pass the result to ``partition_save_df`` (defined outside this cell)
         and display the rows whose status is ``'Out'``.

    Returns:
        None.
    """
    def _first_last(name):
        # "Last, First" -> "First Last". Non-strings (e.g. NaN) pass through
        # unchanged; a name without a comma is kept instead of becoming NaN.
        if not isinstance(name, str):
            return name
        parts = name.split(",")
        if len(parts) < 2:
            return name.strip()
        return (parts[1] + " " + parts[0]).strip()

    # Evaluate the adjusted clock once so the report request and the date
    # filter below can never disagree (e.g. when the call straddles midnight).
    report_ts = datetime.now() + timedelta(hours=time_offset[RUN_LOCATION])
    report_date = str(report_ts.date())

    # NOTE: `df_inj` and `df_teams` are referenced by name inside the SQL
    # text below, so these local names must not be changed.
    df_teams = pd.read_csv("../src/team_info_xref.csv")

    df_inj = injury.get_reportdata(report_ts, return_df=True)
    df_inj = df_inj.rename(columns={"Game Date": "Date", "Player Name": "Player", "Current Status": "Status"})
    df_inj['Player'] = df_inj['Player'].map(_first_last)
    df_inj['Date'] = pd.to_datetime(df_inj['Date'])

    con = duckdb.connect(database=":memory:")
    try:
        df_inj = con.execute(f"""SELECT Date, ABV as Team, Player, Status, Reason FROM df_inj 
                                 JOIN df_teams ON df_inj.Team = df_teams.Team
                                 WHERE Date = '{report_date}' AND Status != 'Available'""").fetchdf()
    finally:
        # Release the in-memory database even if the query fails.
        con.close()

    partition_save_df(df_inj, f"../tables/{YEAR}/injuries.csv")
    display(df_inj[df_inj.Status == 'Out'])
get_injuries()

Validated Injury-Report_2026-01-10_05_00PM.
../tables/2025/injuries.csv saved!


Unnamed: 0,Date,Team,Player,Status,Reason
0,2026-01-10,MIN,Mike Conley,Out,Rest
1,2026-01-10,MIN,Enrique Freeman,Out,G League - Two-Way
2,2026-01-10,MIN,Terrence Shannon Jr.,Out,Injury/Illness - Left Foot; Abductor Hallucis ...
3,2026-01-10,MIN,Rocco Zikarsky,Out,G League - Two-Way
4,2026-01-10,CLE,Chris Livingston,Out,G League - Two-Way
5,2026-01-10,CLE,Max Strus,Out,Injury/Illness - Left Foot; Surgery - Jones Fr...
6,2026-01-10,CLE,Luke Travers,Out,G League - Two-Way
7,2026-01-10,CLE,Dean Wade,Out,Injury/Illness - Left Knee; Contusion
8,2026-01-10,MIA,Vladislav Goldin,Out,G League - Two-Way
10,2026-01-10,MIA,Terry Rozier,Out,Not With Team


# Get Betting Odds

In [7]:
def get_game_odds():
    """Return the DraftKings spread and game total for every team playing today.

    "Today" is ``datetime.now()`` shifted by ``time_offset[RUN_LOCATION]``
    hours. Today's matchups are read from
    ``../tables/{YEAR}/nba_schedule.csv``; the odds come from a DraftKings
    sportsbook endpoint, and only games found in today's schedule are kept.

    Returns:
        pandas.DataFrame with columns ``Team`` (abbreviation), ``Spread`` and
        ``Total`` - one row per team. Empty (same columns) when the feed has
        no usable selections.

    Raises:
        Exception: if the endpoint answers with a status other than 200.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    today = str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION])).date())

    df_gms = pd.read_csv(f"../tables/{YEAR}/nba_schedule.csv")
    df_gms['Date'] = pd.to_datetime(df_gms.Date)
    # .copy() so the new columns are written to an independent frame rather
    # than to a slice of df_gms.
    df_gms_td = df_gms[df_gms.Date == today].copy()
    df_gms_td['gm_id'] = df_gms_td.AwayABV + "_" + df_gms_td.HomeABV
    df_gms_td['gm_id2'] = df_gms_td.HomeABV + "_" + df_gms_td.AwayABV
    # Both orderings are accepted because the feed's team order is not relied upon.
    gms_today = df_gms_td.gm_id.tolist() + df_gms_td.gm_id2.tolist()

    # DraftKings team label -> abbreviation used by the project tables.
    dk_tm_mapping = {
                    "ATL Hawks": "ATL", "BKN Nets": "BRK", "BOS Celtics": "BOS", "CHA Hornets": "CHO", "CHI Bulls": "CHI",
                    "CLE Cavaliers": "CLE", "DAL Mavericks": "DAL", "DEN Nuggets": "DEN", "DET Pistons": "DET", 
                    "GS Warriors": "GSW", "HOU Rockets": "HOU", "IND Pacers": "IND", "LA Clippers": "LAC", 
                    "LA Lakers": "LAL", "MEM Grizzlies": "MEM", "MIA Heat": "MIA", "MIL Bucks": "MIL", "MIN Timberwolves": "MIN",
                    "NO Pelicans": "NOP", "NY Knicks": "NYK", "OKC Thunder": "OKC", "ORL Magic": "ORL", "PHI 76ers": "PHI",
                    "PHO Suns": "PHO", "POR Trail Blazers": "POR", "SA Spurs": "SAS", "SAC Kings": "SAC", "TOR Raptors": "TOR",
                    "UTA Jazz": "UTA", "WAS Wizards": "WAS", "GSW Warriors": "GSW", "LAL Lakers": "LAL", "NOP Pelicans": "NOP",
                    "NYK Knicks": "NYK", "PHX Suns": "PHO", "SAS Spurs": "SAS"
                    }

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://sportsbook.draftkings.com/",
        "Origin": "https://sportsbook.draftkings.com"
    }
    url = "https://sportsbook-nash.draftkings.com/sites/US-SB/api/sportscontent/controldata/league/leagueSubcategory/v1/markets?isBatchable=false&templateVars=42648%2C4511&eventsQuery=%24filter%3DleagueId%20eq%20%2742648%27%20AND%20clientMetadata%2FSubcategories%2Fany%28s%3A%20s%2FId%20eq%20%274511%27%29&marketsQuery=%24filter%3DclientMetadata%2FsubCategoryId%20eq%20%274511%27%20AND%20tags%2Fall%28t%3A%20t%20ne%20%27SportcastBetBuilder%27%29&include=Events&entity=events"
    # A timeout keeps an unresponsive server from hanging the notebook forever.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception('Bad Request')

    # Parse the payload once instead of re-decoding it on every iteration.
    selections = response.json()['selections']
    ids = [sel for sel in selections if 'HC' in sel['id'] or 'OU' in sel['id']]

    # Assumes each game contributes four consecutive selections: the two
    # spread sides followed by the total - TODO confirm against the feed.
    # The `- 2` bound only visits groups that have the three entries read
    # below, so a truncated trailing group is skipped instead of raising
    # IndexError.
    rows = []
    for i in range(0, len(ids) - 2, 4):
        team1 = ids[i]['label']
        spread1 = ids[i]['points']
        team2 = ids[i+1]['label']
        spread2 = ids[i+1]['points']
        total = ids[i+2]['points']
        rows.append({"Team": team1, "Opp": team2, "Spread": spread1, "Total": total})
        rows.append({"Team": team2, "Opp": team1, "Spread": spread2, "Total": total})
    # Explicit columns keep the frame well-formed when no odds were returned.
    df_tm_bets = pd.DataFrame(rows, columns=["Team", "Opp", "Spread", "Total"])

    # Translate known DraftKings labels; unknown labels are left as they are.
    for col in ("Team", "Opp"):
        df_tm_bets[col] = df_tm_bets[col].map(lambda label: dk_tm_mapping.get(label, label))

    df_tm_bets['gm_id'] = df_tm_bets.Team + "_" + df_tm_bets.Opp
    df_tm_bets = df_tm_bets[(df_tm_bets.gm_id.isin(gms_today))].drop(['Opp', 'gm_id'], axis=1)

    return df_tm_bets

def get_sportsbook():
    """Collect today's DraftKings player-prop lines, enrich, save and display them.

    For every stat category in ``dk_cats`` the matching DraftKings
    sub-category is requested and reduced to one ``<STAT>_line`` column per
    player. The per-category frames are outer-joined on ``Player``, then
    joined to ``../tables/{YEAR}/plyr_pos_xref.csv`` (keeping every player
    that has a line) and to ``get_game_odds()`` on ``Team``. The result is
    stamped with today's date (``datetime.now()`` shifted by
    ``time_offset[RUN_LOCATION]`` hours), passed to ``partition_save_df``
    (defined outside this cell) and displayed.

    Returns:
        None.

    Raises:
        Exception: if any request answers with a status other than 200.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://sportsbook.draftkings.com/",
        "Origin": "https://sportsbook.draftkings.com"
    }

    # Stat label -> DraftKings sub-category id used in the request URL.
    dk_cats = {"PTS": 12488, "AST": 12495, "REB": 12492, "STL": 13508, "BLK": 13780, "STL_BLK": 13781, "TPM": 12497, 
               "PA": 9973, "PR": 9976, "RA": 9974, "PRA": 5001}

    # Start from None rather than an empty frame: testing `.empty` would
    # silently throw away the columns of any leading category that returned
    # no selections, making the saved schema depend on which markets happen
    # to be posted.
    df_lines = None
    for stat, subcat_id in dk_cats.items():
        # A timeout keeps an unresponsive server from hanging the notebook forever.
        response = requests.get(f"https://sportsbook-nash.draftkings.com/sites/US-SB/api/sportscontent/controldata/league/leagueSubcategory/v1/markets?isBatchable=false&templateVars=42648%2C{subcat_id}&eventsQuery=%24filter%3DleagueId%20eq%20%2742648%27%20AND%20clientMetadata%2FSubcategories%2Fany%28s%3A%20s%2FId%20eq%20%27{subcat_id}%27%29&marketsQuery=%24filter%3DclientMetadata%2FsubCategoryId%20eq%20%27{subcat_id}%27%20AND%20tags%2Fall%28t%3A%20t%20ne%20%27SportcastBetBuilder%27%29&include=Events&entity=events", headers=headers, timeout=30)
        if response.status_code != 200:
            raise Exception('Bad Request')

        line_col = f"{stat}_line"
        records = [
            {"Player": sel['participants'][0]['name'], line_col: sel['points']}
            for sel in response.json()['selections']
        ]
        # Identical (player, line) pairs are collapsed to a single row.
        df_dk = (
            pd.DataFrame(records, columns=["Player", line_col])
            .drop_duplicates()
            .reset_index(drop=True)
        )

        if df_lines is None:
            df_lines = df_dk
        else:
            df_lines = pd.merge(df_lines, df_dk, on="Player", how="outer")

    # Right join: keep every player that has a line, even without an xref row.
    df_lines = pd.merge(pd.read_csv(f"../tables/{YEAR}/plyr_pos_xref.csv"), df_lines, on='Player', how='right')
    # Inner join: only players whose team has odds for a game today remain.
    df_lines = pd.merge(df_lines, get_game_odds(), on='Team')
    df_lines.insert(0, 'Date', pd.to_datetime(str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION])).date())))

    partition_save_df(df_lines, f"../tables/{YEAR}/parlay_lines.csv") 
    display(df_lines)
get_sportsbook()

../tables/2025/parlay_lines.csv saved!


Unnamed: 0,Date,Team,Player,Pos,PTS_line,AST_line,REB_line,STL_line,BLK_line,STL_BLK_line,TPM_line,PA_line,PR_line,RA_line,PRA_line,Spread,Total
0,2026-01-10,IND,Aaron Nesmith,SF,16.5,,4.5,,,,2.5,,22.5,,,6.5,235.5
1,2026-01-10,IND,Andrew Nembhard,PG,17.5,7.5,2.5,,,,1.5,25.5,20.5,10.5,28.5,6.5,235.5
2,2026-01-10,MIA,Andrew Wiggins,SF,14.5,2.5,4.5,1.5,,,1.5,16.5,19.5,7.5,21.5,-6.5,235.5
3,2026-01-10,BOS,Anfernee Simons,SG,13.5,2.5,,,,,2.5,15.5,15.5,4.5,18.5,1.5,230.5
4,2026-01-10,DET,Ausar Thompson,SF,12.5,2.5,5.5,1.5,,,,15.5,18.5,8.5,21.5,-3.5,214.5
5,2026-01-10,MIA,Bam Adebayo,C,16.5,2.5,9.5,,,,,20.5,26.5,12.5,29.5,-6.5,235.5
6,2026-01-10,IND,Ben Sheppard,SG,6.5,,,,,,1.5,,,,,6.5,235.5
7,2026-01-10,CHO,Brandon Miller,SF,20.5,3.5,4.5,,,,3.5,23.5,24.5,7.5,28.5,-5.5,238.5
8,2026-01-10,UTA,Brice Sensabaugh,SF,16.5,2.5,3.5,,,,2.5,18.5,20.5,6.5,22.5,5.5,238.5
9,2026-01-10,LAC,Brook Lopez,C,5.5,,3.5,,,,1.5,,9.5,,,3.5,214.5


# Get Starting Lineups

In [3]:
def read_page(date="", timeout=30):
    """Download the fantasydata.com NBA starting-lineups page and return its HTML.

    Args:
        date: Value for the ``?date=`` query parameter. When empty (the
            default) the URL is requested without a query string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.HTTPError: if the server answers with an error status.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    query = f"?date={date}" if date != "" else ""
    url = f"https://fantasydata.com/nba/starting-lineups{query}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept-Language": "en-US,en;q=0.9"
    }

    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()

    return resp.text

In [4]:
def extract_starters(team_div):
    """Extract the starters listed inside one team's div (away or home).

    Direct ``div.text-nowrap`` children are scanned in order. Collection
    starts after the row whose ``<strong>`` label is "Starters" and stops at
    the row labelled "Injuries". Rows without a link are skipped.

    Args:
        team_div: Element exposing ``select``; its rows must expose ``find``
            and ``get_text`` (a BeautifulSoup tag in this notebook).

    Returns:
        list of dicts with keys ``player_name``, ``player_id`` and
        ``position``. ``player_id`` is the last path segment of the player
        link, or ``None`` when the link carries no ``href``.
    """
    starters = []
    collecting = False

    for row in team_div.select(":scope > div.text-nowrap"):
        strong = row.find("strong")

        if strong:
            label = strong.get_text(strip=True)
            if label == "Starters":
                collecting = True
                continue
            if label == "Injuries":
                break

        if not collecting:
            continue

        a = row.find("a")
        if not a:
            continue

        name = a.get_text(strip=True)
        href = a.get("href")

        # The player ID is the last segment of the link URL. A link without
        # an href must not abort the whole scrape, so the ID is None then.
        player_id = href.rstrip("/").split("/")[-1] if href else None

        # The position is whatever text remains in the row besides the name.
        text = row.get_text(" ", strip=True)
        position = text.replace(name, "").strip()

        starters.append({
            "player_name": name,
            "player_id": player_id,
            "position": position
        })

    return starters

In [5]:
def parse_lineups(html):
    """Turn the starting-lineups page HTML into one row per starting player.

    Args:
        html: Markup of the lineups page.

    Returns:
        pandas.DataFrame whose rows carry ``team_side`` ("away"/"home"),
        ``team``, ``opponent``, ``game_time`` and the fields produced by
        ``extract_starters``. Empty when the page has no ``div.game`` blocks.
    """
    # Fixed 6-second pause before parsing - presumably to space out
    # successive page requests; confirm before removing.
    time.sleep(6)
    soup = BeautifulSoup(html, "lxml")
    records = []

    for game_div in soup.select("div.game"):
        # Header text: first string is "AWAY @ HOME", second is the tip-off time.
        header_strings = list(game_div.select_one("div.header div.info > div").stripped_strings)
        matchup, tipoff = header_strings[0], header_strings[1]
        away_team, home_team = (side.strip() for side in matchup.split("@"))

        away_starters = extract_starters(game_div.select_one("div.away"))
        home_starters = extract_starters(game_div.select_one("div.home"))

        # (side label, team, opponent, starters) - away rows first, then home.
        sides = (
            ("away", away_team, home_team, away_starters),
            ("home", home_team, away_team, home_starters),
        )
        for side, team, opponent, starters in sides:
            records.extend(
                {
                    "team_side": side,
                    "team": team,
                    "opponent": opponent,
                    "game_time": tipoff,
                    **player
                }
                for player in starters
            )

    return pd.DataFrame(records)

# Other functions

In [8]:
def email(error):
    """Send an error notification e-mail through Gmail's SMTP-over-SSL endpoint.

    The Gmail app password is read from the ``GMAIL_APP_PASSWORD``
    environment variable so that no credential lives in the notebook.

    NOTE(review): an app password used to be hard-coded in this function;
    it has been exposed in the notebook history and should be revoked.

    Args:
        error: Text appended after "ERROR: " in the message body.

    Raises:
        RuntimeError: if ``GMAIL_APP_PASSWORD`` is not set.
        smtplib.SMTPException: if login or delivery fails.
    """
    # Imported locally so the function does not rely on names being brought
    # in by another cell or notebook.
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    # Email details
    sender_email = "rodolfoe7157@gmail.com"
    receiver_email = "rodolfoe7157@gmail.com"
    password = os.environ.get("GMAIL_APP_PASSWORD")
    if not password:
        raise RuntimeError(
            "GMAIL_APP_PASSWORD environment variable is not set; cannot send alert e-mail"
        )

    subject = "NBA Starting Lineups error"
    body = f"ERROR: {error}"

    # Create message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    # Connect to Gmail SMTP server and send
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, password)
        server.send_message(msg)

    print("Email sent successfully!")

In [9]:
def remove_accents(text):
    """Return ``text`` reduced to plain ASCII; non-string values are returned unchanged.

    The string is NFKD-normalised so accented letters split into a base
    letter plus combining marks, then every non-ASCII code point is dropped.
    """
    if isinstance(text, str):
        decomposed = unicodedata.normalize('NFKD', text)
        # Keep only code points in the ASCII range.
        return "".join(ch for ch in decomposed if ord(ch) < 128)
    return text

# Main

In [10]:
# Re-scrape yesterday's starting lineups and store them.
# "Yesterday" is derived from the same offset-adjusted clock
# (time_offset[RUN_LOCATION]) that the injury and odds cells use, so every
# table in the notebook is stamped against one consistent calendar day.
now = str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION])).date() + timedelta(days=-1))

# Team cross-reference: the lineup page identifies teams by the last word of
# their name, so derive that key here; Portland's nickname is two words.
df_teams = pd.read_csv(f"../src/team_info_xref.csv")
df_teams['short_name'] = df_teams.Team.str.split(" ").str[-1]
df_teams['short_name'] = np.where(df_teams.ABV == 'POR', 'Trail Blazers', df_teams.short_name)

df = parse_lineups(read_page(now))
if df.shape[0] > 0:
    # Swap the scraped team nickname for the project abbreviation.
    df = df.merge(df_teams, left_on=['team'], right_on=['short_name'])
    df = df[['ABV', 'player_name', 'position']].rename(columns={"ABV": "Team", "player_name": "Player", "position": "Pos"})
    df.insert(0, 'Date', pd.to_datetime(now))
    df['Player'] = df['Player'].apply(remove_accents)
    display(df)
    partition_save_df(df, f"../tables/{YEAR}/daily_lineups.csv")
else:
    # Nothing scraped: alert by e-mail instead of saving an empty table.
    email(f'Empty lineup for {now}')

Unnamed: 0,Date,Team,Player,Pos
0,2026-01-09,NOP,Jeremiah Fears,PG
1,2026-01-09,NOP,Bryce McGowens,SG
2,2026-01-09,NOP,Trey Murphy III,SF
3,2026-01-09,NOP,Zion Williamson,PF
4,2026-01-09,NOP,Derik Queen,C
5,2026-01-09,WAS,Tre Johnson,SG
6,2026-01-09,WAS,Bilal Coulibaly,SG
7,2026-01-09,WAS,Khris Middleton,SF
8,2026-01-09,WAS,Kyshawn George,SF
9,2026-01-09,WAS,Alexandre Sarr,PF


../tables/2025/daily_lineups.csv saved!


In [11]:
# Scrape today's starting lineups (default page, no date parameter) and store them.
# The date stamp uses the same offset-adjusted clock
# (time_offset[RUN_LOCATION]) as the injury and odds cells, so every table in
# the notebook is stamped against one consistent calendar day.
now = str((datetime.now() + timedelta(hours=time_offset[RUN_LOCATION])).date())

# Team cross-reference: the lineup page identifies teams by the last word of
# their name, so derive that key here; Portland's nickname is two words.
df_teams = pd.read_csv(f"../src/team_info_xref.csv")
df_teams['short_name'] = df_teams.Team.str.split(" ").str[-1]
df_teams['short_name'] = np.where(df_teams.ABV == 'POR', 'Trail Blazers', df_teams.short_name)

df = parse_lineups(read_page())
if df.shape[0] > 0:
    # Swap the scraped team nickname for the project abbreviation.
    df = df.merge(df_teams, left_on=['team'], right_on=['short_name'])
    df = df[['ABV', 'player_name', 'position']].rename(columns={"ABV": "Team", "player_name": "Player", "position": "Pos"})
    df.insert(0, 'Date', pd.to_datetime(now))
    df['Player'] = df['Player'].apply(remove_accents)
    display(df)
    partition_save_df(df, f"../tables/{YEAR}/daily_lineups.csv")
else:
    # Nothing scraped: alert by e-mail instead of saving an empty table.
    email(f'Empty lineup for {now}')

Unnamed: 0,Date,Team,Player,Pos
0,2026-01-10,MIN,Donte DiVincenzo,SG
1,2026-01-10,MIN,Anthony Edwards,SG
2,2026-01-10,MIN,Jaden McDaniels,PF
3,2026-01-10,MIN,Julius Randle,PF
4,2026-01-10,MIN,Rudy Gobert,C
5,2026-01-10,CLE,Darius Garland,PG
6,2026-01-10,CLE,Sam Merrill,SG
7,2026-01-10,CLE,Donovan Mitchell,SG
8,2026-01-10,CLE,Evan Mobley,PF
9,2026-01-10,CLE,Jarrett Allen,C


../tables/2025/daily_lineups.csv saved!
