In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from datetime import datetime
import time
import os
import unicodedata
import warnings

# Notebook-wide configuration: show every row when a DataFrame is displayed
# and silence all warnings for the rest of the session.
pd.set_option('display.max_rows', None)
warnings.filterwarnings("ignore")

# Normalise the working directory to forward slashes so the prefix check
# below does not depend on the OS path separator.
cwd = os.path.abspath(os.getcwd()).replace("\\", "/")
# cloudscraper is imported only when the notebook is NOT running from under
# this specific local user directory.
# NOTE(review): presumably it is only installed/needed on the non-local
# environment -- confirm; no cell visible in this notebook uses it.
if not cwd.startswith("C:/Users/Rodolfo/"):
    import cloudscraper

# Used further down to build the output path ../tables/{year}/...
# NOTE(review): presumably the season's starting year -- confirm.
year = 2025
# Current local date as a 'YYYY-MM-DD' string; stamped onto the saved rows
# and used in the notification message.
now = str(datetime.now().date())
print(f"Today's date:", now)

Today's date: 2026-01-08


In [2]:
%run ./common_utils.ipynb

# Obtain Lineups

In [3]:
def read_page(date="", timeout=30):
    """
    Download the FantasyData NBA starting-lineups page and return its HTML.

    Parameters
    ----------
    date : str, optional
        Value appended as the ``date`` query parameter. When empty (the
        default) the URL is requested without a query string.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout ``requests`` waits indefinitely, so a stalled
        connection would hang the whole notebook run.

    Returns
    -------
    str
        The response body decoded as text.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    if date != "":
        date = f"?date={date}"
    url = f"https://fantasydata.com/nba/starting-lineups{date}"

    # Browser-like headers sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept-Language": "en-US,en;q=0.9"
    }

    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()

    return resp.text

In [4]:
def extract_starters(team_div):
    """
    Extract the starters listed in one team's div (away or home).

    The div's direct ``div.text-nowrap`` children are scanned in order.
    Rows are ignored until a row whose ``<strong>`` label is "Starters";
    scanning stops at the row labelled "Injuries".

    Parameters
    ----------
    team_div : bs4.element.Tag
        The ``div.away`` / ``div.home`` element of a game card.

    Returns
    -------
    list[dict]
        One dict per starter with keys ``player_name``, ``player_id`` and
        ``position``. ``player_id`` is the last path segment of the player
        link's ``href``, or ``None`` when the link has no ``href``.
    """
    starters = []
    collecting = False

    for row in team_div.select(":scope > div.text-nowrap"):
        strong = row.find("strong")

        # Section headers switch collection on ("Starters") or end it
        # ("Injuries"); any other bold label falls through as a normal row.
        if strong:
            label = strong.get_text(strip=True)
            if label == "Starters":
                collecting = True
                continue
            if label == "Injuries":
                break

        if not collecting:
            continue

        link = row.find("a")
        if not link:
            # Row without a player link: nothing to record.
            continue

        name = link.get_text(strip=True)

        # A link without an href used to crash on ``None.rstrip``; record
        # the player with an unknown id instead of aborting the scrape.
        href = link.get("href")
        if href is None:
            player_id = None
        else:
            player_id = href.rstrip("/").split("/")[-1]

        # The position is whatever row text remains once the name is removed.
        text = row.get_text(" ", strip=True)
        position = text.replace(name, "").strip()

        starters.append({
            "player_name": name,
            "player_id": player_id,
            "position": position
        })

    return starters

In [5]:
def parse_lineups(html, delay=6):
    """
    Parse a starting-lineups page into one row per starter.

    Parameters
    ----------
    html : str
        HTML of the lineups page.
    delay : float, optional
        Seconds to sleep before parsing. Defaults to 6, the pause this
        function has always applied; pass 0 to skip it (for example when
        re-parsing HTML that is already in memory).
        NOTE(review): presumably the pause throttles successive page
        fetches -- confirm with the callers.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_side``, ``team``, ``opponent``, ``game_time`` plus
        the keys returned by ``extract_starters``. Away starters precede
        home starters within each game. The frame is empty (no columns)
        when the page contains no ``div.game`` elements.
    """
    if delay and delay > 0:
        time.sleep(delay)

    soup = BeautifulSoup(html, "lxml")
    games_data = []

    for game in soup.select("div.game"):
        # --- Game info ---
        # First text line of the header is the matchup ("Away @ Home"),
        # the second is the game time.
        info_div = game.select_one("div.header div.info > div")
        lines = list(info_div.stripped_strings)
        matchup, game_time = lines[0], lines[1]

        away_team, home_team = [t.strip() for t in matchup.split("@")]

        # --- Lineups ---
        # Same record layout for both sides; only team/opponent swap.
        sides = (
            ("away", away_team, home_team),
            ("home", home_team, away_team),
        )
        for team_side, team, opponent in sides:
            team_div = game.select_one(f"div.{team_side}")
            for p in extract_starters(team_div):
                games_data.append({
                    "team_side": team_side,
                    "team": team,
                    "opponent": opponent,
                    "game_time": game_time,
                    **p
                })

    return pd.DataFrame(games_data)

# Other functions

In [6]:
def email(error):
    """
    Send an error-notification e-mail through Gmail's SMTP-over-SSL server.

    The Gmail app password is read from the ``NBA_LINEUPS_SMTP_PASSWORD``
    environment variable so that no credential is stored in the notebook.

    Parameters
    ----------
    error : object
        Description of the problem; interpolated into the message body.

    Raises
    ------
    RuntimeError
        If ``NBA_LINEUPS_SMTP_PASSWORD`` is unset or empty. Raised before
        any network connection is attempted.
    smtplib.SMTPException
        If login or delivery fails.
    """
    # Imported here so the function does not rely on these names having
    # been brought into the notebook namespace by some other cell. The
    # absolute import resolves to the standard-library ``email`` package
    # even though this function shares its name.
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    password = os.environ.get("NBA_LINEUPS_SMTP_PASSWORD")
    if not password:
        raise RuntimeError(
            "NBA_LINEUPS_SMTP_PASSWORD is not set; cannot send notification "
            f"e-mail (ERROR: {error})"
        )

    # Email details
    sender_email = "rodolfoe7157@gmail.com"
    receiver_email = "rodolfoe7157@gmail.com"

    subject = "NBA Starting Lineups error"
    body = f"ERROR: {error}"

    # Create message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    # Connect to Gmail SMTP server and send
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, password)
        server.send_message(msg)

    print("Email sent successfully!")

In [7]:
def remove_accents(text):
    """
    Return ``text`` reduced to plain ASCII; non-string values pass through.

    The string is decomposed with NFKD so that accented letters split into
    a base letter plus combining marks, then every character outside the
    ASCII range is dropped.
    """
    if isinstance(text, str):
        decomposed = unicodedata.normalize('NFKD', text)
        # Keep only code points below 128, i.e. the ASCII range.
        return "".join(ch for ch in decomposed if ord(ch) < 128)
    return text

# Main

In [11]:
# Team cross-reference. ``short_name`` is the last word of the full team
# name, except for POR, whose short name is two words.
df_teams = pd.read_csv("../src/team_info_xref.csv")
df_teams['short_name'] = np.where(
    df_teams.ABV == 'POR',
    'Trail Blazers',
    df_teams.Team.str.split(" ").str[-1],
)

# Fetch and parse today's page (no date argument).
df = parse_lineups(read_page())

if df.shape[0] == 0:
    # Nothing parsed: send a notification instead of saving.
    email(f'Empty lineup for {now}')
else:
    # Map the team name found on the page to its abbreviation, then keep
    # only the three output columns under their saved names.
    df = df.merge(df_teams, left_on=['team'], right_on=['short_name'])
    df = df[['ABV', 'player_name', 'position']]
    df = df.rename(columns={"ABV": "Team", "player_name": "Player", "position": "Pos"})
    df.insert(0, 'Date', pd.to_datetime(now))
    df['Player'] = df['Player'].apply(remove_accents)
    display(df)
    partition_save_df(df, f"../tables/{year}/daily_lineups.csv")

Unnamed: 0,Date,Team,Player,Pos
0,2026-01-08,NOP,Jordan Poole,PG
1,2026-01-08,NOP,Jeremiah Fears,PG
2,2026-01-08,NOP,Trey Murphy III,SF
3,2026-01-08,NOP,Zion Williamson,PF
4,2026-01-08,NOP,Derik Queen,C
5,2026-01-08,WAS,Bub Carrington,PG
6,2026-01-08,WAS,Bilal Coulibaly,SG
7,2026-01-08,WAS,Khris Middleton,SF
8,2026-01-08,WAS,Kyshawn George,SF
9,2026-01-08,WAS,Alexandre Sarr,PF
