In [1]:
import time
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from datetime import timedelta, datetime
import pandas as pd

import sys
from pathlib import Path

# ajustar a tu ruta del proyecto
PROJECT_ROOT = Path(r"C:\Users\andre\Documents\GitHub\nba-data-analytics")
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from src.Data.maper import EQUIPOS_ODDS

# Scrape the NBA fixture list shown on OddsPortal: match URL, home/away team names and
# date for every game row found on the page.
url = "https://www.oddsportal.com/basketball/usa/nba/"

options = Options()
#options.add_experimental_option("detach", True)
options.add_argument("--disable-search-engine-choice-screen")
#options.add_argument("--headless")
options.add_argument("--log-level=3")   # 0 = ALL, 3 = ERROR
options.add_experimental_option("excludeSwitches", ["enable-logging"])

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# The browser must be closed even when a wait times out or a click fails; otherwise every
# failed run leaves a Chrome/chromedriver process behind.
try:
    driver.get(url)
    driver.maximize_window()
    time.sleep(3)
    # Presumably the "reject all" button of the cookie-consent banner -- confirm on the live page
    svg_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, "onetrust-reject-all-handler"))
        #EC.element_to_be_clickable((By.CSS_SELECTOR, "div.overlay-bookie-modal svg.cursor-pointer"))
    )
    svg_button.click()

    # Scroll to the bottom repeatedly until the page height stops growing
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # give dynamically loaded content time to render
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Only the HTML snapshot is needed from here on, so parsing happens after the browser is closed
    html = driver.page_source
finally:
    driver.quit()

soup = BeautifulSoup(html, 'html.parser')
divs = soup.find_all('div', class_='eventRow flex w-full flex-col text-xs')
partidos = []
home_team = []
away_team = []
fechas = []
# Rows without their own date header reuse the most recent one. Resetting here avoids a
# NameError when the first row has no header and stops a re-run of the cell from silently
# reusing a date left over from a previous execution.
fecha = None
today = datetime.now()
for div in divs:

    fecha_div = div.find('div', class_='text-black-main font-main w-full truncate text-xs font-normal leading-5')

    if fecha_div is not None:
        # Keep only the text before the first '-' of the header
        fecha = fecha_div.text.split('-')[0].rstrip()
        # Relative labels are replaced by an explicit date in the same "%d %b %Y" layout
        if 'Today' in fecha:
            fecha = today.strftime("%d %b %Y")
        elif 'Yesterday' in fecha:
            fecha = (today - timedelta(days=1)).strftime("%d %b %Y")
        elif 'Tomorrow' in fecha or 'tomorrow' in fecha:
            fecha = (today + timedelta(days=1)).strftime("%d %b %Y")
    divs2 = div.find_all('a', class_='next-m:flex next-m:!mt-0 ml-2 mt-2 min-h-[32px] w-full hover:cursor-pointer')

    for div2 in divs2:

        equipos = div2.find_all('p', class_='participant-name truncate')
        if len(equipos) < 2:
            # Not a two-team match link; skip it so the four result lists stay aligned
            continue
        partidos.append('https://www.oddsportal.com' + div2['href'])
        home_team.append(equipos[0].text)
        away_team.append(equipos[1].text)
        fechas.append(fecha)

# Collect the scraped lists into one frame, one column per list, in a fixed order
temp_data_df = pd.DataFrame()
for column_name, scraped_values in (
    ('H_TEAM_NICKNAME', home_team),
    ('A_TEAM_NICKNAME', away_team),
    ('GAME_DATE', fechas),
):
    temp_data_df[column_name] = scraped_values

# Look each team name up in EQUIPOS_ODDS; names missing from the mapping end up as None
for id_column, name_column in (
    ('H_TEAM_ID', 'H_TEAM_NICKNAME'),
    ('A_TEAM_ID', 'A_TEAM_NICKNAME'),
):
    mapped_ids = temp_data_df[name_column].map(EQUIPOS_ODDS)
    temp_data_df[id_column] = mapped_ids.replace({float('nan'): None})

In [2]:
# Rename the scraped columns so they line up with the columns of `merged` built further down
column_aliases = {
    "H_TEAM_NICKNAME": "HOME_H_TEAM_NICKNAME",
    "A_TEAM_NICKNAME": "HOME_A_TEAM_NICKNAME",
    "GAME_DATE": "HOME_GAME_DATE",
    "H_TEAM_ID": "HOME_TEAM_ID",
    "A_TEAM_ID": "AWAY_TEAM_ID"
}
temp_data_df = temp_data_df.rename(columns=column_aliases)

# Strip stray whitespace, parse the "%d %b %Y" text strictly (any mismatch raises), then
# write the date back as a 'YYYY-MM-DD HH:MM:SS' string
parsed_dates = pd.to_datetime(
    temp_data_df['HOME_GAME_DATE'].str.strip(),
    format='%d %b %Y',
    errors='raise',
)
temp_data_df['HOME_GAME_DATE'] = parsed_dates.dt.strftime('%Y-%m-%d %H:%M:%S')
temp_data_df

Unnamed: 0,HOME_H_TEAM_NICKNAME,HOME_A_TEAM_NICKNAME,HOME_GAME_DATE,HOME_TEAM_ID,AWAY_TEAM_ID
0,New Orleans Pelicans,Atlanta Hawks,2025-11-23 00:00:00,1610612740,1610612737
1,Chicago Bulls,Washington Wizards,2025-11-23 00:00:00,1610612741,1610612764
2,Milwaukee Bucks,Detroit Pistons,2025-11-23 00:00:00,1610612749,1610612765
3,Dallas Mavericks,Memphis Grizzlies,2025-11-23 00:00:00,1610612742,1610612763
4,Denver Nuggets,Sacramento Kings,2025-11-23 00:00:00,1610612743,1610612758
5,Philadelphia 76ers,Miami Heat,2025-11-23 00:00:00,1610612755,1610612748
6,Atlanta Hawks,Charlotte Hornets,2025-11-24 00:00:00,1610612737,1610612766
7,Boston Celtics,Orlando Magic,2025-11-24 00:00:00,1610612738,1610612753
8,Cleveland Cavaliers,Los Angeles Clippers,2025-11-24 00:00:00,1610612739,1610612746
9,Toronto Raptors,Brooklyn Nets,2025-11-24 00:00:00,1610612761,1610612751


In [3]:
def add_dynamic_rankings(df):
    """Return a sorted copy of ``df`` with running win totals, game counts and ranks.

    NOTE: a later cell of this notebook defines ``add_dynamic_rankings`` again, so this
    version is shadowed once that cell has run.

    Expects the columns ``SEASON``, ``HOME_GAME_DATE``, ``HOME_TEAM_ID``,
    ``AWAY_TEAM_ID``, ``HOME_W`` and ``AWAY_W``. The input frame is not modified.

    Added columns:
        HOME_WINS_CUM / AWAY_WINS_CUM: cumulative ``HOME_W`` / ``AWAY_W`` per season and
            home / away team, including the current row.
        HOME_GAMES_PLAYED / AWAY_GAMES_PLAYED: 1-based running count of the team's
            home / away rows within the season.
        HOME_RANK / AWAY_RANK: position of the row's home / away team when teams are
            ordered by their highest cumulative home / away wins over all rows of the
            season up to AND including the current one (ties share the lowest rank).
    """
    ordered = df.copy().sort_values(['SEASON', 'HOME_GAME_DATE'])

    # Rows without a result are treated as 0 wins
    for wins_column in ('HOME_W', 'AWAY_W'):
        ordered[wins_column] = ordered[wins_column].fillna(0).astype(int)

    # (team column, wins column, cumulative-wins column, games-played column) per side
    sides = (
        ('HOME_TEAM_ID', 'HOME_W', 'HOME_WINS_CUM', 'HOME_GAMES_PLAYED'),
        ('AWAY_TEAM_ID', 'AWAY_W', 'AWAY_WINS_CUM', 'AWAY_GAMES_PLAYED'),
    )
    for team_column, wins_column, cum_column, _ in sides:
        ordered[cum_column] = ordered.groupby(['SEASON', team_column])[wins_column].cumsum()
    for team_column, _, _, played_column in sides:
        ordered[played_column] = ordered.groupby(['SEASON', team_column]).cumcount() + 1

    def _rank_through(season_games, last_label, team_column, cum_column, team):
        """Rank of ``team`` using every row of the season up to label ``last_label``."""
        standings = season_games.loc[:last_label].groupby(team_column)[cum_column].max()
        return int(standings.rank(ascending=False, method='min')[team])

    def _season_ranks(season_games):
        """Home and away rank for every row of one season, aligned on its index."""
        pairs = [
            (
                _rank_through(season_games, label, 'HOME_TEAM_ID', 'HOME_WINS_CUM', game['HOME_TEAM_ID']),
                _rank_through(season_games, label, 'AWAY_TEAM_ID', 'AWAY_WINS_CUM', game['AWAY_TEAM_ID']),
            )
            for label, game in season_games.iterrows()
        ]
        return pd.DataFrame(
            {
                'HOME_RANK': [home for home, _ in pairs],
                'AWAY_RANK': [away for _, away in pairs],
            },
            index=season_games.index,
        )

    # Ranks are computed independently for each season
    rankings = ordered.groupby('SEASON', group_keys=False).apply(_season_ranks)
    return pd.concat([ordered, rankings], axis=1)


In [4]:
def add_dynamic_rankings(df):
    """Return a sorted copy of ``df`` with running stats and pre-game ranks.

    Expects the columns ``SEASON``, ``HOME_GAME_DATE``, ``HOME_TEAM_ID``,
    ``AWAY_TEAM_ID``, ``HOME_W`` and ``AWAY_W``. The input frame is not modified; the
    result is ordered by ``SEASON`` and ``HOME_GAME_DATE``.

    Added columns:
        HOME_WINS_CUM / AWAY_WINS_CUM: cumulative ``HOME_W`` / ``AWAY_W`` per season and
            home / away team, including the current row. Missing results count as 0.
        HOME_GAMES_PLAYED / AWAY_GAMES_PLAYED: number of EARLIER home / away rows of the
            team within the season (0 for its first one).
        HOME_RANK / AWAY_RANK: rank of the row's home / away team among all teams that
            already appeared as home / away team earlier in the season, ordered by their
            highest cumulative home / away wins so far. Ties share the lowest rank.
            The value is NaN (column dtype float64) when the team has no earlier
            home / away row in that season. "Earlier" means earlier in the sorted order,
            so rows that precede the current one on the same date are included.

    Seasons are walked explicitly instead of through ``groupby(...).apply`` because
    ``apply`` on a frame that still contains the grouping column triggers a pandas
    deprecation warning. Standings are kept in running dictionaries, so each row costs
    one pass over the teams seen so far rather than two fresh groupbys.
    """
    df = df.copy()
    df = df.sort_values(['SEASON', 'HOME_GAME_DATE'])

    # Rows without a result yet are treated as 0 wins
    df['HOME_W'] = df['HOME_W'].fillna(0).astype(int)
    df['AWAY_W'] = df['AWAY_W'].fillna(0).astype(int)

    df['HOME_WINS_CUM'] = df.groupby(['SEASON', 'HOME_TEAM_ID'])['HOME_W'].cumsum()
    df['AWAY_WINS_CUM'] = df.groupby(['SEASON', 'AWAY_TEAM_ID'])['AWAY_W'].cumsum()

    # cumcount() is 0-based: how many earlier home / away rows the team has in the season
    df['HOME_GAMES_PLAYED'] = df.groupby(['SEASON', 'HOME_TEAM_ID']).cumcount()
    df['AWAY_GAMES_PLAYED'] = df.groupby(['SEASON', 'AWAY_TEAM_ID']).cumcount()

    def _rank_in(standings, team):
        """1-based rank of ``team`` in ``standings`` (ties share the lowest rank), or None."""
        if pd.isna(team) or team not in standings:
            return None
        own_wins = standings[team]
        return 1 + sum(1 for wins in standings.values() if wins > own_wins)

    def _record(standings, team, wins):
        """Store the highest cumulative win total seen so far for ``team``."""
        if pd.isna(team):
            # Rows without a team id never enter the standings
            return
        standings[team] = max(standings.get(team, wins), wins)

    def get_ranks(subdf):
        """Pre-game home and away rank for every row of one season."""
        home_table, away_table = {}, {}
        home_ranks, away_ranks = [], []
        rows = zip(
            subdf['HOME_TEAM_ID'], subdf['AWAY_TEAM_ID'],
            subdf['HOME_WINS_CUM'], subdf['AWAY_WINS_CUM'],
        )
        for home_team, away_team, home_wins, away_wins in rows:
            # Rank first, using ONLY earlier rows ...
            home_ranks.append(_rank_in(home_table, home_team))
            away_ranks.append(_rank_in(away_table, away_team))
            # ... and only then fold the current row into the standings
            _record(home_table, home_team, home_wins)
            _record(away_table, away_team, away_wins)
        # Force float64 so a season whose ranks are all missing does not become object dtype
        return pd.DataFrame(
            {'HOME_RANK': home_ranks, 'AWAY_RANK': away_ranks},
            index=subdf.index,
            dtype='float64',
        )

    season_rankings = [get_ranks(season_games) for _, season_games in df.groupby('SEASON')]
    if season_rankings:
        rankings = pd.concat(season_rankings)
    else:
        # No season at all (e.g. empty input): still expose the two rank columns
        rankings = pd.DataFrame(
            columns=['HOME_RANK', 'AWAY_RANK'], index=df.index[:0], dtype='float64'
        )
    df = pd.concat([df, rankings], axis=1)

    return df

In [5]:
import sqlite3
import pandas as pd

# Connect to the project's SQLite database
conexion = sqlite3.connect("C:\\Users\\andre\\Documents\\GitHub\\nba-data-analytics\\NBA_DATA.db")

# One row per GAME_STATS record, enriched with the game-level columns of GAMES and limited
# to the 2025-26 season (the WHERE clause on g.SEASON also discards GAME_STATS rows that
# have no matching GAMES row)
query = """
SELECT 
    gs.GAME_ID,
    gs.HOME_FLAG,
    gs.W,
    gs.Team_ID,
    g.GAME_DATE,
    g.SEASON,
    g.H_TEAM_ID,
    g.A_TEAM_ID,
    g.H_TEAM_NICKNAME,
    g.A_TEAM_NICKNAME
FROM GAME_STATS gs
LEFT JOIN GAMES g ON gs.GAME_ID = g.GAME_ID
WHERE g.SEASON = '2025-26'
"""
# Close the connection even when the query fails (e.g. missing table), so a failed run
# does not leave the database file open
try:
    games_df = pd.read_sql_query(query, conexion)
finally:
    conexion.close()

In [6]:
# Build one frame per venue: keep the rows with the matching HOME_FLAG, prefix every column
# with the venue, then restore the plain names of the two join keys
home_df = (
    games_df.loc[games_df['HOME_FLAG'] == 1]
    .copy()
    .add_prefix('HOME_')
    .rename(columns={'HOME_GAME_ID': 'GAME_ID', 'HOME_SEASON': 'SEASON'})
)
away_df = (
    games_df.loc[games_df['HOME_FLAG'] == 0]
    .copy()
    .add_prefix('AWAY_')
    .rename(columns={'AWAY_GAME_ID': 'GAME_ID', 'AWAY_SEASON': 'SEASON'})
)

# Pair the home row and the away row of each game into a single wide row
merged = home_df.merge(away_df, on=['GAME_ID', 'SEASON'], suffixes=('_HOME', '_AWAY'))

In [7]:
# Columns of the wide game table that are not needed for the ranking step
unused_columns = [
    "GAME_ID", "SEASON", "AWAY_H_TEAM_NICKNAME", "AWAY_A_TEAM_NICKNAME", "HOME_HOME_FLAG",
    "AWAY_A_TEAM_ID", "AWAY_H_TEAM_ID", "AWAY_GAME_DATE", "HOME_A_TEAM_ID", "AWAY_HOME_FLAG", "HOME_H_TEAM_ID",
]
played_games = merged.drop(columns=unused_columns)

# Stack the scraped fixtures under the stored games and tag every row with the season
merged = (
    pd.concat([played_games, temp_data_df], ignore_index=True, sort=False)
    .assign(SEASON='2025-26')
)

In [8]:
# Preview the combined frame: games loaded from the database followed by the scraped fixtures
merged

Unnamed: 0,HOME_W,HOME_TEAM_ID,HOME_GAME_DATE,HOME_H_TEAM_NICKNAME,HOME_A_TEAM_NICKNAME,AWAY_W,AWAY_TEAM_ID,SEASON
0,1.0,1610612737,2025-11-04 00:00:00,Hawks,Magic,0.0,1610612753,2025-26
1,1.0,1610612739,2025-11-02 00:00:00,Cavaliers,Hawks,0.0,1610612737,2025-26
2,0.0,1610612754,2025-10-31 00:00:00,Pacers,Hawks,1.0,1610612737,2025-26
3,0.0,1610612751,2025-10-29 00:00:00,Nets,Hawks,1.0,1610612737,2025-26
4,1.0,1610612741,2025-10-27 00:00:00,Bulls,Hawks,0.0,1610612737,2025-26
...,...,...,...,...,...,...,...,...
331,,1610612743,2025-12-26 00:00:00,Denver Nuggets,Minnesota Timberwolves,,1610612750,2025-26
332,,1610612737,2026-01-19 00:00:00,Atlanta Hawks,Milwaukee Bucks,,1610612749,2025-26
333,,1610612739,2026-01-19 00:00:00,Cleveland Cavaliers,Oklahoma City Thunder,,1610612760,2025-26
334,,1610612752,2026-01-19 00:00:00,New York Knicks,Dallas Mavericks,,1610612742,2025-26


In [9]:
# Compute the rankings, keep only the columns used by the prediction rules and discard
# every row where one of them is missing (e.g. a team without a rank yet)
rank_view_columns = ['HOME_RANK', 'AWAY_RANK', 'HOME_GAME_DATE', 'HOME_H_TEAM_NICKNAME', 'HOME_A_TEAM_NICKNAME']
ranked_games = add_dynamic_rankings(merged)
merged = ranked_games[rank_view_columns].dropna()
merged

  rankings = df.groupby('SEASON', group_keys=False).apply(get_ranks)


Unnamed: 0,HOME_RANK,AWAY_RANK,HOME_GAME_DATE,HOME_H_TEAM_NICKNAME,HOME_A_TEAM_NICKNAME
151,3.0,1.0,2025-10-09 00:00:00,Knicks,Timberwolves
115,8.0,1.0,2025-10-10 00:00:00,Raptors,Celtics
160,9.0,2.0,2025-10-10 00:00:00,76ers,Magic
111,11.0,14.0,2025-10-11 00:00:00,Grizzlies,Hawks
127,4.0,15.0,2025-10-11 00:00:00,Mavericks,Hornets
...,...,...,...,...,...
331,9.0,8.0,2025-12-26 00:00:00,Denver Nuggets,Minnesota Timberwolves
332,24.0,9.0,2026-01-19 00:00:00,Atlanta Hawks,Milwaukee Bucks
333,6.0,1.0,2026-01-19 00:00:00,Cleveland Cavaliers,Oklahoma City Thunder
334,1.0,21.0,2026-01-19 00:00:00,New York Knicks,Dallas Mavericks


In [10]:
# Rank thresholds: ranks <= x form the top group, ranks >= y the bottom group
x = 4
y = 12
stage = 1
results = []
resultados = merged.copy()


def _prediction_for(home_rank, away_rank):
    """Prediction code for one game from the two ranks.

    Returns 1, 0 or -1 (presumably 1 = pick the home team, 0 = pick the away team,
    -1 = no pick -- confirm with the notebook author).
    """
    home_top, home_bottom = home_rank <= x, home_rank >= y
    away_top, away_bottom = away_rank <= x, away_rank >= y
    if home_top:
        # Two top teams cancel out; otherwise the top home team gets code 1
        return -1 if away_top else 1
    if home_bottom:
        # Two bottom teams cancel out; otherwise the bottom home team gets code 0
        return -1 if away_bottom else 0
    # Home team is in the middle group: only a clearly placed away team decides
    if away_top:
        return 0
    if away_bottom:
        return 1
    return -1


predictions = [
    _prediction_for(home_rank, away_rank)
    for home_rank, away_rank in zip(merged['HOME_RANK'], merged['AWAY_RANK'])
]

resultados['PREDICTION'] = predictions
resultados.tail(19)

Unnamed: 0,HOME_RANK,AWAY_RANK,HOME_GAME_DATE,HOME_H_TEAM_NICKNAME,HOME_A_TEAM_NICKNAME,PREDICTION
317,3.0,15.0,2025-11-24 00:00:00,Oklahoma City Thunder,Portland Trail Blazers,1
318,6.0,9.0,2025-11-24 00:00:00,Phoenix Suns,San Antonio Spurs,-1
319,17.0,5.0,2025-11-24 00:00:00,Utah Jazz,Los Angeles Lakers,0
320,24.0,5.0,2025-11-25 00:00:00,Indiana Pacers,Detroit Pistons,0
321,13.0,15.0,2025-11-25 00:00:00,Toronto Raptors,Cleveland Cavaliers,-1
322,21.0,4.0,2025-11-25 00:00:00,Memphis Grizzlies,Denver Nuggets,0
323,28.0,15.0,2025-11-25 00:00:00,New Orleans Pelicans,Chicago Bulls,-1
324,6.0,5.0,2025-11-25 00:00:00,Phoenix Suns,Houston Rockets,-1
325,9.0,28.0,2025-11-25 00:00:00,Golden State Warriors,Utah Jazz,1
326,27.0,8.0,2025-11-25 00:00:00,Sacramento Kings,Minnesota Timberwolves,0


In [11]:
# The previous cell already computed the predictions for these thresholds (x = 4, y = 12)
# from the same `merged` frame and stored them in `resultados`. Repeating the whole loop
# here only duplicated code, so this cell just shows a longer tail of that result.
resultados.tail(25)


Unnamed: 0,HOME_RANK,AWAY_RANK,HOME_GAME_DATE,HOME_H_TEAM_NICKNAME,HOME_A_TEAM_NICKNAME,PREDICTION
311,9.0,21.0,2025-11-23 00:00:00,Denver Nuggets,Sacramento Kings,1
312,13.0,21.0,2025-11-23 00:00:00,Philadelphia 76ers,Miami Heat,-1
313,24.0,25.0,2025-11-24 00:00:00,Atlanta Hawks,Charlotte Hornets,-1
314,13.0,9.0,2025-11-24 00:00:00,Boston Celtics,Orlando Magic,0
315,6.0,15.0,2025-11-24 00:00:00,Cleveland Cavaliers,Los Angeles Clippers,1
316,13.0,15.0,2025-11-24 00:00:00,Toronto Raptors,Brooklyn Nets,-1
317,3.0,15.0,2025-11-24 00:00:00,Oklahoma City Thunder,Portland Trail Blazers,1
318,6.0,9.0,2025-11-24 00:00:00,Phoenix Suns,San Antonio Spurs,-1
319,17.0,5.0,2025-11-24 00:00:00,Utah Jazz,Los Angeles Lakers,0
320,24.0,5.0,2025-11-25 00:00:00,Indiana Pacers,Detroit Pistons,0
