# 1. Library imports

In [1]:
# !pip install selenium
# !pip install chromedriver-py==94.0.4606.41
# !pip install requests

# libraries
import os
import shutil
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from chromedriver_py import binary_path
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import warnings; warnings.filterwarnings("ignore")

# Cap what pandas prints for a DataFrame: at most 50 rows and 50 columns,
# on lines up to 1000 characters wide.
for option_name, option_value in (
    ("display.max_rows", 50),
    ("display.max_columns", 50),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)

# 2. Scrape MVP List

In [2]:
# Seasons to scrape, identified by the year used in the page URLs (1991 through 2024 inclusive).
years = [*range(1991, 2025)]
# URL template of a season's awards page; "{}" is filled with the year.
url_start = "https://www.basketball-reference.com/awards/awards_{}.html"

In [6]:
# Download every season's awards page, cache the HTML on disk and collect the
# table with id "mvp" of each year into `dfs`.
dfs = []

mvp_html_dir = "html/mvp"
os.makedirs(mvp_html_dir, exist_ok=True)  # open(..., "w") does not create missing folders

for year in years:
    url = url_start.format(year)
    # A timeout keeps a stalled connection from hanging the notebook forever.
    data = requests.get(url, timeout=30)

    if data.ok:
        # Cache the raw page so it can be inspected or re-parsed later.
        with open("{}/{}.html".format(mvp_html_dir, year), "w", encoding='utf-8') as f:
            f.write(data.text)

        soup = bs(data.text, "html.parser")
        # Remove the first "over_header" row when present - presumably the grouping
        # header above the real column names, so the table parses with one header row.
        over_header = soup.find("tr", class_="over_header")
        if over_header:
            over_header.decompose()
        mvp_table = soup.find(id="mvp")
        if mvp_table:
            # pandas deprecated passing literal HTML to read_html; wrap it in StringIO.
            mvp = pd.read_html(StringIO(str(mvp_table)))[0]
            mvp["Year"] = year
            dfs.append(mvp)
        else:
            print(f"No MVP table found for year {year}")
    else:
        # Error responses (e.g. rate limiting) are reported instead of being
        # cached and parsed as if they were the awards page.
        print(f"Request for {url} failed with HTTP status {data.status_code}")

    # Pause between requests to stay polite towards the site.
    time.sleep(10)

# Combine all seasons; fall back to an empty frame when nothing was parsed.
if dfs:
    mvps = pd.concat(dfs)
else:
    mvps = pd.DataFrame()

In [7]:
# Display the combined MVP tables of all scraped seasons.
mvps

Unnamed: 0,Rank,Player,Age,Tm,First,Pts Won,Pts Max,Share,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
0,1,Michael Jordan,27,CHI,77,891,960,0.928,82,37.0,31.5,6.0,5.5,2.7,1.0,0.539,0.312,0.851,20.3,0.321,1991
1,2,Magic Johnson,31,LAL,10,497,960,0.518,79,37.1,19.4,7.0,12.5,1.3,0.2,0.477,0.320,0.906,15.4,0.251,1991
2,3,David Robinson,25,SAS,6,476,960,0.496,82,37.7,25.6,13.0,2.5,1.5,3.9,0.552,0.143,0.762,17.0,0.264,1991
3,4,Charles Barkley,27,PHI,2,222,960,0.231,67,37.3,27.6,10.1,4.2,1.6,0.5,0.570,0.284,0.722,13.4,0.258,1991
4,5,Karl Malone,27,UTA,0,142,960,0.148,82,40.3,29.0,11.8,3.3,1.1,1.0,0.527,0.286,0.770,15.5,0.225,1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,5,Jalen Brunson,27,NYK,0,142,990,0.143,77,35.4,28.7,3.6,6.7,0.9,0.2,0.479,0.401,0.847,11.2,0.198,2024
5,6,Jayson Tatum,25,BOS,0,86,990,0.087,74,35.7,26.9,8.1,4.9,1.0,0.6,0.471,0.376,0.833,10.4,0.189,2024
6,7,Anthony Edwards,22,MIN,0,18,990,0.018,79,35.1,25.9,5.4,5.1,1.3,0.5,0.461,0.357,0.836,7.5,0.130,2024
7,8,Domantas Sabonis,27,SAC,0,3,990,0.003,82,35.7,19.4,13.7,8.2,0.9,0.6,0.594,0.379,0.704,12.6,0.206,2024


In [8]:
# Persist the combined MVP table. The row index is dropped: after the plain
# concat it only repeats each season's own row positions.
os.makedirs("raw_data", exist_ok=True)  # to_csv does not create missing directories
mvps.to_csv("raw_data/mvps.csv", index=False)

# 3. Scrape Player Stats

In [9]:
# URL template of a season's per-game player statistics page; "{}" is filled with the year.
player_stats_url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html"

In [15]:
# Path to the ChromeDriver executable. Raw strings keep the Windows backslashes
# literal; in a normal string "\D" and "\c" are invalid escape sequences.
driver_path = r"D:\Descargas\chromedriver-win64\chromedriver.exe"
chrome_options = Options()
# Chrome binary that the driver should launch.
chrome_options.binary_location = r"D:\Descargas\chrome-win64\chrome.exe"

# Fail early with a clear message when ChromeDriver is missing.
if not os.path.exists(driver_path):
    raise FileNotFoundError(f"ChromeDriver no encontrado en la ruta: {driver_path}")

# Start the browser.
driver = webdriver.Chrome(service=Service(driver_path), options=chrome_options)

In [16]:
# Load every season's per-game page in the browser and cache the rendered HTML.
os.makedirs("html/player", exist_ok=True)  # open(..., "w") does not create missing folders

for year in years:
    url = player_stats_url.format(year)

    driver.get(url)
    # Scroll down and wait briefly - presumably so content rendered by the page's
    # scripts is present before page_source is read.
    driver.execute_script("window.scrollTo(1,10000)")
    time.sleep(2)

    # The file is only written here, so plain write mode is enough.
    with open("html/player/{}.html".format(year), "w", encoding='utf-8') as f:
        f.write(driver.page_source)

In [17]:
# Parse the cached per-game pages into one DataFrame per season, collected in `dfs`.
dfs = []

for year in years:
    with open("html/player/{}.html".format(year), encoding="utf-8") as f:
        page = f.read()

    soup = bs(page, 'html.parser')
    # Remove the first row with class "thead" when present; pages without such
    # a row are parsed as they are instead of raising AttributeError.
    header_row = soup.find('tr', class_="thead")
    if header_row:
        header_row.decompose()

    player_table = soup.find(id="per_game_stats")
    if player_table is None:
        # e.g. a cached error page: report it and move on to the next season.
        print(f"No per-game table found for year {year}")
        continue

    # pandas deprecated passing literal HTML to read_html; wrap it in StringIO.
    player_df = pd.read_html(StringIO(str(player_table)))[0]
    player_df["Year"] = year
    dfs.append(player_df)

In [18]:
# Stack the per-season player tables into one frame. The original row index of
# every season is kept, so index values repeat across seasons.
players = pd.concat(dfs)
# Display the combined table.
players

Unnamed: 0,Rk,Player,Age,Team,Pos,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Awards,Year
0,1,Michael Jordan,27,CHI,SG,82,82,37.0,12.1,22.4,.539,0.4,1.1,.312,11.7,21.3,.551,.547,7.0,8.2,.851,1.4,4.6,6.0,5.5,2.7,1.0,2.5,2.8,31.5,"MVP-1,DPOY-7,AS,NBA1",1991
1,2,Karl Malone,27,UTA,PF,82,82,40.3,10.3,19.6,.527,0.0,0.2,.286,10.3,19.4,.529,.528,8.3,10.8,.770,2.9,8.9,11.8,3.3,1.1,1.0,3.0,3.3,29.0,"MVP-5,AS,NBA1",1991
2,3,Bernard King,34,WSB,SF,64,64,37.5,11.1,23.6,.472,0.1,0.6,.216,11.0,23.0,.478,.475,6.0,7.6,.790,1.8,3.2,5.0,4.6,0.9,0.3,4.0,2.9,28.4,"MVP-16,AS,NBA3",1991
3,4,Charles Barkley,27,PHI,SF,67,67,37.3,9.9,17.4,.570,0.7,2.3,.284,9.3,15.1,.614,.589,7.1,9.8,.722,3.9,6.3,10.1,4.2,1.6,0.5,3.1,2.6,27.6,"MVP-4,AS,NBA1",1991
4,5,Patrick Ewing,28,NYK,C,81,81,38.3,10.4,20.3,.514,0.0,0.1,.000,10.4,20.2,.516,.514,5.7,7.7,.745,2.4,8.8,11.2,3.0,1.0,3.2,3.6,3.5,26.6,"MVP-11,DPOY-7,AS,NBA2",1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,732,Ron Harper Jr.,23,TOR,PF,1,0,4.0,0.0,0.0,,0.0,0.0,,0.0,0.0,,,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,,2024
755,733,Justin Jackson,28,MIN,SF,2,0,0.5,0.0,0.0,,0.0,0.0,,0.0,0.0,,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2024
756,734,Dmytro Skapintsev,25,NYK,C,2,0,1.0,0.0,0.5,.000,0.0,0.0,,0.0,0.5,.000,.000,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2024
757,735,Javonte Smart,24,PHI,PG,1,0,1.0,0.0,0.0,,0.0,0.0,,0.0,0.0,,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2024


In [19]:
# Persist the combined player table without its (repeating) row index.
os.makedirs("raw_data", exist_ok=True)  # to_csv does not create missing directories
players.to_csv("raw_data/players.csv", index=False)

# 4. Scrape Team Stats

In [30]:
# Team codes used in the team page URLs.
# NOTE(review): the saved output of the parsing cell reports 404 pages for BRK in
# 1991-2008 at least, so early seasons of some codes are probably missed - presumably
# franchises that used a different code back then. Confirm and map historical codes if needed.
teams = ["ATL", "BOS", "BRK", "CHI", "CHO", "CLE", "DAL", "DEN", "DET", "GSW", "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NOP", "NYK", "OKC", "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "TOR", "UTA", "WAS"]
# URL template of a team-season page; filled with (team code, year).
team_stats_url = "https://www.basketball-reference.com/teams/{}/{}.html"

In [24]:
# Path to the ChromeDriver executable. Raw strings keep the Windows backslashes
# literal; in a normal string "\D" and "\c" are invalid escape sequences.
driver_path = r"D:\Descargas\chromedriver-win64\chromedriver.exe"
chrome_options = Options()
# Chrome binary that the driver should launch.
chrome_options.binary_location = r"D:\Descargas\chrome-win64\chrome.exe"

# Fail early with a clear message when ChromeDriver is missing.
if not os.path.exists(driver_path):
    raise FileNotFoundError(f"ChromeDriver no encontrado en la ruta: {driver_path}")

# Close a browser left open by an earlier cell, if any, so that starting a new
# one does not leave an orphaned Chrome process behind.
try:
    driver.quit()
except NameError:
    pass  # no earlier browser exists in this kernel

# Start the browser.
driver = webdriver.Chrome(service=Service(driver_path), options=chrome_options)

In [27]:
# Load every team-season page in the browser and cache the rendered HTML.
os.makedirs("html/team", exist_ok=True)  # open(..., "w") does not create missing folders

for team in teams:
    for year in years:
        url = team_stats_url.format(team, year)

        driver.get(url)
        # Scroll down and wait briefly - presumably so content rendered by the
        # page's scripts is present before page_source is read.
        driver.execute_script("window.scrollTo(1,10000)")
        time.sleep(2)

        # The file is only written here, so plain write mode is enough.
        with open("html/team/{}_{}.html".format(team, year), "w", encoding='utf-8') as f:
            f.write(driver.page_source)

In [None]:
# NOTE(review): `dfs` is reset here, but nothing in this cell appends to it.
dfs = []

output_dir = "raw_data/seasons_teams_data"
os.makedirs(output_dir, exist_ok=True)

# data-stat names copied from the row with data-row="1" of the "team_and_opponent" table.
TEAM_PER_GAME_STATS = [
    "fg_per_g", "fga_per_g", "fg_pct", "fg2_per_g", "fg2a_per_g", "fg2_pct",
    "fg3_per_g", "fg3a_per_g", "fg3_pct", "ft_per_g", "fta_per_g", "ft_pct",
    "orb_per_g", "drb_per_g", "trb_per_g", "ast_per_g", "stl_per_g", "blk_per_g",
    "tov_per_g", "pf_per_g", "pts_per_g",
]
# data-stat names copied from the row with data-row="5" of the same table.
OPPONENT_PER_GAME_STATS = ["opp_pf_per_g", "opp_pts_per_g"]
# data-stat names copied from the row with data-row="0" of the "team_misc" table.
TEAM_MISC_STATS = [
    "wins", "losses", "mov", "sos", "srs", "off_rtg", "def_rtg",
    "efg_pct", "tov_pct", "orb_pct", "drb_pct", "ft_rate",
    "opp_efg_pct", "opp_tov_pct", "opp_ft_rate",
]


def _copy_row_stats(row, stat_names, record):
    """Copy the text of each requested data-stat cell of `row` into `record` ("N/A" when the cell is missing)."""
    for stat in stat_names:
        td = row.find("td", {"data-stat": stat})
        record[stat] = td.text if td else "N/A"


def _parse_team_season(page, team, year):
    """Build the stats record (a dict) of one team season from its cached HTML page."""
    soup = bs(page, 'html.parser')

    record = {
        "Team": team,
        "Year": year,
        # Flagged as champion when the page text contains "NBA Champions".
        "Champion": "Yes" if "NBA Champions" in page else "No"
    }

    # Per-game stats of the team and of its opponents.
    table = soup.find("table", id="team_and_opponent")
    if table:
        tr_basic_stats = table.find("tr", {"data-row": "1"})
        if tr_basic_stats:
            _copy_row_stats(tr_basic_stats, TEAM_PER_GAME_STATS, record)

        tr_opponent_stats = table.find("tr", {"data-row": "5"})
        if tr_opponent_stats:
            _copy_row_stats(tr_opponent_stats, OPPONENT_PER_GAME_STATS, record)

    # Wins, losses and the other values of the "team_misc" table.
    table = soup.find("table", id="team_misc")
    if table:
        tr_advanced_stats = table.find("tr", {"data-row": "0"})
        if tr_advanced_stats:
            _copy_row_stats(tr_advanced_stats, TEAM_MISC_STATS, record)

    return record


def _with_win_pct(records):
    """Turn season records into a DataFrame and append the W, L and WIN% columns."""
    df = pd.DataFrame(records)
    for source, target in (("wins", "W"), ("losses", "L")):
        if source in df.columns:
            # "N/A" placeholders and blanks become NaN instead of raising ValueError.
            df[target] = pd.to_numeric(df[source], errors="coerce").astype(float)
        else:
            # No record had this stat (the "team_misc" row was never found).
            df[target] = float("nan")
    df["WIN%"] = df["W"] / (df["W"] + df["L"])
    return df


all_teams_data = []  # records of every team and season

for team in teams:
    team_records = []  # records of this team across all its seasons

    for year in years:
        file_path = f"html/team/{team}_{year}.html"

        if not os.path.exists(file_path):
            print(f"Archivo no encontrado: {file_path}")
            continue

        with open(file_path, encoding="utf-8") as f:
            page = f.read()

        # Cached 404 pages carry no stats; skip them.
        if "Page Not Found (404 error)" in page:
            print(f"Error 404 en el archivo: {file_path}")
            continue

        record = _parse_team_season(page, team, year)
        team_records.append(record)
        all_teams_data.append(record)

    # One CSV per team with all of its seasons.
    if team_records:
        df_team = _with_win_pct(team_records)
        output_path = f"{output_dir}/{team}_data.csv"
        df_team.to_csv(output_path, index=False)
        print(f"Datos de {team} guardados en {output_path}")

# One CSV with every team and season.
if all_teams_data:
    df_all_teams = _with_win_pct(all_teams_data)
    output_path = f"{output_dir}/all_teams_data.csv"
    df_all_teams.to_csv(output_path, index=False)
    print(f"Datos de todos los equipos guardados en {output_path}")

Datos de ATL guardados en raw_data/ATL_data.csv
Datos de BOS guardados en raw_data/BOS_data.csv
Error 404 en el archivo: html/team/BRK_1991.html
Error 404 en el archivo: html/team/BRK_1992.html
Error 404 en el archivo: html/team/BRK_1993.html
Error 404 en el archivo: html/team/BRK_1994.html
Error 404 en el archivo: html/team/BRK_1995.html
Error 404 en el archivo: html/team/BRK_1996.html
Error 404 en el archivo: html/team/BRK_1997.html
Error 404 en el archivo: html/team/BRK_1998.html
Error 404 en el archivo: html/team/BRK_1999.html
Error 404 en el archivo: html/team/BRK_2000.html
Error 404 en el archivo: html/team/BRK_2001.html
Error 404 en el archivo: html/team/BRK_2002.html
Error 404 en el archivo: html/team/BRK_2003.html
Error 404 en el archivo: html/team/BRK_2004.html
Error 404 en el archivo: html/team/BRK_2005.html
Error 404 en el archivo: html/team/BRK_2006.html
Error 404 en el archivo: html/team/BRK_2007.html
Error 404 en el archivo: html/team/BRK_2008.html
Error 404 en el archiv

In [None]:
# The parsing cell above resets `dfs` and never appends to it, so a plain
# pd.concat(dfs) raises "No objects to concatenate" on a fresh run. Fall back to
# the per-season records collected in `all_teams_data` in that case.
# NOTE(review): the saved output below shows a different column layout
# (Season, Lg, Tm, ...), presumably from an older version of the parsing code -
# confirm which table teams.csv is supposed to contain.
teams_df = pd.concat(dfs) if dfs else pd.DataFrame(all_teams_data)
teams_df

Unnamed: 0,Season,Lg,Tm,W,L,Finish,Unnamed: 6,Age,Ht.,Wt.,Unnamed: 10,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Team
1,2024,NBA,ATL,36,46,3,,26.2,6-6,211,,82,242.1,43.0,92.5,.465,13.7,37.7,.364,29.3,54.8,.535,18.5,23.2,.797,12.5,32.2,44.7,26.6,7.5,4.5,13.5,18.6,118.3,ATL
2,2023,NBA,ATL,41,41,2,,24.9,6-6,210,,82,242.1,44.6,92.4,.483,10.8,30.5,.352,33.9,61.8,.548,18.5,22.6,.818,11.2,33.2,44.4,25.0,7.1,4.9,12.9,18.8,118.4,ATL
3,2022,NBA,ATL,43,39,2,,26.1,6-6,211,,82,240.3,41.5,88.3,.470,12.9,34.4,.374,28.6,53.9,.531,18.1,22.3,.812,10.0,33.9,44.0,24.6,7.2,4.2,11.9,18.7,113.9,ATL
4,2021,NBA,ATL,41,31,1,,25.4,6-6,212,,72,241.7,40.8,87.2,.468,12.4,33.4,.373,28.4,53.9,.526,19.7,24.2,.812,10.6,35.1,45.6,24.1,7.0,4.8,13.2,19.3,113.7,ATL
5,2020,NBA,ATL,20,47,5,,24.1,6-6,213,,67,243.0,40.6,90.6,.449,12.0,36.1,.333,28.6,54.5,.525,18.5,23.4,.790,9.9,33.4,43.3,24.0,7.8,5.1,16.2,23.1,111.8,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31,1995,NBA,WSB,21,61,7,,24.5,6-7,224,,82,242.1,38.7,84.1,.460,5.3,15.4,.343,33.5,68.7,.487,17.8,24.5,.724,12.7,27.1,39.8,21.3,7.9,4.9,15.9,23.8,100.5,WAS
32,1994,NBA,WSB,24,58,7,,25.8,6-6,214,,82,240.3,39.0,83.2,.468,2.7,9.1,.297,36.3,74.2,.489,19.7,26.4,.748,13.1,26.7,39.8,22.2,8.5,3.9,17.1,20.9,100.4,WAS
33,1993,NBA,WSB,22,60,7,,26.1,6-6,202,,82,241.5,40.3,86.2,.467,2.1,7.0,.301,38.1,79.1,.482,19.2,25.7,.748,12.6,28.3,40.8,25.7,8.2,4.4,16.1,21.9,101.9,WAS
34,1992,NBA,WSB,25,57,6,,26.1,6-5,197,,82,242.1,41.0,89.0,.461,1.8,6.5,.272,39.2,82.5,.476,18.5,23.9,.778,13.0,28.6,41.6,24.5,8.7,5.1,15.3,22.6,102.4,WAS


In [None]:
# Persist the team table without its row index. The path has no placeholders,
# so a plain string replaces the needless f-string.
os.makedirs("raw_data", exist_ok=True)  # to_csv does not create missing directories
teams_df.to_csv("raw_data/teams.csv", index=False)

# 5. Season matches

In [None]:
# Season to extract
year = 2024

# Common prefix of the monthly schedule pages; the month name and ".html" are appended to it
base_url = "https://www.basketball-reference.com/leagues/NBA_{}_games-".format(year)

# Regular-season months (starting in October)
# months = ["october", "november", "december", "january", "february", "march", "april"]
# months = ["october", "november", "december"]

# Collected stats: a dict keyed by team name, each value the list of that team's per-game stat dicts
team_data = dict()

# Full team name -> abbreviation used in the box-score table ids and in the output file names
team_abbreviations = dict([
    ("Atlanta Hawks", "ATL"),
    ("Boston Celtics", "BOS"),
    ("Brooklyn Nets", "BRK"),
    ("Charlotte Hornets", "CHO"),
    ("Chicago Bulls", "CHI"),
    ("Cleveland Cavaliers", "CLE"),
    ("Dallas Mavericks", "DAL"),
    ("Denver Nuggets", "DEN"),
    ("Detroit Pistons", "DET"),
    ("Golden State Warriors", "GSW"),
    ("Houston Rockets", "HOU"),
    ("Indiana Pacers", "IND"),
    ("Los Angeles Clippers", "LAC"),
    ("Los Angeles Lakers", "LAL"),
    ("Memphis Grizzlies", "MEM"),
    ("Miami Heat", "MIA"),
    ("Milwaukee Bucks", "MIL"),
    ("Minnesota Timberwolves", "MIN"),
    ("New Orleans Pelicans", "NOP"),
    ("New York Knicks", "NYK"),
    ("Oklahoma City Thunder", "OKC"),
    ("Orlando Magic", "ORL"),
    ("Philadelphia 76ers", "PHI"),
    ("Phoenix Suns", "PHO"),
    ("Portland Trail Blazers", "POR"),
    ("Sacramento Kings", "SAC"),
    ("San Antonio Spurs", "SAS"),
    ("Toronto Raptors", "TOR"),
    ("Utah Jazz", "UTA"),
    ("Washington Wizards", "WAS"),
])

In [8]:
def get_boxscore_links(month):
    """Return the box-score URLs listed on the schedule page of one month.

    Args:
        month: Month name as used in the schedule URL (e.g. "april"); it is
            appended to ``base_url`` followed by ".html".

    Returns:
        A list of absolute box-score URLs in page order. The list is empty when
        the request fails or the page has no table with id "schedule".
    """
    url = base_url + month + ".html"
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error al obtener el calendario {url}: {e}")
        return []
    soup = bs(response.text, "html.parser")

    # Find the schedule table
    table = soup.find("table", {"id": "schedule"})
    if not table:
        print("No se encontró la tabla de partidos.")
        return []

    # Use the rows of <tbody> when it exists; otherwise fall back to all rows
    # of the table instead of failing on a missing <tbody>.
    body = table.find("tbody")
    rows = body.find_all("tr") if body else table.find_all("tr")

    # Collect the link labelled "Box Score" of every row that has one.
    # `string=` is the current name of the deprecated `text=` filter.
    links = []
    for row in rows:
        link = row.find("a", string="Box Score")
        if link:
            links.append("https://www.basketball-reference.com" + link["href"])

    return links

def extract_team_stats(soup, team):
    """Collect a team's totals from the basic and advanced box-score tables.

    Args:
        soup: Parsed box-score page.
        team: Full team name; it is stripped and looked up in ``team_abbreviations``.

    Returns:
        A dict mapping each ``data-stat`` attribute found in the ``<tfoot>`` cells
        of the tables "box-<ABBR>-game-basic" and "box-<ABBR>-game-advanced" to the
        stripped cell text. The dict is empty when the team has no known
        abbreviation; a missing table or footer is skipped.
    """
    totals = {}

    abbr = team_abbreviations.get(team.strip(), None)
    if not abbr:
        print(f"No se encontró la abreviatura para el equipo {team}.")
        return totals

    # Basic table first, then advanced: on a repeated data-stat the later value wins.
    for table_kind in ("basic", "advanced"):
        stats_table = soup.find("table", id=f"box-{abbr}-game-{table_kind}")
        footer = stats_table.find("tfoot") if stats_table else None
        if not footer:
            continue
        totals.update(
            {cell.get("data-stat"): cell.text.strip() for cell in footer.find_all("td")}
        )

    return totals

def get_game_stats(boxscore_url):
    """Download one box-score page and return the totals of both teams.

    Args:
        boxscore_url: Absolute URL of the box-score page.

    Returns:
        ``None`` when the page text contains one of the playoff / play-in
        markers, when it is the site's 404 page, or when fewer than two team
        names can be read from the score box. Otherwise a dict with one entry
        per team, keyed by the team name as shown in the score box. Each value
        holds the team's own stats plus "Location" and "Opponent", followed by
        the other team's entries under an "opp_" prefix (this includes
        "opp_Location" and "opp_Opponent").

    Raises:
        requests.exceptions.RequestException: if the request fails or times out.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(boxscore_url, timeout=30)
    soup = bs(response.text, "html.parser")

    # Extract the page text once instead of once per check.
    page_text = soup.text

    # Check whether the game belongs to the playoffs / play-in
    playoff_markers = ("NBA Eastern Conference", "NBA Western Conference", "Play-In Game:")
    if any(marker in page_text for marker in playoff_markers):
        print("Partido de playoffs detectado. Finalizando extracción.")
        return None

    if "Page Not Found (404 error)" in page_text:
        print("Error 404: Página no encontrada.")
        return None

    # Read the team names; a page without a score box yields no names and is
    # handled by the length check below instead of raising AttributeError.
    scorebox = soup.find("div", class_="scorebox")
    if scorebox is None:
        team_names = []
    else:
        team_names = [tag.text.strip() for tag in scorebox.find_all("strong")]

    if len(team_names) < 2:
        return None

    # The first name is treated as the away team, the second as the home team.
    home_team, away_team = team_names[1], team_names[0]
    home_abbr = team_abbreviations.get(home_team, "UNK")
    away_abbr = team_abbreviations.get(away_team, "UNK")

    # Extract the totals of each team
    home_stats = extract_team_stats(soup, home_team)
    away_stats = extract_team_stats(soup, away_team)

    # Add venue and opponent information
    home_stats.update({"Location": "Home", "Opponent": away_abbr})
    away_stats.update({"Location": "Away", "Opponent": home_abbr})

    # Mirror each team's stats under an "opp_" prefix for the other team
    home_stats_with_opp = {f"opp_{k}": v for k, v in away_stats.items()}
    away_stats_with_opp = {f"opp_{k}": v for k, v in home_stats.items()}

    game_stats = {
        home_team: {**home_stats, **home_stats_with_opp},
        away_team: {**away_stats, **away_stats_with_opp}
    }

    return game_stats

In [25]:
# Alias, not a copy: both names refer to the same dict, so games added to
# team_data are visible through all_team_data as well.
all_team_data = team_data

In [24]:
# Months to scrape. Full regular season:
# months = ["october", "november", "december", "january", "february", "march", "april"]
months = ["april"]

# HTTP session that retries transient server errors with exponential backoff.
# NOTE(review): get_boxscore_links and get_game_stats are called below without
# this session, so the retry policy does not apply to their requests - confirm
# whether they are meant to use it.
session = requests.Session()
retry = Retry(
    total=5,  # total number of retries
    backoff_factor=1,  # exponential backoff factor
    status_forcelist=[500, 502, 503, 504],  # status codes that trigger a retry
)
adapter = HTTPAdapter(max_retries=retry)
session.mount("http://", adapter)
session.mount("https://", adapter)

# Set once a box score yields no stats (playoff game, 404 page, ...). A
# dedicated flag avoids reading `game_stats` after the inner loop, which is
# unbound when the first month has no box-score links.
extraction_finished = False

# Extract the stats of every regular-season game
for month in months:
    print(f"Extrayendo partidos de {month}...")
    boxscore_links = get_boxscore_links(month)
    print(f"Encontrados {len(boxscore_links)} partidos.")

    for boxscore in boxscore_links:
        try:
            game_stats = get_game_stats(boxscore)
        except requests.exceptions.RequestException as e:
            # Network problem for this game only: report it and try the next one.
            print(f"Error al obtener estadísticas para el boxscore: {boxscore}, error: {e}")
            continue

        if game_stats is None:
            print(f"No se pudieron extraer estadísticas para el boxscore: {boxscore}")
            extraction_finished = True
            break

        # Append each team's stats of this game to that team's list.
        for team, stats in game_stats.items():
            team_data.setdefault(team.strip(), []).append(stats)

        time.sleep(1.5)  # short pause to avoid being blocked

    if extraction_finished:
        break  # stop: a game without stats (e.g. a playoff game) was reached


Extrayendo partidos de april...
Encontrados 157 partidos.
Partido de playoffs detectado. Finalizando extracción.
No se pudieron extraer estadísticas para el boxscore: https://www.basketball-reference.com/boxscores/202404160NOP.html


In [26]:
# Inspect the collected stats (dict keyed by team name, one stat dict per game).
all_team_data

{'Denver Nuggets': [{'mp': '240',
   'fg': '48',
   'fga': '91',
   'fg_pct': '.527',
   'fg3': '14',
   'fg3a': '34',
   'fg3_pct': '.412',
   'ft': '9',
   'fta': '12',
   'ft_pct': '.750',
   'orb': '9',
   'drb': '33',
   'trb': '42',
   'ast': '29',
   'stl': '9',
   'blk': '6',
   'tov': '12',
   'pf': '15',
   'pts': '119',
   'game_score': '',
   'plus_minus': '',
   'ts_pct': '.618',
   'efg_pct': '.604',
   'fg3a_per_fga_pct': '.374',
   'fta_per_fga_pct': '.132',
   'orb_pct': '22.5',
   'drb_pct': '71.7',
   'trb_pct': '48.8',
   'ast_pct': '60.4',
   'stl_pct': '9.3',
   'blk_pct': '9.8',
   'tov_pct': '11.1',
   'usg_pct': '100.0',
   'off_rtg': '123.6',
   'def_rtg': '111.1',
   'bpm': '',
   'Location': 'Home',
   'Opponent': 'LAL',
   'opp_mp': '240',
   'opp_fg': '41',
   'opp_fga': '90',
   'opp_fg_pct': '.456',
   'opp_fg3': '10',
   'opp_fg3a': '29',
   'opp_fg3_pct': '.345',
   'opp_ft': '15',
   'opp_fta': '20',
   'opp_ft_pct': '.750',
   'opp_orb': '13',
   'op

In [27]:
# Write one CSV per team containing all of its collected games
output_dir = f"raw_data/season_team_stats_{year}"
os.makedirs(output_dir, exist_ok=True)

for team_name, games in team_data.items():
    clean_name = team_name.strip()  # drop stray whitespace / newlines around the name
    # The file is named after the abbreviation; unknown teams fall back to the full name
    file_stem = team_abbreviations.get(clean_name, clean_name)
    pd.DataFrame(games).to_csv(os.path.join(output_dir, f"{file_stem}.csv"), index=False)

print(f"Datos guardados en la carpeta {output_dir}")

Datos guardados en la carpeta raw_data/season_team_stats_2024
