In [47]:
pip install nba_api




In [13]:
from nba_api.stats.endpoints import leaguegamelog, boxscoretraditionalv2
import pandas as pd
import os
import time

# Build one row per game (teams and final scores) from the league game log.
def get_season_games(season, season_type="Regular Season"):
    """Return one row per game with home/away teams and their scores.

    Fetches the ``LeagueGameLog`` table for the requested season and collapses
    its rows by ``GAME_ID``. The home/away split is derived from the
    ``MATCHUP`` text: a row containing ``"vs."`` is read as ``"HOME vs. AWAY"``
    and any other row as ``"AWAY @ HOME"``.

    Args:
        season: Season identifier forwarded to the endpoint, e.g. ``"2023-24"``.
        season_type: Value forwarded as ``season_type_all_star``, e.g.
            ``"Regular Season"`` or ``"Playoffs"``.

    Returns:
        DataFrame with the columns ``GAME_ID``, ``GAME_DATE``, ``HOME_TEAM``,
        ``AWAY_TEAM``, ``HOME_SCORE`` and ``AWAY_SCORE``. On any failure the
        error is printed and an empty DataFrame is returned instead.
    """
    try:
        # Download the game log
        gamelog = leaguegamelog.LeagueGameLog(season=season, season_type_all_star=season_type)
        games = gamelog.get_data_frames()[0]

        # Keep only the columns of interest. The explicit copy makes the frame
        # independent of the API result, so adding columns below does not
        # raise pandas' SettingWithCopyWarning.
        games = games[["GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_ABBREVIATION", "PTS", "WL"]].copy()

        # Identify home and away teams from the matchup text
        games["HOME_TEAM"] = games["MATCHUP"].apply(lambda x: x.split(" vs. ")[0] if "vs." in x else x.split(" @ ")[1])
        games["AWAY_TEAM"] = games["MATCHUP"].apply(lambda x: x.split(" vs. ")[1] if "vs." in x else x.split(" @ ")[0])

        # Keep PTS only on the row that belongs to the respective team; the
        # other row gets NaN. Series.where is vectorized and, unlike a
        # row-wise DataFrame.apply, also works when the log has no rows.
        games["HOME_SCORE"] = games["PTS"].where(games["TEAM_ABBREVIATION"] == games["HOME_TEAM"])
        games["AWAY_SCORE"] = games["PTS"].where(games["TEAM_ABBREVIATION"] == games["AWAY_TEAM"])

        # Collapse the rows of each game; "max" skips the NaN placeholder
        games = games.groupby("GAME_ID").agg({
            "GAME_DATE": "first",
            "HOME_TEAM": "first",
            "AWAY_TEAM": "first",
            "HOME_SCORE": "max",
            "AWAY_SCORE": "max"
        }).reset_index()

        return games
    except Exception as e:
        # Best-effort notebook helper: report the problem and hand back an
        # empty frame so the caller can still concatenate results.
        print(f"Errore durante l'estrazione delle partite per la stagione {season} ({season_type}): {e}")
        return pd.DataFrame()

# Season to download
season = "2023-24"

# Fetch the scored games of both phases with the same helper
regular_season_games = get_season_games(season, "Regular Season")
playoff_games = get_season_games(season, "Playoffs")

# Stack regular season and playoffs into one table with a fresh index
all_games = pd.concat((regular_season_games, playoff_games), ignore_index=True)

# Preview the first ten combined rows
print(all_games.head(10))

# Persist the complete list as CSV (without the index column)
all_games.to_csv("nba_2023_24_all_games.csv", index=False)
print("Elenco completo delle partite con punteggi salvato in 'nba_2023_24_all_games.csv'.")


      GAME_ID   GAME_DATE HOME_TEAM AWAY_TEAM  HOME_SCORE  AWAY_SCORE
0  0022300001  2023-11-03       IND       CLE       121.0       116.0
1  0022300002  2023-11-03       MIL       NYK       110.0       105.0
2  0022300003  2023-11-03       MIA       WAS       121.0       114.0
3  0022300004  2023-11-03       CHI       BKN       107.0       109.0
4  0022300005  2023-11-03       OKC       GSW       139.0       141.0
5  0022300006  2023-11-03       DEN       DAL       125.0       114.0
6  0022300007  2023-11-03       POR       MEM       115.0       113.0
7  0022300008  2023-11-10       DET       PHI       106.0       114.0
8  0022300009  2023-11-10       WAS       CHA       117.0       124.0
9  0022300010  2023-11-10       BOS       BKN       121.0       107.0
Elenco completo delle partite con punteggi salvato in 'nba_2023_24_all_games.csv'.


In [27]:
# Add True Shooting % and Turnover % columns to a box-score table.
def calculate_advanced_metrics(stats):
    """Add ``TS%`` and ``TOV%`` columns to ``stats`` and return it.

    The frame passed in is modified in place; the returned object is that
    same frame.

    Args:
        stats: DataFrame providing the ``PTS``, ``FGA``, ``FTA`` and ``TO``
            columns.

    Returns:
        ``stats`` with the two additional columns.
    """
    # Field-goal attempts plus free-throw attempts weighted by 0.44; this
    # term appears in the denominator of both formulas.
    weighted_attempts = stats["FGA"] + 0.44 * stats["FTA"]

    # TS% = PTS / (2 * (FGA + 0.44 * FTA))
    stats["TS%"] = stats["PTS"] / (2 * weighted_attempts)

    # TOV% = TO / (FGA + 0.44 * FTA + TO)
    stats["TOV%"] = stats["TO"] / (weighted_attempts + stats["TO"])

    return stats

# Normalize a box-score minutes value to "MM:SS".
def clean_minutes_string(minutes):
    """Strip the decimals from the minutes field of a ``"MM.ffffff:SS"`` string.

    Args:
        minutes: Raw minutes value. Only strings containing ``":"`` are
            rewritten; anything else (``None``, numbers, strings without a
            colon) is returned unchanged.

    Returns:
        ``"MM:SS"``, where ``MM`` is the text before the first ``"."`` of the
        minutes field and ``SS`` is the text between the first and second
        ``":"``; otherwise the input exactly as given.
    """
    if not (isinstance(minutes, str) and ":" in minutes):
        return minutes

    fields = minutes.split(":")
    # Drop the decimals from the minutes field only. Splitting the whole
    # string on "." left an already clean "35:24" intact as the "minutes"
    # part and produced "35:24:24".
    mins = fields[0].split(".")[0]
    secs = fields[1]
    return f"{mins}:{secs}"

# Fetch the player box score of one game, add metrics and save it as CSV.
def process_game_stats(game_id, output_dir):
    """Download one game's box score and write it to ``<output_dir>/<game_id>.csv``.

    The first table returned by ``BoxScoreTraditionalV2`` is reduced to a fixed
    set of columns, ``PLAYER_NAME``/``MIN`` are renamed to ``player``/``mp``,
    the minutes strings are cleaned and ``TS%``/``TOV%`` are appended before
    the file is written.

    Args:
        game_id: Identifier of the game; also used as the CSV file name.
        output_dir: Directory that receives the CSV file.

    Returns:
        None. Any exception is caught and reported with ``print`` so that a
        single failing game does not stop a batch run.
    """
    try:
        # Download the box score of the game
        boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
        stats = boxscore.get_data_frames()[0]

        # Keep the needed columns. rename() without inplace returns a new
        # frame, so the column writes below act on an independent object and
        # no longer raise pandas' SettingWithCopyWarning.
        columns = ["PLAYER_NAME", "MIN", "PTS", "REB", "AST", "STL", "BLK", "TO", "FGA", "FTA", "FG_PCT"]
        stats = stats[columns].rename(columns={"PLAYER_NAME": "player", "MIN": "mp"})

        # Clean up the minutes played
        stats["mp"] = stats["mp"].apply(clean_minutes_string)

        # Compute TS% and TOV%
        stats = calculate_advanced_metrics(stats)

        # Write the CSV file
        file_name = f"{game_id}.csv"
        output_path = os.path.join(output_dir, file_name)
        stats.to_csv(output_path, index=False)
        print(f"Statistiche salvate in {output_path}.")
    except Exception as e:
        print(f"Errore durante l'elaborazione delle statistiche per GAME_ID {game_id}: {e}")

# Download and save the box score of every game of a season.
def process_all_games(season, season_type="Regular Season", output_dir="games_csv"):
    """Save one CSV per game of the given season into ``output_dir``.

    Args:
        season: Season identifier forwarded to ``LeagueGameLog``.
        season_type: Value forwarded as ``season_type_all_star``.
        output_dir: Destination directory; created when it does not exist.

    Returns:
        None. Exceptions raised while listing or iterating the games are
        caught and reported with ``print``.
    """
    # Make sure the destination directory is there
    os.makedirs(output_dir, exist_ok=True)

    try:
        # Unique GAME_IDs found in the first table of the game log
        log_frame = leaguegamelog.LeagueGameLog(
            season=season,
            season_type_all_star=season_type,
        ).get_data_frames()[0]
        game_ids = log_frame["GAME_ID"].unique()
        print(f"Trovati {len(game_ids)} GAME_ID per la stagione {season} ({season_type}).")

        # Handle the games one by one
        for i, game_id in enumerate(game_ids):
            print(f"Elaborazione GAME_ID {game_id} ({i + 1}/{len(game_ids)})...")
            process_game_stats(game_id, output_dir)
            # Pause between requests to avoid being blocked by the API
            time.sleep(1)

    except Exception as e:
        print(f"Errore durante l'estrazione dei GAME_ID: {e}")

# Run the download for the regular season first, then for the playoffs
season = "2023-24"
for phase, target_dir in (
    ("Regular Season", "regular_season_games"),
    ("Playoffs", "playoff_games"),
):
    process_all_games(season, season_type=phase, output_dir=target_dir)


Trovati 1230 GAME_ID per la stagione 2023-24 (Regular Season).
Elaborazione GAME_ID 0022300061 (1/1230)...
Statistiche salvate in regular_season_games\0022300061.csv.
Elaborazione GAME_ID 0022300062 (2/1230)...
Statistiche salvate in regular_season_games\0022300062.csv.
Elaborazione GAME_ID 0022300069 (3/1230)...
Statistiche salvate in regular_season_games\0022300069.csv.
Elaborazione GAME_ID 0022300065 (4/1230)...
Statistiche salvate in regular_season_games\0022300065.csv.
Elaborazione GAME_ID 0022300064 (5/1230)...
Statistiche salvate in regular_season_games\0022300064.csv.
Elaborazione GAME_ID 0022300063 (6/1230)...
Statistiche salvate in regular_season_games\0022300063.csv.
Elaborazione GAME_ID 0022300074 (7/1230)...
Statistiche salvate in regular_season_games\0022300074.csv.
Elaborazione GAME_ID 0022300072 (8/1230)...
Statistiche salvate in regular_season_games\0022300072.csv.
Elaborazione GAME_ID 0022300071 (9/1230)...
Statistiche salvate in regular_season_games\0022300071.csv.
E

In [37]:
# Fetch the unique GAME_IDs of a season.
def get_all_game_ids(season, season_type="Regular Season"):
    """Return the unique ``GAME_ID`` values listed in the league game log.

    Args:
        season: Season identifier forwarded to ``LeagueGameLog``.
        season_type: Value forwarded as ``season_type_all_star``.

    Returns:
        The result of ``unique()`` on the ``GAME_ID`` column of the first
        table, or an empty list when anything fails (the error is printed).
    """
    try:
        log_frame = leaguegamelog.LeagueGameLog(
            season=season,
            season_type_all_star=season_type,
        ).get_data_frames()[0]
        return log_frame["GAME_ID"].unique()
    except Exception as e:
        print(f"Errore durante l'estrazione dei GAME_ID: {e}")
        return []

# Collect the GAME_IDs of the regular season and of the playoffs
season = "2023-24"
regular_season_game_ids = get_all_game_ids(season, "Regular Season")
playoff_game_ids = get_all_game_ids(season, "Playoffs")

# Regular season: a game counts as saved when a CSV whose name starts with
# its GAME_ID (text before the first ".") is present in the folder
regular_season_saved_files = [
    name.partition(".")[0]
    for name in os.listdir("regular_season_games")
    if name.endswith(".csv")
]
missing_regular_season_ids = [
    gid for gid in regular_season_game_ids if gid not in regular_season_saved_files
]
print(f"GAME_ID mancanti per la Regular Season: {missing_regular_season_ids}")

# Playoffs: same comparison against the playoff folder
playoff_saved_files = [
    name.partition(".")[0]
    for name in os.listdir("playoff_games")
    if name.endswith(".csv")
]
missing_playoff_ids = [
    gid for gid in playoff_game_ids if gid not in playoff_saved_files
]
print(f"GAME_ID mancanti per i Playoffs: {missing_playoff_ids}")

# Retry the download for every game that has no CSV yet.
def process_missing_games(missing_game_ids, output_dir):
    """Run ``process_game_stats`` for each of the given game identifiers.

    Args:
        missing_game_ids: Sized collection of GAME_IDs still to be fetched.
        output_dir: Directory handed on to ``process_game_stats``.

    Returns:
        None. An exception raised for one game is printed and the loop moves
        on to the next game.
    """
    for i, game_id in enumerate(missing_game_ids):
        print(f"Rielaborazione GAME_ID {game_id} ({i + 1}/{len(missing_game_ids)})...")
        try:
            # Reuse the single-game routine defined earlier in the notebook
            process_game_stats(game_id, output_dir)
            # Pause between requests to avoid being blocked by the API
            time.sleep(1)
        except Exception as e:
            print(f"Errore durante la rielaborazione di GAME_ID {game_id}: {e}")

# Fetch whatever is still missing, regular season first and then playoffs
for pending_ids, folder in (
    (missing_regular_season_ids, "regular_season_games"),
    (missing_playoff_ids, "playoff_games"),
):
    process_missing_games(pending_ids, folder)


GAME_ID mancanti per la Regular Season: []
GAME_ID mancanti per i Playoffs: []
