In [1]:
# -----------------------------------------
# Import Packages
# -----------------------------------------
import os
from datetime import datetime, timezone

import sqlite3
import pandas as pd
from sqlalchemy import create_engine
from nba_api.stats.endpoints import playergamelogs

In [2]:
# -----------------------------------------
# Configuration
# -----------------------------------------
DB_NAME = "nba_data.db"
DB_URI = f"sqlite:///{DB_NAME}"

# Dynamically generate seasons from 2000 to 2023
START_YEAR = 2000
END_YEAR = 2023
SEASONS = [f"{year}-{str(year+1)[-2:]}" for year in range(START_YEAR, END_YEAR + 1)]
SEASON_TYPE = "Regular Season"

# -----------------------------------------
# Create SQLAlchemy engine
# -----------------------------------------
engine = create_engine(DB_URI, echo=False)

In [3]:
# -----------------------------------------
# Fetch Data for Multiple Seasons
# -----------------------------------------
all_data = []
for season in SEASONS:
    print(f"Fetching data for season {season}...")
    gamelogs = playergamelogs.PlayerGameLogs(
        season_nullable=season,
        season_type_nullable=SEASON_TYPE
    )
    df_season = gamelogs.get_data_frames()[0]
    df_season["season"] = season
    all_data.append(df_season)

# Concatenate all seasons into one DataFrame
df_all = pd.concat(all_data, ignore_index=True)
print(f"Total records fetched: {len(df_all)}")

Fetching data for season 2000-01...
Fetching data for season 2001-02...
Fetching data for season 2002-03...
Fetching data for season 2003-04...
Fetching data for season 2004-05...
Fetching data for season 2005-06...
Fetching data for season 2006-07...
Fetching data for season 2007-08...
Fetching data for season 2008-09...
Fetching data for season 2009-10...
Fetching data for season 2010-11...
Fetching data for season 2011-12...
Fetching data for season 2012-13...
Fetching data for season 2013-14...
Fetching data for season 2014-15...
Fetching data for season 2015-16...
Fetching data for season 2016-17...
Fetching data for season 2017-18...
Fetching data for season 2018-19...
Fetching data for season 2019-20...
Fetching data for season 2020-21...
Fetching data for season 2021-22...
Fetching data for season 2022-23...
Fetching data for season 2023-24...
Total records fetched: 596475


  df_all = pd.concat(all_data, ignore_index=True)


In [4]:
# -----------------------------------------
# Select Necessary Columns and Transform
# -----------------------------------------
columns_needed = ["PLAYER_ID", "GAME_ID", "GAME_DATE", "TEAM_ID", "MATCHUP", 
                  "PTS", "MIN", "FGM", "FGA", "REB", "AST", "season"]

df_selected = df_all[columns_needed].copy()

# Convert GAME_DATE to a proper datetime
df_selected["GAME_DATE"] = pd.to_datetime(df_selected["GAME_DATE"])

# Add a last_updated column
df_selected["last_updated"] = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

# Normalize column names (lowercase)
df_selected.columns = [col.lower() for col in df_selected.columns]

In [5]:
# -----------------------------------------
# Load Data into Local SQLite Database
# -----------------------------------------
df_selected.to_sql("player_game_data", engine, if_exists="replace", index=False)

print("Data collection and management complete.")
print(f"Data stored in {DB_NAME}, table: player_game_data")

Data collection and management complete.
Data stored in nba_data.db, table: player_game_data
