In [64]:
import os
import json
import pandas as pd
import requests
from dotenv import load_dotenv

In [65]:
# Season whose per-game JSON files are flattened in this notebook.
year = 2023  # adjust as needed
folder_path = f"../../data/nba/api_data/match_stats/{year}"

# List all JSON files in the folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the exported CSV stable across runs and machines.
json_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".json"))
print(f"📂 Found {len(json_files)} game files")


📂 Found 105 game files


In [66]:
def _venue_fields(game_data):
    """Return the flat ``venue_*`` columns for one game.

    Every value is None when the game has no venue (or a null venue/location).
    """
    venue = game_data.get("venue") or {}
    # location details (if present inside venue)
    location = venue.get("location") or {}
    return {
        "venue_id": venue.get("id"),
        "venue_name": venue.get("name"),
        "venue_capacity": venue.get("capacity"),
        "venue_address": venue.get("address"),
        "venue_city": venue.get("city"),
        "venue_state": venue.get("state"),
        "venue_zip": venue.get("zip"),
        "venue_country": venue.get("country"),
        "venue_sr_id": venue.get("sr_id"),
        "venue_lat": location.get("lat"),
        "venue_lon": location.get("lng"),
    }


def _team_row(game_data, team, venue_fields):
    """Flatten one team's side of a game into a single flat dict.

    Args:
        game_data: parsed game JSON (dict).
        team: the ``home`` or ``away`` sub-dict of ``game_data``.
        venue_fields: pre-computed ``venue_*`` columns shared by both sides.

    Returns:
        dict with game, team, record, venue, scoring and statistics columns.
        Missing optional sections yield None values instead of raising.
    """
    scheduled = game_data.get("scheduled")
    record = team.get("record") or {}

    row = {
        # game details
        "game_id": game_data.get("id"),
        "game_sr_id": game_data.get("sr_id"),
        "scheduled": scheduled,
        "duration": game_data.get("duration"),
        # date part of the timestamp; None when the game has no schedule
        "game_date": scheduled.split("T")[0] if scheduled else None,
        "status": game_data.get("status"),
        "attendance": game_data.get("attendance"),
        "track_on_court": game_data.get("track_on_court"),
        # team details
        "team_id": team.get("id"),
        "team_sr_id": team.get("sr_id"),
        "team_name": team.get("name"),
        "team_alias": team.get("alias"),
        "team_market": team.get("market"),
        "points": team.get("points"),
        "bonus": team.get("bonus"),
        "timeouts_remaining": team.get("remaining_timeouts"),
        # team records
        "record_wins": record.get("wins"),
        "record_losses": record.get("losses"),
    }
    row.update(venue_fields)

    # team scoring: one group of columns per entry, indexed by position
    for i, s in enumerate(team.get("scoring") or []):
        row[f"scoring_{i}_points"] = s.get("points")
        row[f"scoring_{i}_type"] = s.get("type")
        row[f"scoring_{i}_number"] = s.get("number")
        row[f"scoring_{i}_sequence"] = s.get("sequence")

    # team statistics: a mapping of stat name -> value
    stats_data = team.get("statistics") or {}
    # Top-level stats are copied as-is and may overwrite keys set above
    # (e.g. "points"). The raw nested "most_unanswered" and "periods" values
    # are copied too; they are dropped after the DataFrame is built.
    for k, v in stats_data.items():
        row[k] = v

    # team most_unanswered
    for k, v in (stats_data.get("most_unanswered") or {}).items():
        row[f"most_unanswered_{k}"] = v

    # team periodic statistics: every stat is prefixed with its period number
    for p in stats_data.get("periods") or []:
        period_no = p.get("number")
        suffix = f"{period_no}th_period"
        for stat_name, value in p.items():
            if stat_name != "number":
                row[f"{suffix}_{stat_name}"] = value

    return row


rows = []

for filename in json_files:
    file_path = os.path.join(folder_path, filename)

    try:
        # Explicit encoding so the result does not depend on the platform locale.
        with open(file_path, "r", encoding="utf-8") as f:
            game_data = json.load(f)

        # Skip games whose status is anything other than "closed".
        if game_data.get("status") != "closed":
            continue

        venue_fields = _venue_fields(game_data)

        # Collect both sides first so that a failure on one side never leaves
        # a half-recorded game in `rows`.
        game_rows = []
        for side in ("home", "away"):
            team = game_data.get(side) or {}
            if not team:
                continue
            game_rows.append(_team_row(game_data, team, venue_fields))

    except Exception as e:
        # Best-effort: report the bad file and keep processing the rest.
        print(f"❌ Failed {filename}: {e}")
    else:
        rows.extend(game_rows)


In [67]:
# One row per team per game. Composite key = (game_id, team_id)
df = pd.DataFrame(rows)
# "most_unanswered" and "periods" hold the raw nested values that the
# flattening loop also expands into prefixed columns. errors="ignore" keeps
# this cell working when no game carried them (or when `rows` is empty).
df = df.drop(columns=["most_unanswered", "periods"], errors="ignore")
print(f"✅ Processed {len(df)} rows from {len(json_files)} files")

output_path = f"../../data/nba/api_data/team_stats/nba_team_stats_{year}.csv"
# Create the target folder on first run so to_csv does not fail.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print(f"📄 Saved CSV → {output_path}")


✅ Processed 164 rows from 105 files
📄 Saved CSV → ../../data/nba/api_data/team_stats/nba_team_stats_2023.csv


In [68]:
# Reload the CSV that was just written and show its dimensions as the
# cell output (rows, columns).
df = pd.read_csv(filepath_or_buffer=output_path)
df.shape

(164, 417)