In [None]:
# Day 1: Sanity Check for T20 JSON Files
# Goal: Verify that Cricsheet JSON files load correctly without freezing
# Uses only ONE file from each folder and prints top-level keys & metadata

import json
from pathlib import Path
import os

# ------------------------------
# 0. Helpers shared by both folders
# ------------------------------
# Maximum number of filenames printed per folder. The raw folders can hold
# thousands of match files, and dumping every name defeats the purpose of a
# quick, non-freezing sanity check.
LISTING_LIMIT = 10


def preview_folder(folder, limit=LISTING_LIMIT):
    """Return up to ``limit`` entry names from ``folder``.

    Args:
        folder: ``Path`` of the directory to inspect.
        limit: Maximum number of names to collect.

    Returns:
        A ``(names, truncated)`` tuple. ``names`` is a list of entry names in
        the order ``iterdir()`` yields them; ``truncated`` is True when the
        folder holds more entries than were collected. A path that is missing
        or is not a directory yields ``([], False)``.
    """
    # is_dir() (not exists()) so a stray file at this path cannot make
    # iterdir() raise NotADirectoryError.
    if not folder.is_dir():
        return [], False

    names = []
    for entry in folder.iterdir():
        if len(names) == limit:
            # At least one more entry exists beyond the limit.
            return names, True
        names.append(entry.name)
    return names, False


def first_json_file(folder):
    """Return the first ``*.json`` path found in ``folder``, or None.

    None is returned when the folder is missing, is not a directory, or
    contains no JSON files. Only the first match is pulled from the glob
    generator, so the rest of the folder is not scanned.
    """
    if not folder.is_dir():
        return None
    return next(folder.glob("*.json"), None)


def load_match(path):
    """Load one match JSON file.

    Returns:
        The parsed top-level dict, or None (after printing the reason) when
        the file cannot be read, is not valid JSON, or its top level is not a
        JSON object.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            match = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Could not load {path}: {exc}")
        return None

    if not isinstance(match, dict):
        print(f"❌ Unexpected JSON structure in {path}: top level is not an object.")
        return None
    return match


def summarize_match(match):
    """Extract the metadata fields this check prints from a match dict.

    Missing fields (or a missing / malformed ``info`` section) fall back to
    ``'Unknown'`` instead of raising ``KeyError``.

    Returns:
        Dict with keys ``info_keys`` (list), ``teams``, ``venue``, ``dates``.
    """
    info = match.get("info")
    if not isinstance(info, dict):
        info = {}
    return {
        "info_keys": list(info),
        "teams": info.get("teams", "Unknown"),
        "venue": info.get("venue", "Unknown"),
        "dates": info.get("dates", "Unknown"),
    }


# ------------------------------
# 1. Check working directory
# ------------------------------
# The data paths below are relative, so they resolve against this directory.
print("Current working directory:", os.getcwd())

# ------------------------------
# 2. Define paths to raw data folders
# ------------------------------
t20i_path = Path("data/raw/t20_all_matches")
wc_path = Path("data/raw/t20_worldcup_matches")

# ------------------------------
# 3. Confirm folders exist
# ------------------------------
for folder in (t20i_path, wc_path):
    if folder.is_dir():
        print(f"✅ Folder exists: {folder}")
    else:
        print(f"❌ Folder NOT found: {folder}")
        print("Make sure ZIP files are extracted correctly.\n")

# ------------------------------
# 4. List (a preview of) files in each folder
# ------------------------------
print("\nListing contents of folders:")
for label, folder in (
    ("T20I folder contents:", t20i_path),
    ("WC folder contents  :", wc_path),
):
    names, truncated = preview_folder(folder)
    note = f" ... (showing first {len(names)} only)" if truncated else ""
    print(f"{label} {names}{note}")

# ------------------------------
# 5. Select ONE JSON file safely from each folder
# ------------------------------
t20i_file = first_json_file(t20i_path)
wc_file = first_json_file(wc_path)

if not t20i_file:
    print("\n❌ No JSON files found in T20I folder.")
if not wc_file:
    print("\n❌ No JSON files found in WC folder.")

if t20i_file and wc_file:
    print("\nFiles selected for sanity check:")
    print("T20I:", t20i_file)
    print("WC  :", wc_file)

    # ------------------------------
    # 6. Load ONE JSON file from each folder
    # ------------------------------
    # Either variable is None if its file could not be loaded; the reason
    # has already been printed by load_match().
    t20i_match = load_match(t20i_file)
    wc_match = load_match(wc_file)
    loaded = (("T20I", t20i_match), ("WC", wc_match))

    # ------------------------------
    # 7. Inspect top-level keys
    # ------------------------------
    print()
    for label, match in loaded:
        if match is not None:
            # Pad the heading to 24 chars so the colons of both rows line up.
            heading = f"{label} JSON top-level keys"
            print(f"{heading:<24}:", list(match))

    # ------------------------------
    # 8. Peek at match info (metadata only)
    # ------------------------------
    for label, match in loaded:
        if match is None:
            continue
        summary = summarize_match(match)
        print(f"\nSample {label} match info keys:", summary["info_keys"])
        print("Teams:", summary["teams"])
        print("Venue:", summary["venue"])
        print("Date :", summary["dates"])

else:
    print("\n❌ Sanity check cannot run because one or both folders have no JSON files.")



Hello World
