## SQLITE DATABASE

## Define Paths & Match Formats

In [1]:
import os, json
import pandas as pd
import sqlite3
from pathlib import Path
from sqlalchemy import create_engine

# SQLAlchemy engine for the SQLite database file that receives the match tables.
engine = create_engine("sqlite:///cricket_matches.db")

# Folder holding the match JSON files for each format, keyed by format label.
match_paths = dict(
    Test="data/tests_json/",
    ODI="data/odis_json/",
    T20="data/it20s_json/",
)


# # Base data folder
# base_path = Path("data")

# # Match formats and their folders
# formats = {
#     "Test": base_path / "tests_json",
#     "ODI": base_path / "odis_json",
#     "T20I": base_path / "it20s_json"
# }

# # SQLite DB
# db_path = "cricsheet.db"
# conn = sqlite3.connect(db_path)
# cursor = conn.cursor()

## Parse JSON into DataFrames

In [2]:
import os, json
import pandas as pd

def parse_match_json(file_path):
    """Flatten the ``info`` section of one match JSON file into a flat dict.

    Args:
        file_path: Path to a match JSON file. The file name up to its first
            ``.`` is used as the match id.

    Returns:
        dict with the keys ``match_id``, ``date`` (first entry of
        ``info["dates"]``), ``venue``, ``city``, ``teams`` (comma-joined),
        ``winner`` (``info["outcome"]["winner"]``), ``match_type``,
        ``player_of_match`` (comma-joined), ``overs`` and ``gender``.
        Fields missing from the file come back as ``None``; the two joined
        fields come back as ``""``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so results do not depend on the platform's
    # default encoding (names and venues may contain non-ASCII characters).
    with open(file_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # ``or`` fallbacks cover both a missing key and a key present with a
    # null/empty value, which ``dict.get(key, default)`` alone does not.
    info = data.get("info") or {}
    dates = info.get("dates") or [None]
    outcome = info.get("outcome") or {}

    return {
        "match_id": os.path.basename(file_path).split('.')[0],
        "date": dates[0],
        "venue": info.get("venue"),
        "city": info.get("city"),
        "teams": ", ".join(info.get("teams") or []),
        # Absent when the outcome has no "winner" entry.
        "winner": outcome.get("winner"),
        "match_type": info.get("match_type"),
        "player_of_match": ", ".join(info.get("player_of_match") or []),
        "overs": info.get("overs"),
        "gender": info.get("gender")
    }

## Create DataFrames by Match Type

In [3]:
def build_match_df(match_type, path):
    """Parse every ``.json`` file in a folder into one DataFrame.

    Args:
        match_type: Label of the match format. Currently unused by this
            function; kept so existing callers keep working.
        path: Directory whose ``.json`` files are parsed with
            ``parse_match_json``. Other files are ignored.

    Returns:
        pandas.DataFrame with one row per parsed file, ordered by file name.
        The frame is empty when the directory has no ``.json`` files.

    Raises:
        OSError: If ``path`` cannot be listed.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order reproducible across runs and machines.
    records = [
        parse_match_json(os.path.join(path, file_name))
        for file_name in sorted(os.listdir(path))
        if file_name.endswith(".json")
    ]
    return pd.DataFrame(records)

# Build one DataFrame per format, reading each format's folder from match_paths.
test_df, odi_df, t20_df = (
    build_match_df(fmt, match_paths[fmt]) for fmt in ("Test", "ODI", "T20")
)

## Insert the DataFrames into SQLite

In [4]:
# Options shared by every write: use the SQLite engine, replace any existing
# table of the same name, and do not store the DataFrame index as a column.
to_sql_options = {"con": engine, "if_exists": "replace", "index": False}

test_df.to_sql("test_matches", **to_sql_options)
odi_df.to_sql("odi_matches", **to_sql_options)
# Left as the final bare expression so a notebook cell still shows its result.
t20_df.to_sql("t20_matches", **to_sql_options)

320