In [None]:
# 🧠 FOOTBASE DATABASE SANITY CHECK
# --------------------------------
# This cell connects to the SQLite DB at DB_PATH, lists its tables,
# prints the schema of the `matches` table, previews sample rows, runs
# two quick aggregate queries, and always closes the connection -- even
# when one of the queries fails part-way through.
#
# NOTE: `display` is not imported here; it is the rich-output helper that
# IPython/Jupyter makes available inside notebook cells.

import sqlite3
from pathlib import Path

import pandas as pd

# === CONFIG ===
DB_PATH = "../db/footbase_big5.db"

# === 1. CONNECT ===
print("🔌 Connecting to database...")
# sqlite3.connect() creates a new, empty database file when the path does
# not exist, which would make this check "connect" successfully and only
# fail later with a confusing "no such table" error. Fail fast instead.
if not Path(DB_PATH).is_file():
    raise FileNotFoundError(f"SQLite database not found: {DB_PATH}")
conn = sqlite3.connect(DB_PATH)
print("✅ Connected!")

try:
    # === 2. CHECK TABLES ===
    print("\n📋 Tables available:")
    tables = pd.read_sql_query(
        "SELECT name FROM sqlite_master WHERE type='table';", conn
    )
    print(tables)

    # === 3. CHECK SCHEMA ===
    print("\n🧱 Table schema for 'matches':")
    schema = pd.read_sql_query("PRAGMA table_info(matches);", conn)
    print(schema)

    # === 4. COUNT & SAMPLE ===
    count_df = pd.read_sql_query(
        "SELECT COUNT(*) AS total_rows FROM matches;", conn
    )
    print(f"\n🧮 Total rows: {count_df.iloc[0]['total_rows']:,}")

    sample = pd.read_sql_query("SELECT * FROM matches LIMIT 5;", conn)
    print("\n🔎 Sample rows:")
    display(sample)

    # === 5. QUICK ANALYTICS ===

    # Top 10 (league, season, team) combinations by goals scored at home.
    # Only home_goals is summed, so away goals are not part of this ranking.
    print("\n🥇 Top 10 teams by total home goals:")
    query_goals = """
SELECT league, season, home_team AS team,
       SUM(home_goals) AS goals_scored
FROM matches
GROUP BY league, season, home_team
ORDER BY goals_scored DESC
LIMIT 10;
"""
    display(pd.read_sql_query(query_goals, conn))

    # Average total shots (home + away) per match, per league.
    # In SQLite, home_shots + away_shots is NULL if either side is NULL and
    # AVG() skips NULLs, so such matches are left out of the average.
    print("\n⚽ Average total shots per match by league:")
    query_shots = """
SELECT league,
       ROUND(AVG(home_shots + away_shots), 1) AS avg_total_shots
FROM matches
GROUP BY league
ORDER BY avg_total_shots DESC;
"""
    display(pd.read_sql_query(query_shots, conn))
finally:
    # === 6. CLEANUP ===
    # Runs on success and on failure, so the connection is never leaked.
    conn.close()

# Only reached when every step above succeeded.
print("\n🔒 Connection closed. All good!")
