# In [1]:
import pandas as pd

# Read the match-level table and immediately discard rows that have no
# recorded winner or no recorded venue; the helper functions below all
# query this filtered table.
matches = pd.read_csv("matches.csv").dropna(subset=["winner", "venue"])

# Ball-by-ball table: one row per delivery.
deliveries = pd.read_csv("deliveries.csv")

# Top 10 batters by total runs off the bat, highest first.
best_batsman = (
    deliveries.groupby("batter")["batsman_runs"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)

# Top 10 bowlers by number of deliveries flagged as a wicket, highest first.
# NOTE(review): every is_wicket == 1 delivery is credited to the bowler here;
# if the data also flags dismissals that are not normally bowler wickets
# (e.g. run outs), this count includes them -- confirm whether that is wanted.
wickets = deliveries.loc[deliveries["is_wicket"].eq(1)]
best_bowler = (
    wickets.groupby("bowler")
    .size()
    .sort_values(ascending=False)
    .head(10)
    .reset_index(name="wickets")
)

# Win % between teams
def get_head_to_head(team1, team2):
    """Return each side's win share in the matches the two teams played.

    Looks up, in the module-level ``matches`` table, every row where the two
    teams appear as ``team1``/``team2`` in either order.

    Args:
        team1: Name of the first team, as it appears in ``matches``.
        team2: Name of the second team, as it appears in ``matches``.

    Returns:
        A dict with three entries: ``team1`` and ``team2`` mapped to their
        win percentage formatted as ``"NN.N%"`` (or ``"N/A"`` when the teams
        have no matches against each other), and ``"total_matches"`` mapped
        to the number of matches found.
    """
    first_vs_second = (matches["team1"] == team1) & (matches["team2"] == team2)
    second_vs_first = (matches["team1"] == team2) & (matches["team2"] == team1)
    meetings = matches[first_vs_second | second_vs_first]
    played = len(meetings)

    def win_share(team):
        # No meetings means there is nothing to divide by.
        if not played:
            return "N/A"
        victories = (meetings["winner"] == team).sum()
        return f"{victories / played * 100:.1f}%"

    return {
        team1: win_share(team1),
        team2: win_share(team2),
        "total_matches": played,
    }

# Toss decision stats
def get_toss_decision_stats(team):
    """Return how often a team chose to bat or to field after winning the toss.

    Uses the module-level ``matches`` table, restricted to rows whose
    ``toss_winner`` is ``team``.

    Args:
        team: Team name, as it appears in the ``toss_winner`` column.

    Returns:
        A dict with keys ``"bat"`` and ``"field"``, each mapped to the share
        of that decision formatted as ``"NN.N%"``, or to ``"N/A"`` when the
        team has no toss wins in the table.
    """
    tosses_won = matches[matches["toss_winner"] == team]
    n_tosses = len(tosses_won)

    # Guard first so the percentage maths never divides by zero.
    if not n_tosses:
        return {"bat": "N/A", "field": "N/A"}

    decisions = tosses_won["toss_decision"]
    return {
        choice: f"{(decisions == choice).sum() / n_tosses * 100:.1f}%"
        for choice in ("bat", "field")
    }

# Venue average runs
def get_average_runs_in_venue(venue):
    """Return the mean of ``target_runs`` over all matches played at a venue.

    Args:
        venue: Venue name, as it appears in the ``venue`` column of the
            module-level ``matches`` table.

    Returns:
        The mean of the ``target_runs`` column for that venue; NaN when no
        match in the table was played there.
    """
    # NOTE(review): target_runs is averaged as-is. If that column stores the
    # chasing side's target rather than the first-innings total, the figure
    # is offset accordingly -- confirm against the dataset description.
    played_here = matches["venue"] == venue
    return matches.loc[played_here, "target_runs"].mean()

# Average score for a team when batting first
def team_avg_runs_batting_first(team):
    """Return the mean of ``target_runs`` over matches where a team batted first.

    A team bats first in two situations, and both are counted:

    * it won the toss and chose to bat, or
    * it played in the match, the opponent won the toss, and the opponent
      chose to field.

    Only the first situation used to be counted, which silently left out
    every innings where the team was put in to bat by the opposition.

    Args:
        team: Team name, as it appears in the ``team1``, ``team2`` and
            ``toss_winner`` columns of the module-level ``matches`` table.

    Returns:
        The mean of the ``target_runs`` column over the selected matches;
        NaN when the team never batted first in the table.
    """
    toss_winner = matches["toss_winner"]
    toss_decision = matches["toss_decision"]

    # Case 1: the team won the toss and elected to bat.
    chose_to_bat = (toss_winner == team) & (toss_decision == "bat")

    # Case 2: the team took part, somebody else won the toss and elected to
    # field. The notna() check keeps rows with an unknown toss winner out,
    # since `!=` alone would treat a missing value as "the opponent".
    took_part = (matches["team1"] == team) | (matches["team2"] == team)
    put_in_to_bat = (
        took_part
        & toss_winner.notna()
        & (toss_winner != team)
        & (toss_decision == "field")
    )

    # NOTE(review): target_runs is averaged as-is. If that column stores the
    # chasing side's target rather than the first-innings total, the figure
    # is offset accordingly -- confirm against the dataset description.
    return matches.loc[chose_to_bat | put_in_to_bat, "target_runs"].mean()

# Persist the two precomputed leaderboards as CSV files, without writing
# the DataFrame index as an extra column.
best_batsman.to_csv(path_or_buf="stats_best_batsman.csv", index=False)
best_bowler.to_csv(path_or_buf="stats_best_bowler.csv", index=False)
