In [22]:
import os
import json
import csv

def process_json(json_file, matches_rows, info_rows, ball_rows):
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    info = data.get("info", {})
    match_id = os.path.splitext(os.path.basename(json_file))[0]

    # ---------- Matches.csv ----------
    matches_rows.append({
        "match_id": match_id,
        "team_type": info.get("team_type"),
        "match_type": info.get("match_type"),
        "city": info.get("city"),
        "venue": info.get("venue"),
        "winner": info.get("outcome", {}).get("winner"),
        "season": info.get("season"),
        "teams": ",".join(info.get("teams", [])),
        "dates": ",".join(info.get("dates", [])),
    })

    # ---------- Info_summary.csv ----------
    info_rows.append({
        "match_id": match_id,
        "balls_per_over": info.get("balls_per_over"),
        "gender": info.get("gender"),
        "event_name": info.get("event", {}).get("name"),
        "match_number": info.get("event", {}).get("match_number"),
        "match_type_number": info.get("match_type_number"),
        "toss_winner": info.get("toss", {}).get("winner"),
        "toss_decision": info.get("toss", {}).get("decision"),
    })

    # ---------- Ballbyball.csv ----------
    for inning_idx, inning in enumerate(data.get("innings", []), start=1):
        team = inning.get("team")
        for over in inning.get("overs", []):
            over_num = over.get("over")
            for ball_idx, delivery in enumerate(over.get("deliveries", []), start=1):
                # Extract wicket details
                wicket_type = ""
                wicket_player_out = ""
                fielders = []
                if "wickets" in delivery:
                    for w in delivery["wickets"]:
                        if "kind" in w:
                            wicket_type = w.get("kind", "")
                        if "player_out" in w:
                            wicket_player_out = w.get("player_out", "")
                        if "fielders" in w:
                            fielders.extend([f.get("name", "") for f in w.get("fielders", [])])

                # Extract extras type (wide, no-ball, bye, etc.)
                extras_type = ""
                if "extras" in delivery:
                    extras_type = ";".join(delivery["extras"].keys())

                row = {
                    "match_id": match_id,
                    "inning": inning_idx,
                    "team": team,
                    "date": info.get("dates", [None])[0],
                    "over": over_num,
                    "ball": ball_idx,
                    "batter": delivery.get("batter"),
                    "bowler": delivery.get("bowler"),
                    "non_striker": delivery.get("non_striker"),
                    "runs_batter": delivery.get("runs", {}).get("batter", 0),
                    "runs_extras": delivery.get("runs", {}).get("extras", 0),
                    "runs_total": delivery.get("runs", {}).get("total", 0),
                    "extras_type": extras_type,
                    "wicket_type": wicket_type,
                    "wicket_player_out": wicket_player_out,
                    "fielders": ";".join([f for f in fielders if f])
                }
                ball_rows.append(row)

def save_csv(rows, filepath, fieldnames):
    if not rows:
        return
    with open(filepath, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

def process_folder(folder):
    matches_rows, info_rows, ball_rows = [], [], []

    for file in os.listdir(folder):
        if file.endswith(".json"):
            process_json(os.path.join(folder, file), matches_rows, info_rows, ball_rows)

    analysis_path = os.path.join("cricket/analysis", os.path.basename(folder))
    os.makedirs(analysis_path, exist_ok=True)

    save_csv(matches_rows, os.path.join(analysis_path, "matches.csv"),
             ["match_id","team_type","match_type","city","venue","winner","season","teams","dates"])

    save_csv(info_rows, os.path.join(analysis_path, "info_summary.csv"),
             ["match_id","balls_per_over","gender","event_name","match_number","match_type_number","toss_winner","toss_decision"])

    save_csv(ball_rows, os.path.join(analysis_path, "ballbyball.csv"),
             ["match_id","inning","team","date","over","ball","batter","bowler","non_striker",
              "runs_batter","runs_extras","runs_total","extras_type","wicket_type","wicket_player_out","fielders"])

# ---------- Run for all 6 folders ----------
subfolders = ["all_json","ipl_json","mdms_json","odis_json","t20s_json","tests_json"]
for sub in subfolders:
    process_folder(os.path.join("cricket", sub))
    print(f"✅ Processed {sub}")


✅ Processed all_json
✅ Processed ipl_json
✅ Processed mdms_json
✅ Processed odis_json
✅ Processed t20s_json
✅ Processed tests_json


In [23]:
import os
import csv
import pandas as pd

base_folder = "cricket/analysis"
subfolders = ["all_json","ipl_json","mdms_json","odis_json","t20s_json","tests_json"]

def process_players(sub):
    print(f"🔄 Processing players for {sub} ...")

    analysis_path = os.path.join(base_folder, sub)
    ballbyball_path = os.path.join(analysis_path, "ballbyball.csv")
    if not os.path.exists(ballbyball_path):
        print(f"⚠️ Skipping {sub}, no ballbyball.csv")
        return

    df = pd.read_csv(ballbyball_path)

    # Collect all unique players from batter, non_striker, bowler, fielders
    players = set(df["batter"].dropna()) \
              | set(df["non_striker"].dropna()) \
              | set(df["bowler"].dropna())

    # Fielders column is semicolon-separated
    if "fielders" in df.columns:
        for f_list in df["fielders"].dropna():
            for f in str(f_list).split(";"):
                if f.strip():
                    players.add(f.strip())

    # Create player folders
    player_base = os.path.join(analysis_path, "player")
    os.makedirs(player_base, exist_ok=True)

    for player in players:
        safe_name = player.replace("/", "_").replace("\\", "_").replace(" ", "_")
        player_folder = os.path.join(player_base, safe_name)
        os.makedirs(player_folder, exist_ok=True)

        # 1. Batter
        batter_df = df[df["batter"] == player]
        if not batter_df.empty:
            batter_df.to_csv(os.path.join(player_folder, "batter.csv"), index=False)

        # 2. Non-striker
        non_striker_df = df[df["non_striker"] == player]
        if not non_striker_df.empty:
            non_striker_df.to_csv(os.path.join(player_folder, "non_striker.csv"), index=False)

        # 3. Bowler
        bowler_df = df[df["bowler"] == player]
        if not bowler_df.empty:
            bowler_df.to_csv(os.path.join(player_folder, "bowler.csv"), index=False)

        # 4. Fielder
        if "fielders" in df.columns:
            fielder_df = df[df["fielders"].fillna("").str.contains(player)]
            if not fielder_df.empty:
                fielder_df.to_csv(os.path.join(player_folder, "fielder.csv"), index=False)

        # 5. Batter or Non-striker
        bat_or_non_df = df[(df["batter"] == player) | (df["non_striker"] == player)]
        if not bat_or_non_df.empty:
            bat_or_non_df.to_csv(os.path.join(player_folder, "batter_or_non_striker.csv"), index=False)

    print(f"✅ Completed {sub}, created {len(players)} player folders")

if __name__ == "__main__":
    for sub in subfolders:
        process_players(sub)
    print("🎉 Player CSVs created for all folders")


🔄 Processing players for all_json ...


  fielder_df = df[df["fielders"].fillna("").str.contains(player)]


✅ Completed all_json, created 8238 player folders
🔄 Processing players for ipl_json ...


  fielder_df = df[df["fielders"].fillna("").str.contains(player)]


✅ Completed ipl_json, created 778 player folders
🔄 Processing players for mdms_json ...


  fielder_df = df[df["fielders"].fillna("").str.contains(player)]


✅ Completed mdms_json, created 1997 player folders
🔄 Processing players for odis_json ...


  fielder_df = df[df["fielders"].fillna("").str.contains(player)]


✅ Completed odis_json, created 2568 player folders
🔄 Processing players for t20s_json ...


  fielder_df = df[df["fielders"].fillna("").str.contains(player)]


✅ Completed t20s_json, created 6501 player folders
🔄 Processing players for tests_json ...


  fielder_df = df[df["fielders"].fillna("").str.contains(player)]


✅ Completed tests_json, created 1210 player folders
🎉 Player CSVs created for all folders


In [1]:
import os
import pandas as pd

base_folder = "cricket/analysis"
subfolders = ["all_json","ipl_json","mdms_json","odis_json","t20s_json","tests_json"]

def process_teams(sub):
    print(f"🔄 Processing teams for {sub} ...")

    folder = os.path.join(base_folder, sub)
    team_folder = os.path.join(folder, "team")
    os.makedirs(team_folder, exist_ok=True)

    # Load ballbyball and matches
    ball_path = os.path.join(folder, "ballbyball.csv")
    match_path = os.path.join(folder, "matches.csv")
    if not os.path.exists(ball_path) or not os.path.exists(match_path):
        print(f"⚠️ Missing ballbyball or matches in {sub}, skipping.")
        return

    ball_df = pd.read_csv(ball_path)
    matches_df = pd.read_csv(match_path)

    # Build match_id → [team1, team2] mapping
    match_teams = {}
    for _, row in matches_df.iterrows():
        if "match_id" in row and "teams" in row:
            tlist = str(row["teams"]).split(",")
            match_teams[row["match_id"]] = [t.strip() for t in tlist]

    # Collect all unique teams
    unique_teams = set()
    if "team" in ball_df.columns:
        unique_teams.update(ball_df["team"].dropna().unique())
    for tlist in matches_df["teams"].dropna():
        for t in str(tlist).split(","):
            unique_teams.add(t.strip())

    # Process each team
    for team in unique_teams:
        team_dir = os.path.join(team_folder, team.replace(" ", "_"))
        os.makedirs(team_dir, exist_ok=True)

        # Batting
        batting_df = ball_df[ball_df["team"] == team]
        if not batting_df.empty:
            batting_df.to_csv(os.path.join(team_dir, "batting.csv"), index=False)

        # Bowling + Fielding
        bowling_rows = []
        fielding_rows = []

        for _, row in ball_df.iterrows():
            mid = row["match_id"]
            batting_team = row["team"]
            if mid not in match_teams:
                continue
            teams = match_teams[mid]
            if len(teams) < 2:
                continue
            bowling_team = teams[0] if teams[1] == batting_team else teams[1]

            if bowling_team == team:
                bowling_rows.append(row)
                if pd.notna(row.get("wicket_type")) and row["wicket_type"] != "":
                    fielding_rows.append(row)

        if bowling_rows:
            pd.DataFrame(bowling_rows).to_csv(os.path.join(team_dir, "bowling.csv"), index=False)
        if fielding_rows:
            pd.DataFrame(fielding_rows).to_csv(os.path.join(team_dir, "fielding.csv"), index=False)

    print(f"✅ Completed {sub}, {len(unique_teams)} teams processed")

if __name__ == "__main__":
    for sub in subfolders:
        process_teams(sub)
    print("🎉 Team CSVs created for all folders")


🔄 Processing teams for all_json ...
✅ Completed all_json, 109 teams processed
🔄 Processing teams for ipl_json ...
✅ Completed ipl_json, 19 teams processed
🔄 Processing teams for mdms_json ...
✅ Completed mdms_json, 58 teams processed
🔄 Processing teams for odis_json ...
✅ Completed odis_json, 28 teams processed
🔄 Processing teams for t20s_json ...
✅ Completed t20s_json, 106 teams processed
🔄 Processing teams for tests_json ...
✅ Completed tests_json, 12 teams processed
🎉 Team CSVs created for all folders


In [None]:
while ipl is selected show following
-a pie chart of count of 0 1 2 3 4 6 from runs_batter column in ballbyball.csv in ipl_json folder in analysis folder followed by table for the same
-a pie chart of count of every wicket type from wicket_type column in ballbyball.csv in ipl_json folder in analysis folder followed by table for the same
- a pie chart count of extras type from extras_type column in ballbyball.csv in ipl_json folder in analysis folder followed by table for the same                                                                                                                                                  
-table for total runs sum of runs_total column , total wicket sum of count of wicket_type column,total extras sum of runs_extras column , total balls bowled total number of rows in ballbyball.csv in ipl_json folder in analysis folder
-bargraph for top 10 most run getter you can calc this from runs_batter column in batter.csv in player names in player folder in ipl_json folder in analysis folder
-bargraph of top 10 wicket taker you can cal this from wicket_type column in bowler.csv in player names in player folder in ipl_json folder in analysis folder
-bargrapg of top 10 most catch/runout taken then also make table for them for total number of catches total number of runouts you can cal this from fielder column in fielder.csv in player names in player folder in ipl_json folder in analysis folder
-bargraph and table fro count of toss won by which team from toss_winner column info_summary.csv in ipl_json folder
--bargraph and table fro count of toss won setted batting or bowling by which team from toss_winner and toss_decision column info_summary.csv in ipl_json folder

In [None]:
now i wnat to create a dashboard for analysis folder 
which contains following drop downs
- 'matches' which contails folder inside analysis folder ,all_json as all internatinal matches ,ipl_json as ipl matches,mdms_json as multiday matches, odis_json as odi matches ,t20s_json as t20 international matches, tests_json as test matches
- 'team' containg all the unique teams from the seleted folder in matches dropdown
- 'player' containg all the players played for selected team from the team dropdown

while ipl is selected show following
-a pie chart of count of 0 1 2 3 4 6 from runs_batter column in ballbyball.csv in all_json folder in analysis folder followed by table for the same
-a pie chart of count of every wicket type from wicket_type column in ballbyball.csv in all_json folder in analysis folder followed by table for the same
- a pie chart count of extras type from extras_type column in ballbyball.csv in all_json folder in analysis folder followed by table for the same                                                                                                                                                  
-table for total runs sum of runs_total column , total wicket sum of count of wicket_type column,total extras sum of runs_extras column , total balls bowled total number of rows in ballbyball.csv in all_json folder in analysis folder
-bargraph for top 10 most run getter you can calc this from runs_batter column in batter.csv in player names in player folder in all_json folder in analysis folder
-bargraph of top 10 wicket taker you can cal this from wicket_type column in bowler.csv in player names in player folder in all_json folder in analysis folder
-bargrapg of top 10 most catch/runout taken then also make table for them for total number of catches total number of runouts you can cal this from fielder column in fielder.csv in player names in player folder in all_json folder in analysis folder