In [11]:
import os
import pandas as pd
import json
from collections import defaultdict
from datetime import datetime

In [12]:
# Folder scanned by the parsing loops below for *.json match files.
# The location can be overridden per machine through the IPL_JSON_DIR
# environment variable; the original hard-coded path remains the default so
# existing setups keep working unchanged.
json_folder = os.environ.get(
    "IPL_JSON_DIR",
    "F:/Machine and Deep Learning/IPL/cricsheet/ipl_json",
)

# Accumulator for per-delivery rows; each parsing cell resets it before use.
all_processed_data = []

In [13]:
# Build one row per delivery for every match whose first listed date falls in
# one of the target seasons. Batter and bowler running totals are kept per
# match; the team running total restarts with each innings.
TARGET_YEARS = {2023, 2024}
all_processed_data = []

for json_file in os.listdir(json_folder):
    if not json_file.endswith(".json"):
        continue
    file_path = os.path.join(json_folder, json_file)

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which may not be UTF-8 (e.g. a legacy code page on Windows).
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    info = data["info"]

    # Parse and filter by year
    raw_date = info.get("dates", [])
    if not raw_date:
        continue

    match_date_str = raw_date[0]
    try:
        match_date = datetime.strptime(match_date_str, "%Y-%m-%d")
    except (TypeError, ValueError):
        # Missing or malformed date: skip this match. Anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        continue

    if match_date.year not in TARGET_YEARS:
        continue  # Skip if not in 2023 or 2024

    venue = info.get("venue", "Unknown Venue")
    match_number = info.get("event", {}).get("match_number", "Unknown Match")
    match_date_label = match_date.strftime("%Y-%m-%d")  # invariant per match

    innings_list = data.get("innings", [])

    # Prefer the match-level team list so the bowling side is known even when
    # only one innings was played; fall back to the first two innings.
    match_teams = list(info.get("teams") or [])
    if len(match_teams) < 2:
        match_teams = [played["team"] for played in innings_list[:2]]

    batsman_cum = defaultdict(int)
    bowler_cum = defaultdict(int)

    for innings_no, inning in enumerate(innings_list, start=1):
        batting_team = inning["team"]
        bowling_team = next(
            (team for team in match_teams if team != batting_team), None
        )
        cumulative_runs = 0

        # An innings entry without an "overs" key contributes no rows.
        for over in inning.get("overs", []):
            over_number = over["over"]
            for ball_no, delivery in enumerate(over["deliveries"], start=1):
                striker = delivery["batter"]
                non_striker = delivery["non_striker"]
                bowler = delivery["bowler"]
                runs_batter = delivery["runs"]["batter"]
                total_runs = delivery["runs"]["total"]
                extras_info = delivery.get("extras", {})

                # Only wides and no-balls are charged to the bowler on top of
                # the batter's runs; other extras are left out of this figure.
                wides = extras_info.get("wides", 0)
                noballs = extras_info.get("noballs", 0)
                run_conceded_by_bowler = runs_batter + wides + noballs

                batsman_cum[striker] += runs_batter
                bowler_cum[bowler] += run_conceded_by_bowler
                cumulative_runs += total_runs

                all_processed_data.append({
                    "Venue": venue,
                    "Date": match_date_label,
                    "Match No.": match_number,
                    "Innings No.": innings_no,
                    "Batting Team": batting_team,
                    "Bowling Team": bowling_team,
                    # "<over>.<ball>", where ball counts every delivery in
                    # the over (including extras), starting from 1.
                    "Over": f"{over_number}.{ball_no}",
                    "Striker": striker,
                    "Non Striker": non_striker,
                    "Bowler": bowler,
                    "Runs by Batsman": runs_batter,
                    "Extras": extras_info,
                    "Total Runs (Ball)": total_runs,
                    "Runs Conceded by Bowler": run_conceded_by_bowler,
                    "Cumulative Runs by Batsman": batsman_cum[striker],
                    "Cumulative Runs Conceded by Bowler": bowler_cum[bowler],
                    "Cumulative Team Runs": cumulative_runs
                })

In [14]:
# Turn the collected per-delivery dicts into a single table: one row per
# dict, one column per dict key.
df = pd.DataFrame(data=all_processed_data)

In [15]:
# Coerce column types, then order rows chronologically ball by ball.
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df["Match No."] = pd.to_numeric(df["Match No."], errors="coerce")

# "Over" is "<over>.<ball>" text. Sorting on its float value is wrong for
# overs with ten or more deliveries: "3.10" becomes 3.1, which ties with
# ball 1 and sorts ahead of ball 2. Sort on integer over/ball keys instead.
over_parts = df["Over"].astype(str).str.split(".", n=1, expand=True)
df = (
    df.assign(
        _over_no=over_parts[0].astype(int),
        _ball_no=over_parts[1].astype(int),
    )
    .sort_values(
        by=["Date", "Match No.", "Innings No.", "_over_no", "_ball_no"]
    )
    .drop(columns=["_over_no", "_ball_no"])
    .reset_index(drop=True)
)

# Kept as float so the exported column keeps its existing format. Note that
# the float form cannot distinguish ball 10 from ball 1 of the same over;
# the row order above is what preserves the true sequence.
df["Over"] = df["Over"].astype(float)

In [16]:
# Write the table to disk in the working directory; index=False leaves the
# row index out of the file.
output_csv = "match_data_23-24.csv"
df.to_csv(path_or_buf=output_csv, index=False)

# Confirmation message naming the file that was written.
print(f"CSV file created successfully: {output_csv}")

CSV file created successfully: match_data_23-24.csv


In [17]:
# Build one row per delivery for every match file in the folder (no season
# filter). Batter and bowler running totals are kept per match; the team
# running total restarts with each innings.
all_processed_data = []

for json_file in os.listdir(json_folder):
    if not json_file.endswith(".json"):
        continue
    file_path = os.path.join(json_folder, json_file)

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which may not be UTF-8 (e.g. a legacy code page on Windows).
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    info = data["info"]

    venue = info.get("venue", "Unknown Venue")
    # First listed date, or a placeholder that later date parsing with
    # errors="coerce" would turn into a missing value.
    date = info["dates"][0] if info.get("dates") else "Unknown Date"
    match_number = info.get("event", {}).get("match_number", "Unknown Match")

    innings_list = data.get("innings", [])

    # Prefer the match-level team list so the bowling side is known even when
    # only one innings was played; fall back to the first two innings.
    match_teams = list(info.get("teams") or [])
    if len(match_teams) < 2:
        match_teams = [played["team"] for played in innings_list[:2]]

    batsman_cum = defaultdict(int)
    bowler_cum = defaultdict(int)

    for innings_no, inning in enumerate(innings_list, start=1):
        batting_team = inning["team"]
        bowling_team = next(
            (team for team in match_teams if team != batting_team), None
        )
        cumulative_runs = 0

        # An innings entry without an "overs" key contributes no rows.
        for over in inning.get("overs", []):
            over_number = over["over"]
            for ball_no, delivery in enumerate(over["deliveries"], start=1):
                striker = delivery["batter"]
                non_striker = delivery["non_striker"]
                bowler = delivery["bowler"]
                runs_batter = delivery["runs"]["batter"]
                total_runs = delivery["runs"]["total"]
                extras_info = delivery.get("extras", {})

                # Only wides and no-balls are charged to the bowler on top of
                # the batter's runs; other extras are left out of this figure.
                wides = extras_info.get("wides", 0)
                noballs = extras_info.get("noballs", 0)
                run_conceded_by_bowler = runs_batter + wides + noballs

                batsman_cum[striker] += runs_batter
                bowler_cum[bowler] += run_conceded_by_bowler
                cumulative_runs += total_runs

                all_processed_data.append({
                    "Venue": venue,
                    "Date": date,
                    "Match No.": match_number,
                    "Innings No.": innings_no,
                    "Batting Team": batting_team,
                    "Bowling Team": bowling_team,
                    # "<over>.<ball>", where ball counts every delivery in
                    # the over (including extras), starting from 1.
                    "Over": f"{over_number}.{ball_no}",
                    "Striker": striker,
                    "Non Striker": non_striker,
                    "Bowler": bowler,
                    "Runs by Batsman": runs_batter,
                    "Extras": extras_info,
                    "Total Runs (Ball)": total_runs,
                    "Runs Conceded by Bowler": run_conceded_by_bowler,
                    "Cumulative Runs by Batsman": batsman_cum[striker],
                    "Cumulative Runs Conceded by Bowler": bowler_cum[bowler],
                    "Cumulative Team Runs": cumulative_runs
                })

In [18]:
# Turn the collected per-delivery dicts into a single table: one row per
# dict, one column per dict key.
df = pd.DataFrame(data=all_processed_data)

In [19]:
# Coerce column types, then order rows chronologically ball by ball.
# Unparseable dates and non-numeric match numbers become missing values.
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df["Match No."] = pd.to_numeric(df["Match No."], errors="coerce")

# "Over" is "<over>.<ball>" text. Sorting on its float value is wrong for
# overs with ten or more deliveries: "3.10" becomes 3.1, which ties with
# ball 1 and sorts ahead of ball 2. Sort on integer over/ball keys instead.
over_parts = df["Over"].astype(str).str.split(".", n=1, expand=True)
df = (
    df.assign(
        _over_no=over_parts[0].astype(int),
        _ball_no=over_parts[1].astype(int),
    )
    .sort_values(
        by=["Date", "Match No.", "Innings No.", "_over_no", "_ball_no"]
    )
    .drop(columns=["_over_no", "_ball_no"])
    .reset_index(drop=True)
)

# Kept as float so the exported column keeps its existing format. Note that
# the float form cannot distinguish ball 10 from ball 1 of the same over;
# the row order above is what preserves the true sequence.
df["Over"] = df["Over"].astype(float)

In [20]:
# Write the table to disk in the working directory; index=False leaves the
# row index out of the file.
output_csv = "match_data_08-24.csv"
df.to_csv(path_or_buf=output_csv, index=False)

# Confirmation message naming the file that was written.
print(f"CSV file created successfully: {output_csv}")

CSV file created successfully: match_data_08-24.csv
