In [5]:
import os
import pandas as pd
import json

In [None]:
# Notebook-level configuration and state:
#   all_processed_data - accumulator that receives one dict per delivery
#   json_folder        - directory scanned for the match JSON files
all_processed_data = list()
json_folder = "ipl_json"

In [None]:
def _extras_breakdown(delivery):
    """Return the per-type extras recorded for a single delivery.

    The extras total is stored as a number under ``delivery['runs']['extras']``,
    while the per-type breakdown lives in the optional ``delivery['extras']``
    mapping. Reading the breakdown from ``runs`` alone therefore never finds a
    dict and every extra ends up labelled as a leg bye.

    Args:
        delivery: One delivery dict from an over's ``deliveries`` list.

    Returns:
        dict with integer counts for ``wides``, ``noballs``, ``legbyes``,
        ``byes`` and ``penalty`` (0 for any type not present).
    """
    runs_extras = delivery['runs'].get('extras', 0)
    if isinstance(runs_extras, dict):
        # Tolerate files that put the breakdown directly under runs.extras.
        breakdown = runs_extras
    elif isinstance(delivery.get('extras'), dict):
        breakdown = delivery['extras']
    else:
        # No breakdown available: keep the previous fallback so the total
        # is not lost (it is 0 for an ordinary delivery).
        breakdown = {'legbyes': runs_extras}

    return {
        kind: breakdown.get(kind, 0)
        for kind in ('wides', 'noballs', 'legbyes', 'byes', 'penalty')
    }


# Start from an empty list so that re-running this cell does not append every
# delivery a second time.
all_processed_data.clear()

# os.listdir() returns names in arbitrary order; sorting keeps the row order
# (and therefore the exported CSV) reproducible between runs.
for json_file in sorted(os.listdir(json_folder)):
    if not json_file.endswith(".json"):  # Process only JSON files
        continue

    file_path = os.path.join(json_folder, json_file)

    # Load JSON data. The encoding is explicit so the result does not depend
    # on the platform's locale default.
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Extract venue, date, and match number, falling back to placeholders
    # when the corresponding key is missing.
    info = data["info"]
    venue = info.get("venue", "Unknown Venue")
    dates = info.get("dates")
    date = dates[0] if dates else "Unknown Date"
    if "event" in info:
        match_number = info["event"].get("match_number", "Unknown Match")
    else:
        match_number = "Unknown Match"

    # Loop through innings and deliveries to extract information
    for inning in data['innings']:
        team = inning['team']
        cumulative_runs = 0  # Running total, restarted for each innings
        for over in inning['overs']:
            over_number = over['over']
            for delivery_number, delivery in enumerate(over['deliveries']):
                striker = delivery['batter']
                non_striker = delivery['non_striker']
                bowler = delivery['bowler']
                runs_batter = delivery['runs']['batter']
                total_runs = delivery['runs']['total']
                extras_info = _extras_breakdown(delivery)

                # Calculate cumulative runs
                cumulative_runs += total_runs

                # Format over as 'over.ball'. The ball part is the 1-based
                # position in the over's delivery list, so every listed
                # delivery advances it.
                # NOTE(review): if wides/no-balls are listed too, an over can
                # reach '.7' or higher - confirm this is intended.
                over_ball = f"{over_number}.{delivery_number+1}"

                # Append row data to the list. 'Batter Runs' and
                # 'Runs by Batsman' both carry the same per-delivery value.
                all_processed_data.append({
                    'Venue': venue,
                    'Date': date,
                    'Match No.': match_number,
                    'Innings': team,
                    'Over': over_ball,
                    'Striker': striker,
                    'Non Striker': non_striker,
                    'Bowler': bowler,
                    'Batter Runs': runs_batter,
                    'Extras': str(extras_info),
                    'Cumulative Runs': cumulative_runs,
                    'Runs by Batsman': runs_batter
                })


In [None]:
# Build the delivery-level table in one step from the accumulated row dicts;
# each dict key becomes a column.
df = pd.DataFrame(data=all_processed_data)

In [None]:
# Write the table to a CSV file in the working directory, leaving out the
# DataFrame index, then report where it was saved.
output_csv = "match_data_full.csv"
df.to_csv(path_or_buf=output_csv, index=False)

print(f"CSV file created successfully: {output_csv}")

CSV file created successfully: match_data_full.csv
