In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas deprecation/FutureWarning messages.
warnings.simplefilter('ignore')

# Show the working directory; relative output paths resolve against it.
print(os.getcwd())

d:\Projects\Mini_Projects\CricSheet_Analysis\Scripts


In [2]:
class CricketDataProcessor:
    """Convert folders of match JSON files into pandas DataFrames and CSV files.

    The expected JSON layout (an ``info`` object plus an ``innings`` list of
    overs and deliveries) is the one read by the methods below; it appears to
    be the Cricsheet JSON format -- confirm against the data source.

    Typical use: call :meth:`fetch_data` for a folder to collect match-level
    metadata, then :meth:`innings_fetch` for the same folder to build the
    ball-by-ball table and write it to ``<folder_name>_combined.csv``.
    """

    def __init__(self, base_folder_path):
        """Remember the directory that contains the per-format JSON folders.

        Args:
            base_folder_path: Directory whose sub-folders hold the ``.json`` files.
        """
        self.base_folder_path = base_folder_path
        # One row per match; replaced (not extended) by each successful fetch_data() call.
        self.df = pd.DataFrame()
        # Match metadata keyed by JSON file name; accumulates across fetch_data() calls.
        self.info_data = {}
        # Ball-by-ball rows; replaced by innings_fetch() and load_csv_to_dataframe().
        self.innings_df = pd.DataFrame()

    @staticmethod
    def _json_files(folder_path):
        """Return the ``.json`` file names in ``folder_path`` in sorted order."""
        # os.listdir() order is unspecified; sorting keeps row order reproducible.
        return sorted(name for name in os.listdir(folder_path) if name.endswith('.json'))

    @staticmethod
    def _load_json(file_path):
        """Parse one JSON file, decoding it as UTF-8 regardless of the OS locale."""
        with open(file_path, 'r', encoding='utf-8') as file_obj:
            return json.load(file_obj)

    @staticmethod
    def _extract_info(data):
        """Flatten the ``info`` section of one match document into a record.

        Missing sections or keys yield ``None`` for the affected fields.
        """
        info = data.get('info') or {}
        event = info.get('event') or {}
        toss = info.get('toss') or {}
        outcome = info.get('outcome') or {}
        dates = info.get('dates') or []
        return {
            # Only the first listed date is kept; an empty list gives None.
            'Date': dates[0] if dates else None,
            'Match_Number': event.get('match_number'),
            'City': info.get('city'),
            'Venue': info.get('venue'),
            'Season': info.get('season'),
            'Name': event.get('name'),
            'Match_Type': info.get('match_type'),
            'Total_Overs': info.get('overs'),
            'Teams_Participated': ','.join(info.get('teams') or []) if 'teams' in info else None,
            'Toss_Winner': toss.get('winner'),
            'Choose_To': toss.get('decision'),
            # Holds outcome.by.runs; the column name is kept for compatibility.
            'Total_Runs': (outcome.get('by') or {}).get('runs'),
            'Match_Winner': outcome.get('winner'),
            'Man_Of_Match': (
                ','.join(info.get('player_of_match') or [])
                if 'player_of_match' in info else None
            ),
        }

    @staticmethod
    def _delivery_rows(info_details, team, over_number, delivery):
        """Return the output row(s) for one delivery.

        A delivery without wickets yields one row. A delivery with wickets
        yields one row per wicket, so its run values repeat on each of them.
        """
        runs = delivery.get('runs') or {}
        base_row = {
            **info_details,
            'Team': team,
            # Earlier output put the over under 'Over_Number' on wicket rows and
            # 'Over' on all other rows, leaving both columns half empty. Both
            # are now filled on every row so readers of either name still work.
            'Over_Number': over_number,
            'Over': over_number,
            'Batter': delivery.get('batter', ''),
            'Bowler': delivery.get('bowler', ''),
            'Non_striker': delivery.get('non_striker', ''),
            'Batter_runs': runs.get('batter', 0),
            'Extras': runs.get('extras', 0),
            'Total_runs': runs.get('total', 0),
        }

        # A present-but-empty 'wickets' list is treated like "no wicket" so the
        # delivery is not dropped from the output.
        wickets = delivery.get('wickets') or [None]

        rows = []
        for wicket in wickets:
            if wicket is None:
                dismissal = {'Type': None, 'Player_Out': None, 'Fielder_Name': None}
            else:
                fielders = wicket.get('fielders') or []
                fielder = fielders[0] if fielders else None
                # Fielders may be objects such as {"name": ...}; store the name
                # instead of the dict. Plain strings pass through unchanged.
                if isinstance(fielder, dict):
                    fielder = fielder.get('name')
                dismissal = {
                    'Type': wicket.get('kind', ''),
                    'Player_Out': wicket.get('player_out', ''),
                    'Fielder_Name': fielder,
                }
            rows.append({**base_row, **dismissal})
        return rows

    def fetch_data(self, folder_name):
        """Read match-level metadata from every ``.json`` file in a folder.

        Each file's record is stored in ``self.info_data`` under its file name.
        If at least one file was read, ``self.df`` is replaced with one row per
        match; otherwise ``self.df`` is left as it was.

        Args:
            folder_name: Sub-folder of ``base_folder_path`` to scan.
        """
        folder_path = os.path.join(self.base_folder_path, folder_name)
        cric_data = []

        for file in self._json_files(folder_path):
            data = self._load_json(os.path.join(folder_path, file))
            info_details = self._extract_info(data)

            # Keep the record so innings_fetch() can attach it to each delivery.
            self.info_data[file] = info_details
            cric_data.append(info_details)

        if cric_data:
            # Build the frame once from all records rather than concatenating
            # one single-row DataFrame per match.
            self.df = pd.DataFrame(cric_data)
            print(f"Match data fetched successfully from {folder_name}.")
        else:
            print("No data found.")

    def innings_fetch(self, folder_name, output_dir=None):
        """Build the ball-by-ball table for a folder and write it to CSV.

        Every row carries the match metadata for its file, taken from
        ``self.info_data`` when :meth:`fetch_data` already read that file and
        extracted from the file itself otherwise. ``self.innings_df`` is
        replaced with the result.

        Args:
            folder_name: Sub-folder of ``base_folder_path`` to scan.
            output_dir: Directory for ``<folder_name>_combined.csv``; it is
                created if missing. ``None`` (the default) writes to the
                current working directory.
        """
        folder_path = os.path.join(self.base_folder_path, folder_name)
        innings_data = []

        for file in self._json_files(folder_path):
            data = self._load_json(os.path.join(folder_path, file))

            info_details = self.info_data.get(file)
            if info_details is None:
                # fetch_data() was not run for this file; derive the metadata
                # here so the rows are not written without their match columns.
                info_details = self._extract_info(data)

            for innings in data.get('innings', []):
                team = innings.get('team', '')
                for over in innings.get('overs', []):
                    over_number = over.get('over', 0)
                    for delivery in over.get('deliveries', []):
                        innings_data.extend(
                            self._delivery_rows(info_details, team, over_number, delivery)
                        )

        self.innings_df = pd.DataFrame(innings_data)

        output_file = f"{folder_name}_combined.csv"
        if output_dir is not None:
            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(output_dir, output_file)
        self.innings_df.to_csv(output_file, index=False)

        print(f'{output_file} has been created successfully!')

    def load_csv_to_dataframe(self, csv_file_path):
        """Load a CSV file into ``self.innings_df``.

        If the path does not exist, an error message is printed and
        ``self.innings_df`` is left unchanged.

        Args:
            csv_file_path: Path of the CSV file to read.
        """
        if os.path.exists(csv_file_path):
            self.innings_df = pd.read_csv(csv_file_path)
            print(f"{csv_file_path} loaded successfully!")
        else:
            print(f"Error: {csv_file_path} not found.")
        
    
        

# Point the processor at the directory that contains the raw JSON folders
processor = CricketDataProcessor(r"D:\Projects\Mini_Projects\CricSheet_Analysis\Raw_Json_Files")

# Folder names, in the order they are processed
match_folders = ("ipl_json", "odis_json", "t20s_json", "tests_json")

# Pass 1: collect match-level metadata for every folder
for match_folder in match_folders:
    processor.fetch_data(match_folder)

# Pass 2: build the ball-by-ball table for every folder and write one CSV each
for match_folder in match_folders:
    processor.innings_fetch(match_folder)

# innings_df is replaced on every innings_fetch() call, so this previews the
# last folder processed
print(processor.innings_df.head())


Match data fetched successfully from ipl_json.
Match data fetched successfully from odis_json.
Match data fetched successfully from t20s_json.
Match data fetched successfully from tests_json.
ipl_json_combined.csv has been created successfully!
odis_json_combined.csv has been created successfully!
t20s_json_combined.csv has been created successfully!
tests_json_combined.csv has been created successfully!
         Date  Match_Number   City  \
0  2016-11-03           1.0  Perth   
1  2016-11-03           1.0  Perth   
2  2016-11-03           1.0  Perth   
3  2016-11-03           1.0  Perth   
4  2016-11-03           1.0  Perth   

                                          Venue   Season  \
0  Western Australia Cricket Association Ground  2016/17   
1  Western Australia Cricket Association Ground  2016/17   
2  Western Australia Cricket Association Ground  2016/17   
3  Western Australia Cricket Association Ground  2016/17   
4  Western Australia Cricket Association Ground  2016/17   

  

In [5]:
# Combined CSV files to read back, in load order
combined_csv_paths = (
    r"D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\ipl_json_combined.csv",
    r"D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\odis_json_combined.csv",
    r"D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\t20s_json_combined.csv",
    r"D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\tests_json_combined.csv",
)

# Each successful load replaces processor.innings_df, so after the loop it
# holds the contents of the last file that was found.
for combined_csv_path in combined_csv_paths:
    processor.load_csv_to_dataframe(combined_csv_path)

D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\ipl_json_combined.csv loaded successfully!
D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\odis_json_combined.csv loaded successfully!
D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\t20s_json_combined.csv loaded successfully!
D:\Projects\Mini_Projects\CricSheet_Analysis\CSV_Combined\tests_json_combined.csv loaded successfully!
