In [38]:
import pandas as pd
import json
import os

In [39]:
def info_fetch(folder_name,
               raw_root='C:\\Users\\ADMIN\\Cricsheet\\Raw_json_Files',
               output_folder='C:\\Users\\ADMIN\\Cricsheet\\General_Datasets'):
    """Summarise every match JSON file of one raw-data folder into a single CSV.

    Each ``*.json`` file found in ``<raw_root>/<folder_name>`` contributes one
    row built from its ``info`` section. The rows are written to
    ``<output_folder>/<folder_name>.csv`` (without the index column).

    Parameters
    ----------
    folder_name : str
        Name of the sub-folder of ``raw_root`` to read; also used as the stem
        of the output CSV file name.
    raw_root : str, optional
        Directory that contains the raw JSON sub-folders. Defaults to the
        location this notebook was written against.
    output_folder : str, optional
        Directory the CSV is written to; created if it does not exist.

    Returns
    -------
    None
        The result is the CSV on disk. A warning is printed and nothing is
        written when the folder is missing or holds no ``.json`` files.

    Raises
    ------
    KeyError
        If a JSON file has no top-level ``info`` key.
    """
    folder_path = os.path.join(raw_root, folder_name)

    if not os.path.isdir(folder_path):
        print(f"Warning: No files found in {folder_path}")
        return

    # Sorted so the row order of the CSV does not depend on directory order.
    json_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.json')
    )

    # Guard the "files exist, but none are JSON" case as well: previously this
    # reached pd.concat([]) and raised ValueError.
    if not json_names:
        print(f"Warning: No files found in {folder_path}")
        return

    os.makedirs(output_folder, exist_ok=True)

    rows = []

    for json_name in json_names:
        file_path = os.path.join(folder_path, json_name)

        print(f"Fetching file: {file_path}")

        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
        # Windows) can mangle or reject non-ASCII text in the JSON.
        with open(file_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)

        rows.append(_match_info_row(data))

    final_df = pd.DataFrame(rows)

    output_file_path = os.path.join(output_folder, f"{folder_name}.csv")

    final_df.to_csv(output_file_path, index=False)

    print(f"Data from {folder_name} folder is saved to {output_file_path}........")


def _match_info_row(data):
    """Flatten the ``info`` section of one parsed match file into a row dict.

    Every optional key that is absent yields ``None`` for its column.
    """
    info = data['info']
    event = info.get('event') or {}
    toss = info.get('toss') or {}
    outcome = info.get('outcome') or {}
    margin = outcome.get('by') or {}
    teams = info.get('teams') or []
    dates = info.get('dates') or []

    # Pad with None so matches listing fewer than two teams still unpack.
    team_1, team_2 = (list(teams) + [None, None])[:2]

    return {
        'Date': dates[0] if dates else None,
        'Match_Number': event.get('match_number'),
        'City': info.get('city'),
        'Venue': info.get('venue'),
        'Season': info.get('season'),
        'Name': event.get('name'),
        'Match_Type': info.get('match_type'),
        'Total_Overs': info.get('overs') or None,
        'Teams_Participated': ", ".join(teams) if 'teams' in info else None,
        'Team_1': team_1,
        'Team_2': team_2,
        'Team_Type': info.get('team_type'),
        'Toss_Winner': toss.get('winner'),
        'Choose_To': toss.get('decision'),
        'Match_Winner': outcome.get('winner'),
        'Match_Result': outcome.get('result'),
        'Win_By_Runs': margin.get('runs'),
        'Win_By_Wickets': margin.get('wickets'),
        'Win_By_Innings': margin.get('innings'),
        'Man_Of_Match': (", ".join(info['player_of_match'])
                         if 'player_of_match' in info else None),
    }

In [40]:
import os
import json
import pandas as pd

def innings_fetch(folder_name,
                  raw_root='C:\\Users\\ADMIN\\Cricsheet\\Raw_json_Files',
                  output_folder='C:\\Users\\ADMIN\\Cricsheet\\Innings_Datasets'):
    """Flatten the ball-by-ball data of every match JSON in a folder to one CSV.

    Each ``*.json`` file in ``<raw_root>/<folder_name>`` is read and every
    delivery of every innings becomes at least one row in
    ``<output_folder>/<folder_name>_INNINGS.csv``.

    Row multiplicity per delivery:

    * no wicket                -> one row, dismissal columns empty
    * wicket without fielders  -> one row per wicket, ``Fielder_Name`` empty
    * wicket with fielders     -> one row per (wicket, fielder) pair

    Because a multi-fielder dismissal repeats the delivery, summing the run
    columns over all rows can over-count such deliveries.

    Parameters
    ----------
    folder_name : str
        Name of the sub-folder of ``raw_root`` to read; also used as the stem
        of the output CSV file name.
    raw_root : str, optional
        Directory that contains the raw JSON sub-folders. Defaults to the
        location this notebook was written against.
    output_folder : str, optional
        Directory the CSV is written to; created if it does not exist.

    Returns
    -------
    None
        The result is the CSV on disk. A warning is printed and nothing is
        written when the folder is missing or holds no ``.json`` files.
    """
    folder_path = os.path.join(raw_root, folder_name)

    if not os.path.isdir(folder_path):
        print(f"Warning: No files found in {folder_path}")
        return

    # Sorted so the row order of the CSV does not depend on directory order.
    json_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.json')
    )

    if not json_names:
        print(f"Warning: No files found in {folder_path}")
        return

    os.makedirs(output_folder, exist_ok=True)

    innings_data = []

    for json_name in json_names:
        file_path = os.path.join(folder_path, json_name)

        print(f"Fetching file: {file_path}")

        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
        # Windows) can mangle or reject non-ASCII text in the JSON.
        with open(file_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)

        for innings in data.get('innings', []):
            innings_data.extend(_innings_delivery_rows(innings))

    innings_df = pd.DataFrame(innings_data)

    output_file_path = os.path.join(output_folder, f"{folder_name}_INNINGS.csv")

    innings_df.to_csv(output_file_path, index=False)

    print(f"Data from {folder_name} folder is saved to {output_file_path}........")


def _innings_delivery_rows(innings):
    """Return the list of CSV row dicts for all deliveries of one innings."""
    team = innings.get('team', '')
    powerplays = innings.get('powerplays', [])
    target = innings.get('target', {})
    # The target is a property of the innings, so look it up once.
    target_runs = target.get('runs', None)
    target_overs = target.get('overs', None)

    rows = []

    for over in innings.get('overs', []):
        over_number = over.get('over', 0)

        for delivery in over.get('deliveries', []):
            runs = delivery.get('runs', {})

            # NOTE(review): this position formula is kept exactly as written.
            # It assumes deliveries carry a 'ball' key and that powerplay
            # 'from'/'to' are on the same over + ball/6 scale - TODO confirm
            # against the data format; if 'ball' is absent the position is
            # just the over number.
            position = over_number + (delivery.get('ball', 0) / 6)
            powerplay_type = None
            for pp in powerplays:
                if pp['from'] <= position <= pp['to']:
                    powerplay_type = pp['type']
                    break

            base_row = {
                'Team': team,
                'Over': over_number,
                'Batter': delivery.get('batter', ''),
                'Bowler': delivery.get('bowler', ''),
                'Non_striker': delivery.get('non_striker', ''),
                'Batter_runs': runs.get('batter', 0),
                'Extras': runs.get('extras', 0),
                'Total_runs': runs.get('total', 0),
                'Type': None,
                'Player_Out': None,
                'Fielder_Name': None,
                'Powerplay_Type': powerplay_type,
                'Target_Runs': target_runs,
                'Target_Overs': target_overs,
            }

            wickets = delivery.get('wickets') or []

            if not wickets:
                rows.append(base_row)
                continue

            for wicket in wickets:
                dismissal = {
                    'Type': wicket.get('kind', ''),
                    'Player_Out': wicket.get('player_out', ''),
                }
                fielders = wicket.get('fielders') or []

                if not fielders:
                    # Previously a wicket without a 'fielders' entry produced
                    # no row at all, silently dropping the delivery.
                    rows.append({**base_row, **dismissal})
                    continue

                for fielder in fielders:
                    rows.append({
                        **base_row,
                        **dismissal,
                        'Fielder_Name': fielder.get('name', ''),
                    })

    return rows

In [41]:
# Build the match-level summary CSV for each raw-data folder, in this order.
for dataset_folder in ('IPL_DATA', 'ODI_DATA', 'T20_DATA', 'TEST_DATA'):
    info_fetch(dataset_folder)

Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082591.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082592.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082593.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082594.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082595.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082596.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082597.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082598.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082599.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082600.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082601.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082602.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082603.json

In [42]:
# Build the ball-by-ball innings CSV for each raw-data folder, in this order.
for dataset_folder in ('IPL_DATA', 'ODI_DATA', 'T20_DATA', 'TEST_DATA'):
    innings_fetch(dataset_folder)

Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082591.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082592.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082593.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082594.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082595.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082596.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082597.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082598.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082599.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082600.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082601.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082602.json
Fetching file: C:\Users\ADMIN\Cricsheet\Raw_json_Files\IPL_DATA\1082603.json