In [145]:
import json
import pandas as pd
import os

# Class to represent a single cricket match
class CricketMatch:
    def __init__(self,  season, city, venue, date, match_type, match_type_number,teams, toss_winner, toss_decision, outcome_by_runs, outcome_by_wickets, winner, player_of_match, target, innings):
        self.season = season
        self.city = city
        self.venue = venue
        self.date = date
        self.match_type = match_type
        self.match_type_number = match_type_number
        self.teams = teams
        self.toss_winner = toss_winner
        self.toss_decision = toss_decision
        self.outcome_by_runs = outcome_by_runs
        self.outcome_by_wickets = outcome_by_wickets
        self.winner = winner
        self.player_of_match = player_of_match
        self.target = target
        self.innings = innings  

    def to_dict(self):
        
        return {
            'season': self.season,
            'city': self.city,
            'venue': self.venue,
            'date': self.date,
            'match_type': self.match_type,
            'match_type_number': self.match_type_number,
            'teams': ', '.join(self.teams),
            'toss_winner': self.toss_winner,
            'toss_decision': self.toss_decision,
            'outcome_by_runs': self.outcome_by_runs,
            'outcome_by_wickets': self.outcome_by_wickets,
            'winner' : self.winner,
            'player_of_match': ', '.join(self.player_of_match),
            'target': self.target
        }

    def innings_to_dict(self):
        # Convert the innings data into a list of dictionaries for each delivery
        innings_data = []
        for inning in self.innings:
            team = inning['team']
            for over in inning['overs']:
                for delivery in over['deliveries']:
                    innings_data.append({
                        'season': self.season,
                        'match_type_number': self.match_type_number,
                        'team': team,
                        'over': over['over'],
                        'batter': delivery['batter'],
                        'bowler': delivery['bowler'],
                        'runs_batter': delivery['runs'].get('batter', 0),
                        'runs_extras': delivery['runs'].get('extras', 0),
                        'runs_total': delivery['runs'].get('total', 0),
                        'non_striker': delivery['non_striker'],
                    })
        return innings_data




In [146]:
class CricketDataLoader:
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.matches = []

    def load_data(self):
        # Check if the folder exists
        if not os.path.exists(self.folder_path):
            print(f"Error: The folder {self.folder_path} does not exist.")
            return
        
        # Get all JSON files from the folder
        json_files = [f for f in os.listdir(self.folder_path) if f.endswith('.json')]
        
        # If no JSON files are found in the folder
        if not json_files:
            print(f"Error: No JSON files found in the folder {self.folder_path}.")
            return
        
        # Iterate over each JSON file in the folder
        for json_file in json_files:
            try:
                file_path = os.path.join(self.folder_path, json_file)
                with open(file_path, 'r') as file:
                    data = json.load(file)
                    
                    season = data['info'].get('season', 'N/A')  
                    city = data['info'].get('city', 'N/A')
                    venue = data['info'].get('venue', 'N/A')
                    date = data['info']['dates'][0] if 'dates' in data['info'] else 'N/A'
                    match_type = data['info'].get('match_type', 'N/A')
                    match_type_number = data['info'].get('match_type_number','N/A')
                    teams = data['info'].get('teams', [])
                    toss_winner = data['info']['toss'].get('winner', 'N/A') if 'toss' in data['info'] else 'N/A'
                    toss_decision = data['info']['toss'].get('decision', 'N/A') if 'toss' in data['info'] else 'N/A'
                    #outcome = f"Winner: {data['info'].get('outcome', {}).get('winner', 'N/A')}, By {data['info'].get('outcome', {}).get('by', {}).get('runs', 0)} runs"
                    outcome_by_runs = data['info']['outcome']['by'].get('runs', 'N/A') if 'by' in data['info']['outcome'] else 'N/A'
                    outcome_by_wickets = data['info']['outcome']['by'].get('wickets', 'N/A') if 'by' in data['info']['outcome'] else 'N/A'
                    winner = data['info']['outcome'].get('winner','N/A')
                    player_of_match = data['info'].get('player_of_match', [])
                    target= data['innings'][1].get('target', {}).get('runs', 'N/A')       
                    innings = data.get('innings', [])
                    
                    match = CricketMatch(season, city, venue, date, match_type, match_type_number, teams, toss_winner, toss_decision, outcome_by_runs, outcome_by_wickets, winner, player_of_match, target, innings)
                    self.matches.append(match)
                    
            except PermissionError:
                print(f"Error: Permission denied while trying to open the file {json_file}. Please check your file permissions.")
            except Exception as e:
                print(f"An unexpected error occurred with file {json_file}: {e}")

    def to_dataframe(self):
        # Convert list of CricketMatch objects to a pandas DataFrame for match data
        match_data = [match.to_dict() for match in self.matches]
        match_df = pd.DataFrame(match_data)
        
        # Convert innings data to a DataFrame
        innings_data = []
        for match in self.matches:
            innings_data.extend(match.innings_to_dict())
        innings_df = pd.DataFrame(innings_data)
        
        return match_df, innings_df

In [147]:
folder_path = 'C:/Users/Sajiv/Documents/Cricket/t20'  # Replace with the path to your folder containing JSON files

# Print the current working directory to help with debugging
print(f"Current working directory: {os.getcwd()}")

loader = CricketDataLoader(folder_path)
loader.load_data()

# Check if data was loaded successfully before creating the DataFrames
if loader.matches:
    match_df, innings_df = loader.to_dataframe()

    # Displaying the DataFrames
    print("\nMatch DataFrame:")
    print(match_df)

    print("\nInnings DataFrame:")
    print(innings_df)
else:
    print("No data loaded. Please check the error messages above.")

Current working directory: c:\Users\Sajiv\Documents\Cricket\Notebook
An unexpected error occurred with file 1115799.json: list index out of range
An unexpected error occurred with file 1123209.json: list index out of range
An unexpected error occurred with file 1141835.json: list index out of range
An unexpected error occurred with file 1144991.json: list index out of range
An unexpected error occurred with file 1157710.json: list index out of range
An unexpected error occurred with file 1173066.json: list index out of range
An unexpected error occurred with file 1185187.json: list index out of range
An unexpected error occurred with file 1186492.json: list index out of range
An unexpected error occurred with file 1198244.json: list index out of range
An unexpected error occurred with file 1233956.json: list index out of range
An unexpected error occurred with file 1249240.json: list index out of range
An unexpected error occurred with file 1263164.json: list index out of range
An unex

In [148]:
match_df

Unnamed: 0,season,city,venue,date,match_type,match_type_number,teams,toss_winner,toss_decision,outcome_by_runs,outcome_by_wickets,winner,player_of_match,target
0,2016/17,,Melbourne Cricket Ground,2017-02-17,T20,596,"Australia, Sri Lanka",Sri Lanka,field,,5,Sri Lanka,DAS Gunaratne,169
1,2016/17,Victoria,"Simonds Stadium, South Geelong",2017-02-19,T20,597,"Australia, Sri Lanka",Sri Lanka,field,,2,Sri Lanka,DAS Gunaratne,174
2,2016/17,,Adelaide Oval,2017-02-22,T20,598,"Australia, Sri Lanka",Sri Lanka,field,41,,Australia,A Zampa,188
3,2016,Londonderry,"Bready Cricket Club, Magheramason",2016-09-05,T20,564,"Ireland, Hong Kong",Hong Kong,bat,40,,Hong Kong,,170
4,2016,,Harare Sports Club,2016-06-18,T20,558,"Zimbabwe, India",India,field,2,,Zimbabwe,E Chigumbura,171
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3935,2015/16,Mirpur,Shere Bangla National Stadium,2016-03-04,T20,518,"Pakistan, Sri Lanka",Pakistan,field,,6,Pakistan,Umar Akmal,151
3936,2015/16,Mirpur,Shere Bangla National Stadium,2016-03-06,T20,521,"Bangladesh, India",India,field,,8,India,S Dhawan,121
3937,2015/16,Dubai,ICC Academy,2016-02-03,T20,490,"United Arab Emirates, Netherlands",United Arab Emirates,field,84,,Netherlands,Mudassar Bukhari,158
3938,2016,,Pallekele International Cricket Stadium,2016-09-06,T20,565,"Sri Lanka, Australia",Sri Lanka,field,85,,Australia,GJ Maxwell,264


In [149]:
match_df[match_df['target']=='N/A']

Unnamed: 0,season,city,venue,date,match_type,match_type_number,teams,toss_winner,toss_decision,outcome_by_runs,outcome_by_wickets,winner,player_of_match,target
280,2019,Derry,Bready,2019-07-14,T20,831,"Ireland, Zimbabwe",Zimbabwe,field,,8.0,Zimbabwe,,
524,2019,Hamilton,Bermuda National Stadium,2019-08-22,T20,861,"United States of America, Bermuda",Bermuda,field,,4.0,Bermuda,DAP Darrell,
731,2020/21,Ahmedabad,Narendra Modi Stadium,2021-03-12,T20,1131,"India, England",England,field,,8.0,England,JC Archer,
1837,2022/23,,Tafawa Balewa Square (TBS) Cricket Oval,2023-03-27,T20,1390,"Rwanda, Ghana",Ghana,field,117.0,,Rwanda,H Ishimwe,
2273,2023/24,,San Albano,2023-10-14,T20,1684,"Argentina, Chile",Chile,field,281.0,,Argentina,,
2274,2023/24,,San Albano,2023-10-15,T20,1686,"Argentina, Chile",Argentina,bat,311.0,,Argentina,,
2282,2023/24,,St Georges Quilmes,2023-10-18,T20,2320,"Chile, Mexico",Chile,bat,,5.0,Mexico,Shashikant Laxman,
2283,2023/24,,St Georges Quilmes,2023-10-19,T20,2322,"Mexico, Argentina",Argentina,field,,4.0,Argentina,T Rossi,
2284,2023/24,,St Georges Quilmes,2023-10-20,T20,2323,"Chile, Argentina",Chile,bat,,10.0,Argentina,Lucas Rossi,
2852,2024,Krefeld,Bayer Uerdingen Cricket Ground,2024-07-09,T20,2744,"Slovenia, Norway",Slovenia,bat,,8.0,Norway,Raza Iqbal,


In [150]:
match_df.to_csv("C:/Users/Sajiv/Documents/Cricket/T20_set/T20_match_info.csv")

In [151]:
innings_df

Unnamed: 0,season,match_type_number,team,over,batter,bowler,runs_batter,runs_extras,runs_total,non_striker
0,2016/17,596,Australia,0,AJ Finch,SL Malinga,0,0,0,M Klinger
1,2016/17,596,Australia,0,AJ Finch,SL Malinga,0,0,0,M Klinger
2,2016/17,596,Australia,0,AJ Finch,SL Malinga,1,0,1,M Klinger
3,2016/17,596,Australia,0,M Klinger,SL Malinga,2,0,2,AJ Finch
4,2016/17,596,Australia,0,M Klinger,SL Malinga,0,0,0,AJ Finch
...,...,...,...,...,...,...,...,...,...,...
900137,2016,567,Australia,17,TM Head,SS Pathirana,1,0,1,PM Nevill
900138,2016,567,Australia,17,PM Nevill,SS Pathirana,3,0,3,TM Head
900139,2016,567,Australia,17,TM Head,SS Pathirana,0,0,0,PM Nevill
900140,2016,567,Australia,17,TM Head,SS Pathirana,0,0,0,PM Nevill


In [152]:
innings_df.to_csv("C:/Users/Sajiv/Documents/Cricket/T20_set/T20_innings.csv")