In [14]:
import json
import pandas as pd 
import os

In [15]:
class Testdata:
    def __init__(self,  season, city, venue, date, match_type, match_type_number,teams, toss_winner, toss_decision, outcome_by_runs, outcome_by_wickets, draw_outcome, winner, player_of_match, target, innings):
        self.season = season
        self.city = city
        self.venue = venue
        self.date = date
        self.match_type = match_type
        self.match_type_number = match_type_number
        self.teams = teams
        self.toss_winner = toss_winner
        self.toss_decision = toss_decision
        self.outcome_by_runs = outcome_by_runs
        self.outcome_by_wickets = outcome_by_wickets
        self.draw_outcome = draw_outcome
        self.winner = winner
        self.player_of_match = player_of_match
        self.target = target
        self.innings = innings  

    def to_dict(self):
        
        return {
            'season': self.season,
            'city': self.city,
            'venue': self.venue,
            'date': self.date,
            'match_type': self.match_type,
            'match_type_number': self.match_type_number,
            'teams': ', '.join(self.teams),
            'toss_winner': self.toss_winner,
            'toss_decision': self.toss_decision,
            'outcome_by_runs': self.outcome_by_runs,
            'outcome_by_wickets': self.outcome_by_wickets,
            'draw_outcome': self.draw_outcome,
            'winner' : self.winner,
            'player_of_match': ', '.join(self.player_of_match),
            'target': self.target
        }

    def innings_dict(self):
        innings_data = []
        for inning in self.innings:
            team = inning['team']
            for over in inning['overs']:
                for delivery in over['deliveries']:
                    innings_data.append({
                        'season': self.season,
                        'match_type_number': self.match_type_number,
                        'team': team,
                        'over': over['over'],
                        'batter': delivery['batter'],
                        'bowler': delivery['bowler'],
                        'runs_batter': delivery['runs'].get('batter', 0),
                        'runs_extras': delivery['runs'].get('extras', 0),
                        'runs_total': delivery['runs'].get('total', 0),
                        'non_striker': delivery['non_striker'],

                    })
        return innings_data       


In [16]:
class Testdataloader:
    def __init__(self,folder_path):
        self.folder_path = folder_path
        self.matches = []

    def load_data(self):
        if not os.path.exists(self.folder_path):
            print(f"Error: The folder {self.folder_path} does not exist.")
            return
        
        json_files = [f for f in os.listdir(self.folder_path) if f.endswith('.json')]

        if not json_files:
            print(f"Error: No JSON files found in the folder {self.folder_path}.")
            return
        
        for json_file in json_files:
            try:
                file_path = os.path.join(self.folder_path, json_file)
                with open(file_path, 'r') as file:
                    data = json.load(file)

                    season = data['info'].get('season', 'N/A')  
                    city = data['info'].get('city', 'N/A')
                    venue = data['info'].get('venue', 'N/A')
                    date = data['info']['dates'][0] if 'dates' in data['info'] else 'N/A'
                    match_type = data['info'].get('match_type', 'N/A')
                    match_type_number = data['info'].get('match_type_number', 'N/A')
                    teams = data['info'].get('teams', [])
                    toss_winner = data['info']['toss'].get('winner', 'N/A') if 'toss' in data['info'] else 'N/A'
                    toss_decision = data['info']['toss'].get('decision', 'N/A') if 'toss' in data['info'] else 'N/A'
                    #outcome = f"Winner: {data['info'].get('outcome', {}).get('winner', 'N/A')}, By {data['info'].get('outcome', {}).get('by', {}).get('runs', 0)} runs"
                    outcome_by_runs = data['info']['outcome']['by'].get('runs', 'N/A') if 'by' in data['info']['outcome'] else 'N/A'
                    outcome_by_wickets = data['info']['outcome']['by'].get('wickets', 'N/A') if 'by' in data['info']['outcome'] else 'N/A'
                    draw_outcome = data['info']['outcome'].get('result', 'N/A')
                    winner = data['info']['outcome'].get('winner','N/A')
                    player_of_match = data['info'].get('player_of_match', [])
                    target= data['innings'][1].get('target', {}).get('runs', 'N/A') 
                    innings = data.get('innings', [])
                    
                    # Create an instance of CricketMatch
                    match = Testdata(season, city, venue, date, match_type, match_type_number, teams, toss_winner, toss_decision, outcome_by_runs, outcome_by_wickets, draw_outcome, winner, player_of_match, target, innings)
                    self.matches.append(match)

            except PermissionError:
                print(f"Error: Permission denied while trying to open the file {json_file}. Please check your file permissions.")
            except Exception as e:
                print(f"An unexpected error occurred with file {json_file}: {e}")

    def to_dataframe(self):
        # Convert list of CricketMatch objects to a pandas DataFrame for match data
        match_data = [match.to_dict() for match in self.matches]
        match_df = pd.DataFrame(match_data)

        # Convert innings data to a DataFrame
        innings_data = []
        for match in self.matches:
            innings_data.extend(match.innings_dict())
        innings_df = pd.DataFrame(innings_data)

        return match_df, innings_df    

In [17]:
folder_path = 'C:/Users/Sajiv/Documents/Cricket/test'  

print(f"Current working directory: {os.getcwd()}")

loader = Testdataloader(folder_path)
loader.load_data()

if loader.matches:
    match_df, innings_df = loader.to_dataframe()

    # Displaying the DataFrames
    print("\nMatch DataFrame:")
    print(match_df)

    print("\nInnings DataFrame:")
    print(innings_df)
else:
    print("No data loaded. Please check the error messages above.")

Current working directory: c:\Users\Sajiv\Documents\Cricket\Notebook
An unexpected error occurred with file 1000851.json: Expecting value: line 1 column 1 (char 0)
An unexpected error occurred with file 1022599.json: list index out of range
An unexpected error occurred with file 352662.json: list index out of range
An unexpected error occurred with file 464989.json: list index out of range
An unexpected error occurred with file 817215.json: list index out of range

Match DataFrame:
      season          city                                    venue  \
0    2016/17        Hobart                           Bellerive Oval   
1    2016/17           N/A                            Adelaide Oval   
2    2016/17      Brisbane   Brisbane Cricket Ground, Woolloongabba   
3    2016/17           N/A                 Melbourne Cricket Ground   
4    2016/17           N/A                    Sydney Cricket Ground   
..       ...           ...                                      ...   
851  2016/17    

In [18]:
match_df.head(5)

Unnamed: 0,season,city,venue,date,match_type,match_type_number,teams,toss_winner,toss_decision,outcome_by_runs,outcome_by_wickets,draw_outcome,winner,player_of_match,target
0,2016/17,Hobart,Bellerive Oval,2016-11-12,Test,2233,"Australia, South Africa",South Africa,field,80.0,,,South Africa,KJ Abbott,
1,2016/17,,Adelaide Oval,2016-11-24,Test,2236,"Australia, South Africa",South Africa,bat,,7.0,,Australia,UT Khawaja,
2,2016/17,Brisbane,"Brisbane Cricket Ground, Woolloongabba",2016-12-15,Test,2240,"Australia, Pakistan",Australia,bat,39.0,,,Australia,Asad Shafiq,
3,2016/17,,Melbourne Cricket Ground,2016-12-26,Test,2242,"Australia, Pakistan",Pakistan,bat,18.0,,,Australia,SPD Smith,
4,2016/17,,Sydney Cricket Ground,2017-01-03,Test,2245,"Australia, Pakistan",Australia,bat,220.0,,,Australia,DA Warner,


In [19]:
match_df.to_csv("C:/Users/Sajiv/Documents/Cricket/Test_set/Test_match_info.csv")

In [20]:
innings_df

Unnamed: 0,season,match_type_number,team,over,batter,bowler,runs_batter,runs_extras,runs_total,non_striker
0,2016/17,2233,Australia,0,DA Warner,VD Philander,1,0,1,JA Burns
1,2016/17,2233,Australia,0,JA Burns,VD Philander,0,0,0,DA Warner
2,2016/17,2233,Australia,0,JA Burns,VD Philander,1,0,1,DA Warner
3,2016/17,2233,Australia,0,DA Warner,VD Philander,0,0,0,JA Burns
4,2016/17,2233,Australia,0,DA Warner,VD Philander,0,0,0,JA Burns
...,...,...,...,...,...,...,...,...,...,...
1665466,2016,2217,Australia,43,JM Holland,MDK Perera,0,0,0,NM Lyon
1665467,2016,2217,Australia,43,JM Holland,MDK Perera,0,0,0,NM Lyon
1665468,2016,2217,Australia,43,JM Holland,MDK Perera,0,0,0,NM Lyon
1665469,2016,2217,Australia,43,JM Holland,MDK Perera,0,0,0,NM Lyon


In [21]:
innings_df.to_csv("C:/Users/Sajiv/Documents/Cricket/Test_set/Test_innings.csv")