In [12]:
import pandas as pd
import json
import os

In [13]:
def process_single_match(json_data):
    batting_stats = {}
    bowling_stats = {}
    fielding_stats = {}

    for inning in json_data['innings']:
        team = inning['team']
        for over in inning['overs']:
            for delivery in over['deliveries']:
                batter = delivery['batter']
                bowler = delivery['bowler']
                runs = delivery['runs']['batter']
                extras = delivery['runs']['extras']
                total = delivery['runs']['total']

                if batter not in batting_stats:
                    batting_stats[batter] = {'runs': 0, 'balls': 0, 'dismissals': 0}
                batting_stats[batter]['runs'] += runs
                batting_stats[batter]['balls'] += 1

                if bowler not in bowling_stats:
                    bowling_stats[bowler] = {'runs_conceded': 0, 'balls_bowled': 0, 'wickets': 0}
                bowling_stats[bowler]['runs_conceded'] += total
                bowling_stats[bowler]['balls_bowled'] += 1

                if 'wickets' in delivery:
                    for wicket in delivery['wickets']:
                        player_out = wicket['player_out']
                        if player_out in batting_stats:
                            batting_stats[player_out]['dismissals'] += 1
                        if 'fielders' in wicket:
                            for fielder in wicket['fielders']:
                                if 'name' in fielder:
                                    if fielder['name'] not in fielding_stats:
                                        fielding_stats[fielder['name']] = {'catches': 0, 'run_outs': 0, 'stumpings': 0}
                                    if wicket['kind'] == 'caught':
                                        fielding_stats[fielder['name']]['catches'] += 1
                                    elif wicket['kind'] == 'run out':
                                        fielding_stats[fielder['name']]['run_outs'] += 1
                                    elif wicket['kind'] == 'stumped':
                                        fielding_stats[fielder['name']]['stumpings'] += 1

    return batting_stats, bowling_stats, fielding_stats

In [14]:
def aggregate_player_data(json_files):
    all_batting_stats = {}
    all_bowling_stats = {}
    all_fielding_stats = {}

    for file in json_files:
        with open(file, 'r') as f:
            json_data = json.load(f)
            batting_stats, bowling_stats, fielding_stats = process_single_match(json_data)

            for player, stats in batting_stats.items():
                if player not in all_batting_stats:
                    all_batting_stats[player] = {'runs': 0, 'balls': 0, 'dismissals': 0}
                all_batting_stats[player]['runs'] += stats['runs']
                all_batting_stats[player]['balls'] += stats['balls']
                all_batting_stats[player]['dismissals'] += stats['dismissals']

            for player, stats in bowling_stats.items():
                if player not in all_bowling_stats:
                    all_bowling_stats[player] = {'runs_conceded': 0, 'balls_bowled': 0, 'wickets': 0}
                all_bowling_stats[player]['runs_conceded'] += stats['runs_conceded']
                all_bowling_stats[player]['balls_bowled'] += stats['balls_bowled']
                all_bowling_stats[player]['wickets'] += stats['wickets']

            for player, stats in fielding_stats.items():
                if player not in all_fielding_stats:
                    all_fielding_stats[player] = {'catches': 0, 'run_outs': 0, 'stumpings': 0}
                all_fielding_stats[player]['catches'] += stats['catches']
                all_fielding_stats[player]['run_outs'] += stats['run_outs']
                all_fielding_stats[player]['stumpings'] += stats['stumpings']

    return all_batting_stats, all_bowling_stats, all_fielding_stats

In [15]:
def convert_to_dataframes(batting_stats, bowling_stats, fielding_stats):
    batting_df = pd.DataFrame.from_dict(batting_stats, orient='index')
    batting_df.index.name = 'Player'
    batting_df.reset_index(inplace=True)

    bowling_df = pd.DataFrame.from_dict(bowling_stats, orient='index')
    bowling_df.index.name = 'Player'
    bowling_df.reset_index(inplace=True)

    fielding_df = pd.DataFrame.from_dict(fielding_stats, orient='index')
    fielding_df.index.name = 'Player'
    fielding_df.reset_index(inplace=True)

    return batting_df, bowling_df, fielding_df


In [16]:
json_files = [f for f in os.listdir('odis_json') if f.endswith('.json')]
json_files=['odis_json/'+f for f in json_files]
json_files

['odis_json/366711.json',
 'odis_json/378753.json',
 'odis_json/378755.json',
 'odis_json/351684.json',
 'odis_json/351685.json',
 'odis_json/385749.json',
 'odis_json/385750.json',
 'odis_json/378759.json',
 'odis_json/351686.json',
 'odis_json/385751.json',
 'odis_json/351687.json',
 'odis_json/386530.json',
 'odis_json/351688.json',
 'odis_json/386531.json',
 'odis_json/385025.json',
 'odis_json/351689.json',
 'odis_json/386532.json',
 'odis_json/386533.json',
 'odis_json/351690.json',
 'odis_json/386534.json',
 'odis_json/351691.json',
 'odis_json/351692.json',
 'odis_json/351693.json',
 'odis_json/366623.json',
 'odis_json/366626.json',
 'odis_json/366627.json',
 'odis_json/357962.json',
 'odis_json/366624.json',
 'odis_json/366625.json',
 'odis_json/352665.json',
 'odis_json/352667.json',
 'odis_json/352668.json',
 'odis_json/390204.json',
 'odis_json/352669.json',
 'odis_json/350477.json',
 'odis_json/350478.json',
 'odis_json/390227.json',
 'odis_json/350479.json',
 'odis_json/

In [17]:
with open(json_files[0],'r')as d:
    d=json.load(d)
d

{'meta': {'data_version': '1.0.0', 'created': '2013-03-05', 'revision': 1},
 'info': {'balls_per_over': 6,
  'city': 'Wellington',
  'dates': ['2009-01-07'],
  'event': {'match_number': 3,
   'name': 'West Indies in New Zealand ODI Series'},
  'gender': 'male',
  'match_type': 'ODI',
  'match_type_number': 2788,
  'officials': {'match_referees': ['J Srinath'],
   'reserve_umpires': ['EA Watkin'],
   'tv_umpires': ['GAV Baxter'],
   'umpires': ['MR Benson', 'AL Hill']},
  'outcome': {'by': {'wickets': 7}, 'winner': 'New Zealand'},
  'overs': 50,
  'player_of_match': ['DL Vettori'],
  'players': {'New Zealand': ['JD Ryder',
    'BB McCullum',
    'JM How',
    'LRPL Taylor',
    'DR Flynn',
    'JDP Oram',
    'GD Elliott',
    'DL Vettori',
    'KD Mills',
    'TG Southee',
    'JS Patel'],
   'West Indies': ['CH Gayle',
    'XM Marshall',
    'RR Sarwan',
    'S Chanderpaul',
    'SE Findlay',
    'D Ramdin',
    'KA Pollard',
    'JE Taylor',
    'DBL Powell',
    'NO Miller',
    'FH

In [18]:
batting_stats, bowling_stats, fielding_stats = aggregate_player_data(json_files)
batting_df, bowling_df, fielding_df = convert_to_dataframes(batting_stats, bowling_stats, fielding_stats)

In [19]:
batting_df

Unnamed: 0,Player,runs,balls,dismissals
0,CH Gayle,6433,7434,189
1,XM Marshall,379,677,25
2,RR Sarwan,3717,5143,97
3,S Chanderpaul,3804,5605,88
4,SE Findlay,88,140,3
...,...,...,...,...
2367,JC Balt,35,51,0
2368,Hashir Dafedar,15,20,1
2369,V Shukla,25,44,1
2370,Hassnain Shah,16,17,0


In [20]:
bowling_df

Unnamed: 0,Player,runs_conceded,balls_bowled,wickets
0,KD Mills,5109,6548,0
1,TG Southee,7284,7917,0
2,JDP Oram,3561,4880,0
3,GD Elliott,1122,1232,0
4,DL Vettori,5322,7996,0
...,...,...,...,...
1828,E Bosch,33,44,0
1829,S Muthusamy,67,71,0
1830,M Mpongwana,25,19,0
1831,Hassnain Shah,37,56,0


In [None]:
def extract_per_match_data(json_files):
    per_match_data = []

    for file in json_files:
        with open(file, 'r') as f:
            json_data = json.load(f)

            match_id = json_data['info'].get('match_type_number', 'N/A')
            match_date = json_data['info']['dates'][0] if json_data['info']['dates'] else 'N/A'
            teams = json_data['info']['teams']

            batting_stats = {}
            bowling_stats = {}
            fielding_stats = {}

            for inning in json_data['innings']:
                team = inning['team']
                for over in inning['overs']:
                    for delivery in over['deliveries']:
                        batter = delivery['batter']
                        bowler = delivery['bowler']
                        runs = delivery['runs']['batter']
                        extras = delivery['runs']['extras']
                        total = delivery['runs']['total']

                        if batter not in batting_stats:
                            batting_stats[batter] = {'runs': 0, 'balls': 0, 'dismissals': 0}
                        batting_stats[batter]['runs'] += runs
                        batting_stats[batter]['balls'] += 1

                        if bowler not in bowling_stats:
                            bowling_stats[bowler] = {'runs_conceded': 0, 'balls_bowled': 0, 'wickets': 0}
                        bowling_stats[bowler]['runs_conceded'] += total
                        bowling_stats[bowler]['balls_bowled'] += 1

                        if 'wickets' in delivery:
                            for wicket in delivery['wickets']:
                                player_out = wicket['player_out']
                                if player_out in batting_stats:
                                    batting_stats[player_out]['dismissals'] += 1

                                if 'fielders' in wicket:
                                    for fielder in wicket['fielders']:
                                        if 'name' in fielder:
                                            fielder_name = fielder['name']
                                            if fielder_name not in fielding_stats:
                                                fielding_stats[fielder_name] = {'catches': 0, 'run_outs': 0, 'stumpings': 0}
                                            if wicket['kind'] == 'caught':
                                                fielding_stats[fielder_name]['catches'] += 1
                                            elif wicket['kind'] == 'run out':
                                                fielding_stats[fielder_name]['run_outs'] += 1
                                            elif wicket['kind'] == 'stumped':
                                                fielding_stats[fielder_name]['stumpings'] += 1

            for player, stats in batting_stats.items():
                per_match_data.append({
                    'Match ID': match_id,
                    'Date': match_date,
                    'Team': teams[0] if player in json_data['info']['players'][teams[0]] else teams[1],
                    'Player': player,
                    'Runs': stats['runs'],
                    'Balls': stats['balls'],
                    'Dismissals': stats['dismissals'],
                    'Runs Conceded': 0,  
                    'Balls Bowled': 0,   
                    'Wickets': 0,        
                    'Catches': 0,        
                    'Run Outs': 0,       
                    'Stumpings': 0       
                })

            for player, stats in bowling_stats.items():
                if player not in batting_stats:  
                    per_match_data.append({
                        'Match ID': match_id,
                        'Date': match_date,
                        'Team': teams[0] if player in json_data['info']['players'][teams[0]] else teams[1],
                        'Player': player,
                        'Runs': 0,              
                        'Balls': 0,             
                        'Dismissals': 0,        
                        'Runs Conceded': stats['runs_conceded'],
                        'Balls Bowled': stats['balls_bowled'],
                        'Wickets': stats['wickets'],
                        'Catches': 0,           
                        'Run Outs': 0,          
                        'Stumpings': 0          
                    })

            for player, stats in fielding_stats.items():
                if player not in batting_stats and player not in bowling_stats: 
                    per_match_data.append({
                        'Match ID': match_id,
                        'Date': match_date,
                        'Team': teams[0] if player in json_data['info']['players'][teams[0]] else teams[1],
                        'Player': player,
                        'Runs': 0,              
                        'Balls': 0,             
                        'Dismissals': 0,        
                        'Runs Conceded': 0,     
                        'Balls Bowled': 0,      
                        'Wickets': 0,           
                        'Catches': stats['catches'],
                        'Run Outs': stats['run_outs'],
                        'Stumpings': stats['stumpings']
                    })


    per_match_df = pd.DataFrame(per_match_data)
    return per_match_df

In [22]:
extract_per_match_data(json_files)

Unnamed: 0,Match ID,Date,Team,Player,Runs,Balls,Dismissals,Runs Conceded,Balls Bowled,Wickets,Catches,Run Outs,Stumpings
0,2788,2009-01-07,West Indies,CH Gayle,18,25,1,0,0,0,0,0,0
1,2788,2009-01-07,West Indies,XM Marshall,0,3,1,0,0,0,0,0,0
2,2788,2009-01-07,West Indies,RR Sarwan,5,15,1,0,0,0,0,0,0
3,2788,2009-01-07,West Indies,S Chanderpaul,45,78,1,0,0,0,0,0,0
4,2788,2009-01-07,West Indies,SE Findlay,8,25,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62774,4832,2025-02-12,Oman,Hassnain Shah,0,0,0,13,19,0,0,0,0
62775,4832,2025-02-12,Oman,S Shrivastava,0,0,0,34,62,0,0,0,0
62776,4832,2025-02-12,Oman,Jay Odedra,0,0,0,23,43,0,0,0,0
62777,4832,2025-02-12,Oman,Sufyan Mehmood,0,0,0,13,30,0,0,0,0
