In [37]:
import pandas as pd
import json
from pathlib import Path 


In [57]:

def parse_wpl_json(file_path):
    """Flatten one ball-by-ball match JSON file into a DataFrame with one row per delivery.

    Parameters
    ----------
    file_path : str or os.PathLike
        Location of the match JSON file. The file name up to its first '.'
        becomes the ``match_id`` column (e.g. ``1513682.json`` -> ``'1513682'``).

    Returns
    -------
    pandas.DataFrame
        One row per entry in every over's ``deliveries`` list, with columns
        ``match_id, date, inning, batting_team, bowling_team, over, ball,
        batter, bowler, non_striker, runs_bat, runs_extras, runs_total,
        is_wide, is_noball, is_wicket, player_out, wicket_kind``.
        ``inning`` and ``ball`` are 1-based positions in their lists; ``over``
        is the value stored in the file. ``ball`` counts every delivery in the
        over, including wides and no-balls.

    Raises
    ------
    KeyError, IndexError
        If a required key (``info``, ``dates``, ``teams``, ``innings``,
        ``team``, ``over``, ``deliveries``, ``batter``, ``bowler``,
        ``non_striker``, ``runs``) is missing or ``dates`` is empty.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    info = data['info']
    # Uses pathlib (imported at the top of the notebook) rather than `os`,
    # which this notebook never imports.
    match_id = Path(file_path).name.split('.')[0]
    date = info['dates'][0]
    teams = info['teams']

    ball_data = []

    for inning_num, inning in enumerate(data['innings'], start=1):
        team_batting = inning['team']
        # The bowling side is the first listed team that is not batting.
        team_bowling = next((t for t in teams if t != team_batting), None)

        # Tolerate an innings entry with no 'overs' key rather than failing the whole match.
        for over_data in inning.get('overs', []):
            over_num = over_data['over']

            for ball_num, delivery in enumerate(over_data['deliveries'], start=1):
                runs = delivery['runs']
                extras = delivery.get('extras', {})
                wickets = delivery.get('wickets')
                # Only the first entry of a delivery's 'wickets' list is recorded.
                first_wicket = wickets[0] if wickets else None

                ball_data.append({
                    'match_id': match_id,
                    'date': date,
                    'inning': inning_num,
                    'batting_team': team_batting,
                    'bowling_team': team_bowling,
                    'over': over_num,
                    'ball': ball_num,
                    'batter': delivery['batter'],
                    'bowler': delivery['bowler'],
                    'non_striker': delivery['non_striker'],
                    'runs_bat': runs['batter'],
                    'runs_extras': runs['extras'],
                    'runs_total': runs['total'],
                    # Wide / no-ball flags (important for balls faced/bowled)
                    'is_wide': 1 if 'wides' in extras else 0,
                    'is_noball': 1 if 'noballs' in extras else 0,
                    'is_wicket': 1 if first_wicket else 0,
                    'player_out': first_wicket['player_out'] if first_wicket else None,
                    'wicket_kind': first_wicket['kind'] if first_wicket else None,
                })

    return pd.DataFrame(ball_data)


In [58]:
# 1. Locate the raw match files relative to the notebook's working directory
base_path = Path.cwd().parent
data_folder = base_path / 'Data' / '01_raw_data' / 'wpl_json'

# 2. List to store dataframes (faster than appending to a df in a loop)
dfs_list = []

# Bound up front so the display at the end of this cell works even when the
# directory is missing or no file could be parsed.
all_content = pd.DataFrame()

# is_dir() is already False for a path that does not exist
if data_folder.is_dir():
    print('Reading data from:', data_folder)

    # 3. Iterate through files; sorted() makes the row order reproducible
    #    instead of depending on the filesystem's listing order
    for file_path in sorted(data_folder.glob('*.json')):
        try:
            df = parse_wpl_json(file_path)
            dfs_list.append(df)
        except Exception as e:
            # Best-effort load: report the bad file and carry on with the rest
            print(f"Skipping {file_path.name} due to error: {e}")

    # 4. Concatenate everything at once
    if dfs_list:
        all_content = pd.concat(dfs_list, ignore_index=True)
        print(f"Combined {len(dfs_list)} files.")
    else:
        print("No data found.")
else:
    print(f"Directory not found: {data_folder}")

# Display result
all_content.head()

Reading data from: d:\Projects\wpl_analysis\Data\01_raw_data\wpl_json
Combined 22 files.


Unnamed: 0,match_id,date,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,runs_bat,runs_extras,runs_total,is_wide,is_noball,is_wicket,player_out,wicket_kind
0,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,1,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
1,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,2,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
2,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,3,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
3,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,4,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
4,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,5,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,


In [59]:
# Display the combined ball-by-ball frame; the rendered output below is
# abbreviated to the first and last rows with a '...' row in between.
all_content

Unnamed: 0,match_id,date,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,runs_bat,runs_extras,runs_total,is_wide,is_noball,is_wicket,player_out,wicket_kind
0,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,1,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
1,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,2,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
2,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,3,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
3,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,4,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
4,1513682,2026-01-09,1,Mumbai Indians,Royal Challengers Bengaluru,0,5,AC Kerr,LK Bell,G Kamalini,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5245,1513703,2026-02-05,2,Royal Challengers Bengaluru,Delhi Capitals,18,6,RP Yadav,CA Henry,N de Klerk,1,0,1,0,0,0,,
5246,1513703,2026-02-05,2,Royal Challengers Bengaluru,Delhi Capitals,19,1,RP Yadav,N Shree Charani,N de Klerk,1,0,1,0,0,0,,
5247,1513703,2026-02-05,2,Royal Challengers Bengaluru,Delhi Capitals,19,2,N de Klerk,N Shree Charani,RP Yadav,1,0,1,0,0,0,,
5248,1513703,2026-02-05,2,Royal Challengers Bengaluru,Delhi Capitals,19,3,RP Yadav,N Shree Charani,N de Klerk,4,0,4,0,0,0,,


In [79]:
# Deliveries bowled by 'NSS Sharma' on which a wicket was recorded
all_content.loc[
    all_content['bowler'].eq('NSS Sharma') & all_content['is_wicket'].ne(0)
]

Unnamed: 0,match_id,date,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,runs_bat,runs_extras,runs_total,is_wide,is_noball,is_wicket,player_out,wicket_kind
541,1513684,2026-01-10,1,Mumbai Indians,Delhi Capitals,6,3,G Kamalini,NSS Sharma,NR Sciver-Brunt,0,0,0,0,0,1,G Kamalini,caught
615,1513684,2026-01-10,1,Mumbai Indians,Delhi Capitals,18,3,NJ Carey,NSS Sharma,H Kaur,0,0,0,0,0,1,NJ Carey,bowled
809,1513685,2026-01-11,1,Gujarat Giants,Delhi Capitals,10,5,SFM Devine,NSS Sharma,A Gardner,0,0,0,0,0,1,SFM Devine,caught
862,1513685,2026-01-11,1,Gujarat Giants,Delhi Capitals,19,2,KS Gautam,NSS Sharma,KS Ahuja,0,0,0,0,0,1,KS Gautam,caught
864,1513685,2026-01-11,1,Gujarat Giants,Delhi Capitals,19,4,KS Ahuja,NSS Sharma,TP Kanwar,0,0,0,0,0,1,KS Ahuja,stumped
865,1513685,2026-01-11,1,Gujarat Giants,Delhi Capitals,19,5,RS Gayakwad,NSS Sharma,TP Kanwar,0,0,0,0,0,1,RS Gayakwad,bowled
866,1513685,2026-01-11,1,Gujarat Giants,Delhi Capitals,19,6,Renuka Singh,NSS Sharma,TP Kanwar,0,0,0,0,0,1,Renuka Singh,bowled
1533,1513688,2026-01-14,1,UP Warriorz,Delhi Capitals,15,4,MM Lanning,NSS Sharma,H Deol,0,0,0,0,0,1,MM Lanning,caught
2640,1513692,2026-01-17,2,Royal Challengers Bengaluru,Delhi Capitals,17,2,S Mandhana,NSS Sharma,G Voll,0,0,0,0,0,1,S Mandhana,caught
2919,1513694,2026-01-20,1,Mumbai Indians,Delhi Capitals,3,5,S Sajana,NSS Sharma,HK Matthews,0,0,0,0,0,1,S Sajana,bowled


In [81]:
# Build the output location from base_path (defined in the loading cell) instead
# of a machine-specific absolute path, so the notebook is not tied to one drive.
# NOTE(review): '02_cleaned_da' looks truncated and the file name has no '.csv'
# suffix; both are kept exactly as they were - confirm the intended names.
all_content.to_csv(base_path / 'Data' / '02_cleaned_da' / 'cleaned_data', index=False)