In [1]:
import pandas as pd
import numpy as np
import os
import json

In [5]:
ball_data = []
folder_path = "C:/Users/bhavi/Desktop/women-t20i-cricket-data-analysis/data/raw/t20s_female_json"

In [7]:
for file in os.listdir(folder_path):
    if file.endswith(".json"):
        
        with open(os.path.join(folder_path, file), "r", encoding="utf-8") as f:
            data = json.load(f)

        match_id = file.replace(".json", "")
        info = data["info"]

        for inning in data["innings"]:
            batting_team = inning["team"]
            
            for over in inning["overs"]:
                over_number = over["over"]
                
                for delivery in over["deliveries"]:
                    
                    batter = delivery.get("batter")
                    bowler = delivery.get("bowler")
                    non_striker = delivery.get("non_striker")
                    
                    runs = delivery.get("runs", {}).get("batter", 0)
                    total_runs = delivery.get("runs", {}).get("total", 0)
                    
                    wicket = 1 if "wickets" in delivery else 0
                    
                    ball_data.append({
                        "match_id": match_id,
                        "batting_team": batting_team,
                        "over": over_number,
                        "batter": batter,
                        "bowler": bowler,
                        "runs_batter": runs,
                        "total_runs": total_runs,
                        "wicket": wicket
                    })

In [15]:
balls_df = pd.DataFrame(ball_data)

balls_df.head()

Unnamed: 0,match_id,batting_team,over,batter,bowler,runs_batter,total_runs,wicket
0,1043989,Australia,0,MM Lanning,LMM Tahuhu,1,1,0
1,1043989,Australia,0,BL Mooney,LMM Tahuhu,0,0,0
2,1043989,Australia,0,BL Mooney,LMM Tahuhu,0,0,0
3,1043989,Australia,0,BL Mooney,LMM Tahuhu,1,1,0
4,1043989,Australia,0,MM Lanning,LMM Tahuhu,0,0,0


In [17]:
balls_df.to_csv(
    "data/processed/ball_by_ball.csv",
    index=False
)

In [19]:
balls_df = pd.read_csv("C:/Users/bhavi/Desktop/women-t20i-cricket-data-analysis/data/processed/ball_by_ball.csv")

india_batting = balls_df[balls_df["batting_team"] == "India"]

In [35]:
batting_stats = india_batting.groupby("batter").agg(
    total_runs=("runs_batter", "sum"),
    balls_faced=("runs_batter", "count"),
    dismissals=("wicket", "sum")
)

batting_stats["strike_rate"] = (
    batting_stats["total_runs"] / batting_stats["balls_faced"]
) * 100

batting_stats["average"] = (
    batting_stats["total_runs"] / batting_stats["dismissals"]
)

batting_stats.sort_values("total_runs", ascending=False).head(10)
india_bowling = balls_df[balls_df["batting_team"] != "India"]


Bowling Metrics

In [23]:
india_bowling = balls_df[balls_df["batting_team"] != "India"]

In [25]:
bowling_stats = india_bowling.groupby("bowler").agg(
    runs_conceded=("total_runs", "sum"),
    balls_bowled=("total_runs", "count"),
    wickets=("wicket", "sum")
)

bowling_stats["economy"] = (
    bowling_stats["runs_conceded"] / (bowling_stats["balls_bowled"] / 6)
)

bowling_stats.sort_values("wickets", ascending=False).head(10)

Unnamed: 0_level_0,runs_conceded,balls_bowled,wickets,economy
bowler,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DB Sharma,2851,2796,163,6.118026
ML Schutt,2036,1955,138,6.248593
T Putthawong,1039,1564,133,3.985934
C Aweko,1347,1949,131,4.146742
Nida Dar,2280,2310,124,5.922078
S Ecclestone,1798,1854,121,5.81877
RP Yadav,1967,1802,118,6.54939
O Kamchomphu,1276,1616,117,4.737624
J Mbabazi,1192,1515,116,4.720792
Vaishnave Mahesh,1213,1549,113,4.698515


Death over performance

In [29]:
india_batting = balls_df[
    balls_df["batting_team"] == "India"
].copy()

india_batting["phase"] = india_batting["over"].apply(phase)

In [31]:
phase_analysis = india_batting.groupby("phase").agg(
    total_runs=("runs_batter", "sum"),
    balls=("runs_batter", "count")
)

phase_analysis["strike_rate"] = (
    phase_analysis["total_runs"] / phase_analysis["balls"]
) * 100

phase_analysis


Unnamed: 0_level_0,total_runs,balls,strike_rate
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Death Overs,3716,2859,129.975516
Middle Overs,9636,8926,107.954291
Powerplay,5949,5826,102.111226
