In [2]:
import pandas as pd
import json

In [6]:
data_file = "/Users/saral/Documents/cricket/All data/t20s_json/1485940.json"

# Load JSON
with open(data_file, "r") as f:
    data = json.load(f)

rows = []

# Iterate over innings
for idx, inng in enumerate(data["innings"], start=1):
    team = inng["team"]

    for over_data in inng["overs"]:
        over = over_data["over"]

        for ball_number, delivery in enumerate(over_data["deliveries"], start=1):

            runs = delivery.get("runs", {})
            extras = delivery.get("extras", {})

            # Determine extra type
            if "wides" in extras:
                extra_type = "wide"
            elif "noballs" in extras:
                extra_type = "no_ball"
            elif "legbyes" in extras:
                extra_type = "legbye"
            elif "byes" in extras:
                extra_type = "bye"
            else:
                extra_type = None

            # Wicket details
            wicket_flag = "wickets" in delivery
            if wicket_flag:
                wk = delivery["wickets"][0]
                player_out = wk.get("player_out")
                wicket_kind = wk.get("kind")
                fielders = [f["name"] for f in wk.get("fielders", [])] if "fielders" in wk else []
            else:
                player_out = None
                wicket_kind = None
                fielders = []

            rows.append({
                "innings_number": idx,
                "batting_team": team,
                "over": over,
                "ball_in_over": ball_number,
                "batter": delivery.get("batter"),
                "non_striker": delivery.get("non_striker"),
                "bowler": delivery.get("bowler"),
                "runs_batter": runs.get("batter", 0),
                "runs_extras": runs.get("extras", 0),
                "runs_total": runs.get("total", 0),
                "extra_type": extra_type,
                "is_wicket": wicket_flag,
                "wicket_player_out": player_out,
                "wicket_type": wicket_kind,
                "wicket_fielders": fielders
            })

df = pd.DataFrame(rows)
df.head()


Unnamed: 0,innings_number,batting_team,over,ball_in_over,batter,non_striker,bowler,runs_batter,runs_extras,runs_total,extra_type,is_wicket,wicket_player_out,wicket_type,wicket_fielders
0,1,Scotland,0,1,HG Munsey,MRJ Watt,DS Airee,1,0,1,,False,,,[]
1,1,Scotland,0,2,MRJ Watt,HG Munsey,DS Airee,0,0,0,,False,,,[]
2,1,Scotland,0,3,MRJ Watt,HG Munsey,DS Airee,1,0,1,,False,,,[]
3,1,Scotland,0,4,HG Munsey,MRJ Watt,DS Airee,1,0,1,,False,,,[]
4,1,Scotland,0,5,MRJ Watt,HG Munsey,DS Airee,0,0,0,,True,MRJ Watt,bowled,[]


In [7]:
# create df where innings_number is 1
df_innings_1 = df[df["innings_number"] == 1]
# create df where innings_number is 2
df_innings_2 = df[df["innings_number"] == 2]

In [8]:
df_innings_1

Unnamed: 0,innings_number,batting_team,over,ball_in_over,batter,non_striker,bowler,runs_batter,runs_extras,runs_total,extra_type,is_wicket,wicket_player_out,wicket_type,wicket_fielders
0,1,Scotland,0,1,HG Munsey,MRJ Watt,DS Airee,1,0,1,,False,,,[]
1,1,Scotland,0,2,MRJ Watt,HG Munsey,DS Airee,0,0,0,,False,,,[]
2,1,Scotland,0,3,MRJ Watt,HG Munsey,DS Airee,1,0,1,,False,,,[]
3,1,Scotland,0,4,HG Munsey,MRJ Watt,DS Airee,1,0,1,,False,,,[]
4,1,Scotland,0,5,MRJ Watt,HG Munsey,DS Airee,0,0,0,,True,MRJ Watt,bowled,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,1,Scotland,18,6,MA Leask,JJ Davidson,Karan KC,1,0,1,,False,,,[]
115,1,Scotland,19,1,MA Leask,JJ Davidson,DS Airee,6,0,6,,False,,,[]
116,1,Scotland,19,2,MA Leask,JJ Davidson,DS Airee,0,0,0,,False,,,[]
117,1,Scotland,19,3,MA Leask,JJ Davidson,DS Airee,2,0,2,,False,,,[]


In [9]:
df_innings_2['extra_type'].value_counts()


extra_type
wide      3
legbye    2
Name: count, dtype: int64

In [10]:
import pandas as pd
import numpy as np

def make_scorecard(df_innings):
    # keep ball_id for batting order
    df = df_innings.reset_index().rename(columns={'index': 'ball_id'})

    # balls faced: exclude wides
    balls_df = df[df['extra_type'] != 'wide'].copy()

    # per-batter aggregation
    scorecard = (
        balls_df.groupby("batter").agg(
            Runs=("runs_batter", "sum"),
            Balls=("runs_batter", "size"),
            Fours=("runs_batter", lambda x: (x == 4).sum()),
            Sixes=("runs_batter", lambda x: (x == 6).sum()),
            Dots=("runs_batter",  lambda x: (x == 0).sum()),
        )
        .reset_index()
        .rename(columns={"batter": "Batsman"})
    )

    # batting order by first appearance
    order = (
        df.groupby("batter")["ball_id"]
        .min()
        .reset_index()
        .rename(columns={"batter": "Batsman", "ball_id": "first_ball"})
    )

    scorecard = (
        scorecard
        .merge(order, on="Batsman", how="left")
        .sort_values("first_ball")
        .drop(columns="first_ball")
    )

    # strike rate & dot percentage per batter
    scorecard["SR"] = (scorecard["Runs"] * 100 / scorecard["Balls"]).round(2)
    scorecard["Dot%"] = (scorecard["Dots"] * 100 / scorecard["Balls"]).round(0)

    # --- totals (batters only) ---
    total_runs_bat = scorecard["Runs"].sum()
    total_balls    = scorecard["Balls"].sum()
    total_fours    = scorecard["Fours"].sum()
    total_sixes    = scorecard["Sixes"].sum()
    total_dots     = scorecard["Dots"].sum()

    extras_total = df_innings["runs_extras"].sum()
    team_total   = total_runs_bat + extras_total

    team_sr   = round(team_total * 100 / total_balls, 2) if total_balls > 0 else 0
    team_dotp = round(total_dots * 100 / total_balls, 0) if total_balls > 0 else 0

    # TOTAL row (batters only)
    total_row = pd.DataFrame([{
        "Batsman": "TOTAL",
        "Runs": total_runs_bat,
        "Balls": total_balls,
        "Fours": total_fours,
        "Sixes": total_sixes,
        "Dots": total_dots,
        "Dot%": team_dotp,
        "SR": ""
    }])

    # EXTRAS row
    extras_row = pd.DataFrame([{
        "Batsman": "EXTRAS",
        "Runs": extras_total,
        "Balls": "",
        "Fours": "",
        "Sixes": "",
        "Dots": "",
        "Dot%": "",
        "SR": "",
    }])

    # GRAND TOTAL row (batters + extras)
    grand_row = pd.DataFrame([{
        "Batsman": "GRAND TOTAL",
        "Runs": team_total,
        "Balls": total_balls,
        "Fours": total_fours,
        "Sixes": total_sixes,
        "Dots": total_dots,
        "Dot%": team_dotp,
        "SR": team_sr,
    }])

    # combine
    scorecard = pd.concat([scorecard, total_row, extras_row, grand_row], ignore_index=True)

    return scorecard


In [11]:
scorecard_1 = make_scorecard(df_innings_1)
scorecard_2 = make_scorecard(df_innings_2)



In [12]:
scorecard_1


Unnamed: 0,Batsman,Runs,Balls,Fours,Sixes,Dots,SR,Dot%
0,HG Munsey,8,5.0,1.0,0.0,1.0,160.0,20.0
1,MRJ Watt,1,3.0,0.0,0.0,2.0,33.33,67.0
2,BJ McMullen,10,12.0,1.0,1.0,10.0,83.33,83.0
3,LR Naylor,1,1.0,0.0,0.0,0.0,100.0,0.0
4,FDW McCreath,2,6.0,0.0,0.0,4.0,33.33,67.0
5,MA Leask,46,46.0,4.0,1.0,21.0,100.0,46.0
6,MH Cross,15,22.0,0.0,0.0,9.0,68.18,41.0
7,CM McBride,0,1.0,0.0,0.0,1.0,0.0,100.0
8,SM Sharif,6,14.0,0.0,0.0,9.0,42.86,64.0
9,J Jarvis,0,2.0,0.0,0.0,2.0,0.0,100.0


In [13]:
scorecard_2

Unnamed: 0,Batsman,Runs,Balls,Fours,Sixes,Dots,SR,Dot%
0,K Bhurtel,30,35.0,4.0,0.0,19.0,85.71,54.0
1,Lokesh Bam,9,5.0,2.0,0.0,2.0,180.0,40.0
2,AK Sah,3,3.0,0.0,0.0,2.0,100.0,67.0
3,RK Paudel,7,14.0,0.0,0.0,7.0,50.0,50.0
4,DS Airee,14,21.0,0.0,0.0,11.0,66.67,52.0
5,Basir Ahamad,13,14.0,0.0,0.0,6.0,92.86,43.0
6,Rupesh Singh,3,7.0,0.0,0.0,4.0,42.86,57.0
7,K Thagunna,10,16.0,1.0,0.0,9.0,62.5,56.0
8,Karan KC,3,3.0,0.0,0.0,0.0,100.0,0.0
9,S Lamichhane,1,1.0,0.0,0.0,0.0,100.0,0.0


In [14]:
## innings without wides and no-balls
df_1 = df_innings_1[(df_innings_1['extra_type'] != 'wide') & (df_innings_1['extra_type'] != 'no-ball')].copy()
df_2 = df_innings_2[(df_innings_2['extra_type'] != 'wide') & (df_innings_2['extra_type'] != 'no-ball')].copy()


In [15]:
# runs in the first 6 overs, wickets lost print with /  here wides are important and noballs
print(f"{df_innings_1[df_innings_1.over < 6].runs_total.sum()} / {df_innings_1[(df_innings_1.over < 6) & df_innings_1.is_wicket].shape[0]}")
print(f"{df_innings_2[df_innings_2.over < 6].runs_total.sum()} / {df_innings_2[(df_innings_2.over < 6) & df_innings_2.is_wicket].shape[0]}")


32 / 5
31 / 2


In [16]:
#runs from 7-15 overs and wicket lost in the same 
print(f"{df_innings_1[(df_innings_1.over >= 6) & (df_innings_1.over <=15)]['runs_total'].sum()} / {df_innings_1[(df_innings_1.over >= 6) & (df_innings_1.over <=15) & (df_innings_1.is_wicket) ].shape[0]}")
print(f"{df_innings_2[(df_innings_2.over >= 6) & (df_innings_2.over <=15)]['runs_total'].sum()} / {df_innings_2[(df_innings_2.over >= 6) & (df_innings_2.over <=15) & (df_innings_2.is_wicket) ].shape[0]}")  

47 / 2
48 / 4


In [17]:
# runs in the last 5 overs wickets lost
print(f"{df_innings_1[df_innings_1.over > 15].runs_total.sum()} / {df_innings_1[(df_innings_1.over > 15) & df_innings_1.is_wicket].shape[0]}")
print(f"{df_innings_2[df_innings_2.over > 15].runs_total.sum()} / {df_innings_2[(df_innings_2.over > 15) & df_innings_2.is_wicket].shape[0]}")

18 / 3
19 / 2


In [18]:
# Dot = no run AND not a wide or no-ball
df_innings_1["is_dot"] = (
        ((df_innings_1["runs_batter"] == 0) | 
         (df_innings_1["extra_type"].isin(["bye", "legbye"]))) &
        (~df_innings_1["extra_type"].isin(["wide", "no_ball"]))
    )



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_innings_1["is_dot"] = (


In [19]:
df_innings_2["is_dot"] = (
        ((df_innings_2["runs_batter"] == 0) | 
         (df_innings_2["extra_type"].isin(["bye", "legbye"]))) &
        (~df_innings_2["extra_type"].isin(["wide", "no_ball"]))
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_innings_2["is_dot"] = (


In [20]:
# Dot balls count for all overs
df_innings_1["is_dot"].sum()


np.int64(62)

In [21]:
# check power play dot balls for innings 1
print(df_innings_1[df_innings_1.over < 6].is_dot.sum())
# for middle overs (starting the 7 over to the 15th over)
print(df_innings_1[(df_innings_1.over >=6) & (df_innings_1.over <=15)].is_dot.sum())
# for death overs (Starting from 16th over to the 20th over)
print(df_innings_1[df_innings_1.over >15].is_dot.sum())

23
26
13


In [22]:
# check power play dot balls for innings 2
print(df_innings_2[df_innings_2.over < 6].is_dot.sum())
# for middle overs
print(df_innings_2[(df_innings_2.over >=6) & (df_innings_2.over <=15)].is_dot.sum())
# for death overs
print(df_innings_2[df_innings_2.over >15].is_dot.sum())

20
30
10


In [23]:
# show total 4 runs runs_batter that were fours in power play overs for innings 2
print(df_innings_2[(df_innings_2.over < 6) ].runs_batter.sum())

# print(df_innings_2[(df_innings_2.over < 6) & (df_innings_2.runs_batter == 4)].runs_batter.sum())
# for middle overs
# print(df_innings_2[(df_innings_2.over >=6) & (df_innings_2.over <=15)].is_dot.sum())
# # for death overs
# print(df_innings_2[df_innings_2.over >15].is_dot.sum())

31


In [24]:
# Nepal Innings 2 powerplay runs_batter value counts
pp_2 = pd.DataFrame(df_2[df_2.over < 6].runs_batter.value_counts())
# South Africa Innings in middle overs
md_2 = pd.DataFrame(df_2[(df_2.over >= 6) & (df_2.over <= 15)].runs_batter.value_counts())
# # South Africa Innings in death overs

dt_2 = pd.DataFrame(df_2[df_2.over > 15].runs_batter.value_counts())

In [25]:
# merge pp_1, md_1, dt_1 dataframes
runs_summary_2 = pp_2.merge(md_2, left_index=True, right_index=True, how='outer', suffixes=('_PP', '_MD'))
runs_summary_2 = runs_summary_2.merge(dt_2, left_index=True, right_index=True, how='outer')
runs_summary_2 = runs_summary_2.rename(columns={0: 'DT'})
runs_summary_2 = runs_summary_2.fillna(0).astype(int)
runs_summary_2

Unnamed: 0_level_0,count_PP,count_MD,count
runs_batter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,20,30,10
1,10,20,12
2,1,6,0
3,1,2,0
4,4,2,1


In [26]:
# South Africa Innings 2 powerplay runs_batter value counts
pp_1 = pd.DataFrame(df_1[df_1.over < 6].runs_batter.value_counts())
# South Africa Innings in middle overs
md_1 = pd.DataFrame(df_1[(df_1.over >= 6) & (df_1.over <= 15)].runs_batter.value_counts())
# # South Africa Innings in death overs

dt_1 = pd.DataFrame(df_1[df_1.over > 15].runs_batter.value_counts())

In [27]:
# merge pp_1, md_1, dt_1 dataframes
runs_summary = pp_1.merge(md_1, left_index=True, right_index=True, how='outer', suffixes=('_PP', '_MD'))
runs_summary = runs_summary.merge(dt_1, left_index=True, right_index=True, how='outer')
runs_summary = runs_summary.rename(columns={0: 'DT'})
runs_summary = runs_summary.fillna(0).astype(int)
runs_summary

Unnamed: 0_level_0,count_PP,count_MD,count
runs_batter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,23,26,13
1,6,29,4
2,2,3,4
4,4,2,0
6,1,0,1


In [193]:
6*2 + 10 * 4 + 2 * 5 + 1 * (26+9 + 9)

106