# Batting Performance Analysis in IPL

![batsmans.jfif](attachment:batsmans.jfif)

# Reading Data 

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

balls = pd.read_csv("IPL Deliveries 2008-2020.csv")
matches = pd.read_csv("IPL Matches 2008-2020.csv")

display(balls.head(2),matches.head(2))

Unnamed: 0,id,inning,over,ball,batsman,non_striker,bowler,batsman_runs,extra_runs,total_runs,non_boundary,is_wicket,dismissal_kind,player_dismissed,fielder,extras_type,batting_team,bowling_team
0,335982,1,6,5,RT Ponting,BB McCullum,AA Noffke,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
1,335982,1,6,6,BB McCullum,RT Ponting,AA Noffke,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore


Unnamed: 0,id,city,date,player_of_match,venue,neutral_venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,eliminator,method,umpire1,umpire2
0,335982,Bangalore,2008-04-18,BB McCullum,M Chinnaswamy Stadium,0,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,N,,Asad Rauf,RE Koertzen
1,335983,Chandigarh,2008-04-19,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",0,Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,N,,MR Benson,SL Shastri


# Adding Required Data

Preparing DataSet for Inning summaries such that for every inning and batsman, `Runs`,`Balls`,`Strike Rate`,`Fours`,`Sixes` etc are recorded.

In [2]:
balls_faced = balls.loc[(balls["extras_type"] != "wides")]

batsman_runs = balls_faced.groupby(["id","batsman"], as_index=False)["batsman_runs"].sum()
ball_taken   = balls_faced.groupby(["id","batsman"], as_index=False)["ball"].count()

four_count = balls_faced.loc[balls_faced["batsman_runs"]==4]
four_inning = four_count.groupby(["id","batsman"], as_index=False)["batsman_runs"].count()

six_count = balls_faced.loc[balls_faced["batsman_runs"]==6]
six_inning = six_count.groupby(["id","batsman"], as_index=False)["batsman_runs"].count()

innings_summary1 = pd.merge(batsman_runs, ball_taken, on=["id","batsman"])

innings_summary1["strike_rate"] = innings_summary1["batsman_runs"]*100/innings_summary1["ball"]

innings_summary2 = pd.merge(innings_summary1, four_inning, on=["id","batsman"])
innings_summary3 = pd.merge(innings_summary2, six_inning, on=["id","batsman"])

innings_summaries_df = pd.DataFrame({
    "match_id" : innings_summary3.id,
    "batsman"  : innings_summary3.batsman,
    "runs" : innings_summary3.batsman_runs_x,
    "ball" : innings_summary3.ball,
    "strike_rate" : innings_summary3.strike_rate,
    "sixes" : innings_summary3.batsman_runs,
    "fours" : innings_summary3.batsman_runs_y,
})

display(innings_summaries_df)

Unnamed: 0,match_id,batsman,runs,ball,strike_rate,sixes,fours
0,335982,BB McCullum,158,73,216.438356,13,10
1,335982,P Kumar,18,15,120.000000,2,1
2,335982,RT Ponting,20,20,100.000000,1,1
3,335983,JR Hopes,71,33,215.151515,3,10
4,335983,K Goel,24,19,126.315789,1,2
...,...,...,...,...,...,...,...
3714,1237181,Q de Kock,20,12,166.666667,1,3
3715,1237181,RG Sharma,68,51,133.333333,4,5
3716,1237181,RR Pant,56,38,147.368421,2,4
3717,1237181,SA Yadav,19,20,95.000000,1,1


# 1. Average of Runs

In [3]:
balls_faced = balls.loc[balls["batsman_runs"]]
inning_runs = balls.groupby(["id","batsman"], as_index=False)["batsman_runs"].sum()

avg_runs_df = pd.DataFrame({
     "batsman" : inning_runs.batsman,
     "average" : inning_runs.batsman_runs,
})

avg_runs_df = avg_runs_df.groupby("batsman" , as_index=False)["average"].mean()
avg_runs_df = avg_runs_df.sort_values("average", ascending=False).reset_index(drop=True).loc[0:10]

avg_runs_df

Unnamed: 0,batsman,average
0,JM Bairstow,37.619048
1,LMP Simmons,37.206897
2,DA Warner,37.0
3,KL Rahul,36.763889
4,CH Gayle,36.427481
5,HM Amla,36.0625
6,SE Marsh,35.898551
7,ML Hayden,34.59375
8,MEK Hussey,34.086207
9,RD Gaikwad,34.0


In [29]:
fig = px.bar(avg_runs_df, x='batsman', y='average',labels={"batsman":"Batsman", "average":"Average Runs"}, height=400)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(title_text='Maximum Average')
fig.show()

# 2. Runs in a Match

In [30]:
highest_runs = innings_summaries_df.sort_values("runs", ascending=False).reset_index(drop=False).loc[0:11]

highest_runs_df = pd.DataFrame({
    "batsman" : highest_runs["batsman"],
    "runs"    : highest_runs["runs"],
    "balls"   : highest_runs["ball"]  
})

highest_runs_df

Unnamed: 0,batsman,runs,balls
0,CH Gayle,175,66
1,BB McCullum,158,73
2,AB de Villiers,133,59
3,KL Rahul,132,69
4,AB de Villiers,129,52
5,CH Gayle,128,62
6,RR Pant,128,63
7,M Vijay,127,56
8,DA Warner,126,59
9,V Sehwag,122,58


In [37]:
fig = px.bar(highest_runs_df, x='batsman', y='runs',labels={"batsman":"Batsman", "runs":"Runs"}, height=400)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(title_text='Highest score in a match')
fig.show()

# 3. Total Fours

In [38]:
total_fours = innings_summaries_df.groupby("batsman", as_index=False)["fours"].sum()
fours_df = total_fours.sort_values("fours", ascending=False).reset_index(drop=True).loc[0:10]
fours_df

Unnamed: 0,batsman,fours
0,DA Warner,417
1,V Kohli,356
2,SK Raina,348
3,RG Sharma,344
4,CH Gayle,343
5,S Dhawan,337
6,RV Uthappa,311
7,AB de Villiers,299
8,SR Watson,269
9,MS Dhoni,249


In [39]:
fig = px.bar(fours_df, x='batsman', y='fours',labels={"batsman":"Batsman", "fours":"Number of Fours"}, height=400)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(title_text='Fours by individuals')
fig.show()

# 4. Fours in a Match


In [40]:
fours_df_1 = pd.DataFrame({
    "batsman" : innings_summaries_df["batsman"],
    "fours"    : innings_summaries_df["fours"],  
})

fours_df_1 = fours_df_1.sort_values("fours", ascending=False).reset_index(drop=True).loc[0:10]
fours_df_1

Unnamed: 0,batsman,fours
0,PC Valthaty,19
1,AB de Villiers,19
2,AM Rahane,16
3,MJ Lumb,16
4,RR Pant,15
5,M Vijay,15
6,GJ Maxwell,15
7,SS Iyer,15
8,Q de Kock,15
9,SE Marsh,14


In [41]:
fig = px.bar(fours_df_1, x='batsman', y='fours',labels={"batsman":"Batsman", "fours":"Number of Fours"}, height=400)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(title_text='Maximum Fours in a match')
fig.show()

# 5. Total Sixes 

In [49]:
total_sixes_1 = innings_summaries_df.groupby("batsman", as_index=False)["sixes"].sum()
total_sixes_1 = total_sixes_1.sort_values("sixes", ascending=False).reset_index(drop=True).loc[0:10]
total_sixes_1

Unnamed: 0,batsman,sixes
0,CH Gayle,334
1,AB de Villiers,224
2,RG Sharma,204
3,MS Dhoni,196
4,DA Warner,191
5,V Kohli,190
6,SK Raina,180
7,SR Watson,170
8,KA Pollard,164
9,RV Uthappa,145


In [50]:
fig = px.bar(total_sixes_1, x='batsman', y='sixes',labels={"batsman":"Batsman", "sixes":"Number of sixes"}, height=400)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(title_text='Sixes by individuals')
fig.show()

# 6. Sixes in a Match


In [45]:
sixes_df_1 = pd.DataFrame({
    "batsman" : innings_summaries_df["batsman"],
    "sixes"    : innings_summaries_df["sixes"],  
})

sixes_df_1 = sixes_df_1.sort_values("sixes", ascending=False).reset_index(drop=True).loc[0:10]
sixes_df_1

Unnamed: 0,batsman,sixes
0,CH Gayle,17
1,BB McCullum,13
2,CH Gayle,13
3,CH Gayle,12
4,AB de Villiers,12
5,ST Jayasuriya,11
6,M Vijay,11
7,AD Russell,11
8,CH Gayle,11
9,KA Pollard,10


In [47]:
fig = px.bar(sixes_df_1, x='batsman', y='sixes',labels={"batsman":"Batsman", "sixes":"Number of Sixes"}, height=400)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(title_text='Maximum Sixes in a match')
fig.show()