In [None]:
import pandas as pd 
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo

In [3]:
matches = pd.read_csv('../../datasets/matches.csv')
delivery = pd.read_csv('../../datasets/deliveries.csv')

In [4]:
ipl = pd.merge(delivery, matches, left_on='match_id', right_on='id')

In [5]:
ipl.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen


In [6]:
top50 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.tolist() 
new_ipl = ipl[ipl['batter'].isin(top50)]

In [7]:
runs = new_ipl.groupby('batter')['batsman_runs'].sum()
balls = new_ipl['batter'].value_counts()
strike_rate = ((runs/balls)*100).sort_values(ascending=False).reset_index()


In [8]:
strike_rate.rename(columns={'batter':'batsman', 0:'strike'}, inplace=True)

In [9]:
strike_rate

Unnamed: 0,batsman,strike
0,AD Russell,164.224422
1,GJ Maxwell,150.488599
2,V Sehwag,148.827059
3,AB de Villiers,148.580442
4,RR Pant,143.597561
5,SA Yadav,142.505948
6,JC Buttler,142.238984
7,CH Gayle,142.121729
8,KA Pollard,140.457703
9,HH Pandya,139.69129


In [10]:
# calculate Avg
out = ipl[ipl['player_dismissed'].isin(top50)]
nout = out['player_dismissed'].value_counts()
avg = (runs/nout).sort_values(ascending=False).reset_index()
avg.rename(columns={'index':'batsman', 0:'avg'}, inplace=True)
avg = avg.merge(strike_rate, on='batsman')

In [11]:
avg

Unnamed: 0,batsman,avg,strike
0,KL Rahul,44.657143,131.050866
1,RD Gaikwad,41.754386,133.632791
2,DA Warner,40.042683,135.429986
3,AB de Villiers,39.853846,148.580442
4,CH Gayle,39.65873,142.121729
5,SE Marsh,39.507937,130.109775
6,MS Dhoni,39.126866,132.835065
7,V Kohli,38.714976,128.511867
8,Shubman Gill,37.835294,132.236842
9,JC Buttler,37.715789,142.238984


### Scatter Plot

In [12]:
trace = go.Scatter(
    x = avg['avg'], y = avg['strike'],
    mode = 'markers')
data = [trace]
layout = go.Layout(title='Batman avg ',
                   xaxis={'title':'avg',},
                   yaxis={'title':'batsman_runs'}, )
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

### Line Chart 

In [13]:
single = ipl[ipl['batter']=='V Kohli']
performance = single.groupby('season')['batsman_runs'].sum().reset_index()
performance.rename(columns={'batsman_runs':'Total Runs'},inplace=True)

In [14]:
trace = go.Scatter(x = performance['season'],y = performance['Total Runs'],mode='lines+markers',marker={'color':'red','size':16},text=avg['batsman'])
data = [trace]
layout = go.Layout(title='Kohli Season by Season Performance',xaxis=dict(title='Season'),yaxis=dict(title='Runs'))
fig = go.Figure(data = data , layout = layout)
pyo.plot(fig)

'temp-plot.html'

In [15]:
single_vk = ipl[ipl['batter']=='V Kohli']
performance_vk = single_vk.groupby('season')['batsman_runs'].sum().reset_index()
performance_vk.rename(columns={'batsman_runs':'Total Runs'},inplace=True)

single_ms = ipl[ipl['batter']=='MS Dhoni']
performance_ms = single_ms.groupby('season')['batsman_runs'].sum().reset_index()
performance_ms.rename(columns={'batsman_runs':'Total Runs'},inplace=True)

In [16]:
trace1 = go.Scatter(x = performance_vk['season'],y = performance_vk['Total Runs'],mode='lines+markers',marker={'color':'red','size':16},text=avg['batsman'],name='Virat Kohli')
trace2 = go.Scatter(x = performance_ms['season'],y = performance_ms['Total Runs'],mode='lines+markers',marker={'color':'blue','size':16},text=avg['batsman'],name='MS Dhoni')
data = [trace1,trace2]
layout = go.Layout(title='Kohli vs Dhoni Season by Season Performance',xaxis=dict(title='Season'),yaxis=dict(title='Runs'))
fig = go.Figure(data = data , layout = layout)
pyo.plot(fig)

'temp-plot.html'

In [17]:

def batsman_comp(*name):
    data = []
    for i in name:
        single = ipl[ipl['batter'] == i]
        performance = single.groupby('season')['batsman_runs'].sum().reset_index()

        trace = go.Scatter(x=performance['season'], y=performance['batsman_runs'],
                           mode="lines+markers", name=i)

        data.append(trace)
    layout = go.Layout(title="Batsman Record Comparator",
                       xaxis={'title': 'Season'},
                       yaxis={'title': 'Runs'})

    fig = go.Figure(data=data, layout=layout)
    pyo.plot(fig)


In [18]:
batsman_comp('V Kohli', 'MS Dhoni', 'RG Sharma', 'AB de Villiers', 'CH Gayle', 'DA Warner', 'SR Watson', 'SK Rain')

### Bar Chart

In [19]:
top10 = ipl.groupby('batter')['batsman_runs'].sum().head(10).reset_index()
top10_df = ipl[ipl['batter'].isin(top10['batter'])]

In [20]:
trace = go.Bar(x = top10['batter'],y = top10['batsman_runs'])
data = [trace]
layout = go.Layout(title='Top 10 Batsman',xaxis=dict(title='Batsman'),yaxis=dict(title='Runs'))
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='top10batsman.html')

'top10batsman.html'

### Type of Bar Chart
- Nested Barchart
- Stack Barchart
- Overlayed Barchart

In [21]:
iw = top10_df.groupby(['batter','inning'])['batsman_runs'].sum().reset_index()          
mask1 = iw['inning'] == 1
mask2 = iw['inning'] == 2
one = iw[mask1]
two = iw[mask2]
one.rename(columns={'batsman_runs':'Runs','batter':"batsman"},inplace=True)
two.rename(columns={'batsman_runs':'Runs','batter':"batsman"},inplace=True)
final = one.merge(two,on='batsman')
final.rename(columns={'Runs_x':'1st inning','Runs_y':'2nd inning'},inplace=True)
final = final[['batsman','1st inning','2nd inning']]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [22]:
final

Unnamed: 0,batsman,1st inning,2nd inning
0,A Ashish Reddy,166,114
1,A Badoni,464,170
2,A Chopra,51,2
3,A Choudhary,15,10
4,A Flintoff,22,40
5,A Kumble,11,24
6,A Manohar,175,56


In [23]:
trace1 = go.Bar(x = final['batsman'],y = final['1st inning'],name='1st inning',marker={'color':'red'})
trace2 = go.Bar(x = final['batsman'],y = final['2nd inning'],name='2nd inning',marker={'color':'blue'})
data = [trace1,trace2]
layout = go.Layout(title = '1st vs 2nd Inning Performance of Top 10 Batsman',xaxis=dict
(title='Batsman'),yaxis=dict(title='Runs'),barmode='overlay')
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='1st_vs_2nd_inning.html')

'1st_vs_2nd_inning.html'

In [24]:
trace1 = go.Bar(x = final['batsman'],y = final['1st inning'],name='1st inning',marker={'color':'red'})
trace2 = go.Bar(x = final['batsman'],y = final['2nd inning'],name='2nd inning',marker={'color':'blue'})
data = [trace1,trace2]
layout = go.Layout(title = '1st vs 2nd Inning Performance of Top 10 Batsman',xaxis=dict
(title='Batsman'),yaxis=dict(title='Runs'),barmode='stack')
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='1st_vs_2nd_inning.html')


'1st_vs_2nd_inning.html'

In [25]:
trace1 = go.Bar(x = final['batsman'],y = final['1st inning'],name='1st inning',marker={'color':'red'})
trace2 = go.Bar(x = final['batsman'],y = final['2nd inning'],name='2nd inning',marker={'color':'blue'})
data = [trace1,trace2]
layout = go.Layout(title = '1st vs 2nd Inning Performance of Top 10 Batsman',xaxis=dict
(title='Batsman'),yaxis=dict(title='Runs'),barmode='group')
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='1st_vs_2nd_inning.html')


'1st_vs_2nd_inning.html'

### Bubble Plot

In [26]:
trace = go.Scatter(x = avg['avg'],y = avg['strike'],mode='markers',text=avg['batsman'],marker={'color':'red','size':16})
data = [trace]
layout = go.Layout(title='Avg vs Strike Rate of Top 50 Batsman',xaxis=dict(title='Batsman Avg'),yaxis
=dict(title='Batsman Strike Rate'),)
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='avg_vs_strike_rate.html')

'avg_vs_strike_rate.html'

In [27]:
strike_rate

Unnamed: 0,batsman,strike
0,AD Russell,164.224422
1,GJ Maxwell,150.488599
2,V Sehwag,148.827059
3,AB de Villiers,148.580442
4,RR Pant,143.597561
5,SA Yadav,142.505948
6,JC Buttler,142.238984
7,CH Gayle,142.121729
8,KA Pollard,140.457703
9,HH Pandya,139.69129


In [28]:
new_ipl = new_ipl[new_ipl['batsman_runs']==6]

In [29]:
six = new_ipl.groupby('batter')['batsman_runs'].count().reset_index()

In [30]:
six

Unnamed: 0,batter,batsman_runs
0,AB de Villiers,253
1,AD Russell,209
2,AJ Finch,78
3,AM Rahane,103
4,AT Rayudu,173
5,BB McCullum,130
6,CH Gayle,359
7,DA Miller,134
8,DA Warner,236
9,DR Smith,117


In [31]:
x = avg.merge(six,left_on='batsman',right_on='batter')

In [32]:
trace = go.Scatter(x = x['avg'],y = x['batsman_runs'],mode='markers',marker={'color':'red',})
data = [trace]
layout = go.Layout(title="Bubble Chart",
                   xaxis={'title':'avg'},
                   yaxis={'title':'SR'})
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig)

'temp-plot.html'

### Box Plot

In [33]:
match_agg = delivery.groupby(['match_id'])['total_runs'].sum().reset_index()
season_wise = match_agg.merge(matches,left_on='match_id',right_on='id')[['match_id','total_runs','season']]
season_wise

Unnamed: 0,match_id,total_runs,season
0,335982,304,2007/08
1,335983,447,2007/08
2,335984,261,2007/08
3,335985,331,2007/08
4,335986,222,2007/08
...,...,...,...
1090,1426307,429,2024
1091,1426309,323,2024
1092,1426310,346,2024
1093,1426311,314,2024


In [34]:
trace = go.Box(x = season_wise['total_runs'],name='All Seasons')
data = [trace]
layout = go.Layout(title="Total Score Analysis",
                   xaxis={'title':"Total Score "})
fig = go.Figure(data= data , layout= layout)
pyo.plot(fig)

'temp-plot.html'

In [35]:
trace1 = go.Box(x = season_wise[season_wise['season']=='2017']['total_runs'],name='2017')
trace2 = go.Box(x = season_wise[season_wise['season']=='2009']['total_runs'],name='2008')
data = [trace1,trace2]
layout = go.Layout(title="Total Score Analysis",
                   xaxis={'title':"Total Score "})
fig = go.Figure(data= data , layout= layout)
pyo.plot(fig)

'temp-plot.html'

In [36]:
!pip install scipy




In [37]:
import scipy
print(scipy.__version__)


1.15.2


### Histogram

In [38]:
x = delivery.groupby('batter')['batsman_runs'].count()>150
x = x[x].index.tolist()
new = delivery[delivery['batter'].isin(x)]

run = new.groupby('batter')['batsman_runs'].sum()
balls = new.groupby('batter')['batsman_runs'].count()

sr = (run/balls)*100
sr = sr.reset_index()
sr.rename(columns={'batter':'batsman','batsman_runs':'sr'},inplace=True)
sr

Unnamed: 0,batsman,sr
0,A Ashish Reddy,142.857143
1,A Badoni,125.544554
2,A Manohar,127.624309
3,A Mishra,86.590909
4,A Symonds,124.711908
...,...,...
235,Y Venugopal Rao,113.872832
236,YBK Jaiswal,146.757991
237,YK Pathan,138.046272
238,YV Takawale,104.918033


In [39]:
trace = go.Histogram(x = sr['sr'],xbins={'size':2,'start':50,'end':100},)
data = [trace]
layout = go.Layout(title='Strike Rate Analysis')
fig = go.Figure(data= data , layout= layout)
pyo.plot(fig)

'temp-plot.html'

### Distplots

In [40]:
# Import required libraries
import plotly.figure_factory as ff
hist_data = [avg['avg']]
group_label = ['Average']
fig = ff.create_distplot(hist_data=hist_data,group_labels=group_label)
pyo.plot(fig)

'temp-plot.html'

In [41]:
# Import required libraries
import plotly.figure_factory as ff
hist_data = [avg['avg'],avg['strike']]
group_label = ['Average','strike']
fig = ff.create_distplot(hist_data=hist_data,group_labels=group_label,bin_size=[10,20])
pyo.plot(fig)

'temp-plot.html'

### Heatmaps

In [42]:
six = delivery[delivery['batsman_runs']==6]
six = six.groupby(['batting_team','over'])['batsman_runs'].count().reset_index()
six.rename(columns={'batting_team':'team','batsman_runs':'six'},inplace=True)
six

Unnamed: 0,team,over,six
0,Chennai Super Kings,0,9
1,Chennai Super Kings,1,36
2,Chennai Super Kings,2,67
3,Chennai Super Kings,3,71
4,Chennai Super Kings,4,75
...,...,...,...
371,Sunrisers Hyderabad,15,56
372,Sunrisers Hyderabad,16,59
373,Sunrisers Hyderabad,17,73
374,Sunrisers Hyderabad,18,94


In [43]:
# plot heatmap
trace = go.Heatmap(x= six['team'],y = six['over'], z = six['six'])
data = [trace]
layout = go.Layout(title="Six heatmaps")
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [44]:
dot = delivery[delivery['batsman_runs']==0]
dot = dot.groupby(['batting_team','over'])['batsman_runs'].count().reset_index()
dot.rename(columns={'batting_team':'team','batsman_runs':'dot'},inplace=True)
dot

Unnamed: 0,team,over,dot
0,Chennai Super Kings,0,927
1,Chennai Super Kings,1,846
2,Chennai Super Kings,2,741
3,Chennai Super Kings,3,697
4,Chennai Super Kings,4,666
...,...,...,...
375,Sunrisers Hyderabad,15,376
376,Sunrisers Hyderabad,16,365
377,Sunrisers Hyderabad,17,340
378,Sunrisers Hyderabad,18,335


In [51]:
from  plotly import tools 
trace1 = go.Heatmap(x= six['team'],y = six['over'], z = six['six'].values.tolist())
trace2 = go.Heatmap(x= dot['team'],y = dot['over'], z = dot['dot'].values.tolist())
fig = tools.make_subplots(rows=1,cols=2,subplot_titles = ['6s','os'],shared_yaxes=True)
fig.append_trace(trace1,1,1)
fig.append_trace(trace2,1,1)
pyo.plot(fig)

'temp-plot.html'