# Plotly Graph Objects
### Session Objectives
- Introduction to Plotly
- Scatter Plots
- Line Charts
- Bar Charts
- Bubble plot
- Box Plot
- Histograms
- Distplots
- Heatmaps

In [146]:
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

In [147]:
match = pd.read_csv('matches.csv')
delivery = pd.read_csv('deliveries.csv')

ipl = delivery.merge(match, left_on='match_id', right_on='id')
print(ipl.shape)
ipl.head()

(260920, 37)


Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen


## 1. Scatter Plots
![Ice Cream Scatter Plot](scatter_plot.png)

In [148]:
# Scatter plots are drawn between to continuous variables 
# Problem - We are going to draw a scatter plot between Batsman Avg(X axis) and
# Batsman Strike Rate (Y axis) of the top 50 batsman in IPL (All time)

In [149]:
# Avg Vs SR graph of Top 50 batsman (in terms of total runs)
top50 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.tolist()
top50

['V Kohli',
 'S Dhawan',
 'RG Sharma',
 'DA Warner',
 'SK Raina',
 'MS Dhoni',
 'AB de Villiers',
 'CH Gayle',
 'RV Uthappa',
 'KD Karthik',
 'KL Rahul',
 'AM Rahane',
 'F du Plessis',
 'SV Samson',
 'AT Rayudu',
 'G Gambhir',
 'SR Watson',
 'MK Pandey',
 'SA Yadav',
 'JC Buttler',
 'KA Pollard',
 'RR Pant',
 'YK Pathan',
 'Shubman Gill',
 'Q de Kock',
 'SS Iyer',
 'RA Jadeja',
 'WP Saha',
 'DA Miller',
 'BB McCullum',
 'PA Patel',
 'GJ Maxwell',
 'Yuvraj Singh',
 'V Sehwag',
 'MA Agarwal',
 'Ishan Kishan',
 'N Rana',
 'M Vijay',
 'HH Pandya',
 'SPD Smith',
 'SE Marsh',
 'AD Russell',
 'JH Kallis',
 'DR Smith',
 'RD Gaikwad',
 'SR Tendulkar',
 'RA Tripathi',
 'R Dravid',
 'KS Williamson',
 'AJ Finch']

In [150]:
# Fetching a new dataframe with top 50 batsman
new_ipl = ipl.query("batter in @top50")
new_ipl

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
5,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,6,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260763,1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,4,1,RA Tripathi,MA Starc,AK Markram,0,...,bat,Kolkata Knight Riders,wickets,8.0,114.0,20.0,N,,J Madanagopal,Nitin Menon
260764,1426312,1,Sunrisers Hyderabad,Kolkata Knight Riders,4,2,RA Tripathi,MA Starc,AK Markram,0,...,bat,Kolkata Knight Riders,wickets,8.0,114.0,20.0,N,,J Madanagopal,Nitin Menon
260910,1426312,2,Kolkata Knight Riders,Sunrisers Hyderabad,8,6,SS Iyer,Shahbaz Ahmed,VR Iyer,4,...,bat,Kolkata Knight Riders,wickets,8.0,114.0,20.0,N,,J Madanagopal,Nitin Menon
260915,1426312,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,5,SS Iyer,AK Markram,VR Iyer,1,...,bat,Kolkata Knight Riders,wickets,8.0,114.0,20.0,N,,J Madanagopal,Nitin Menon


In [151]:
# Calculating SR
# SR = [(number of runs scored) / (number of balls played)] * 100
runs = new_ipl.groupby('batter')['batsman_runs'].sum()
balls = new_ipl.groupby('batter')['batsman_runs'].count()

sr = (runs / balls) * 100

sr = sr.reset_index()
sr

Unnamed: 0,batter,batsman_runs
0,AB de Villiers,148.580442
1,AD Russell,164.224422
2,AJ Finch,123.349057
3,AM Rahane,120.32141
4,AT Rayudu,124.584527
5,BB McCullum,126.848592
6,CH Gayle,142.121729
7,DA Miller,134.684477
8,DA Warner,135.429986
9,DR Smith,132.279534


In [152]:
# Calculating Avg
# Avg = (Total number of Runs)/(Number of outs)

# Calculating number of outs for top 50 batsman
out = ipl.query('player_dismissed in @top50')
print(out.shape)
out.head()

(5785, 37)


Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
131,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,1,1,R Dravid,I Sharma,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
138,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,2,2,V Kohli,AB Dinda,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
154,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,4,5,JH Kallis,AB Agarkar,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
242,335983,1,Chennai Super Kings,Kings XI Punjab,2,5,PA Patel,B Lee,ML Hayden,0,...,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri
269,335983,1,Chennai Super Kings,Kings XI Punjab,7,1,MS Dhoni,JR Hopes,MEK Hussey,0,...,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri


In [153]:
nouts = out.player_dismissed.value_counts()
nouts

player_dismissed
RG Sharma         223
V Kohli           207
S Dhawan          193
KD Karthik        184
RV Uthappa        180
SK Raina          171
DA Warner         164
AT Rayudu         155
AM Rahane         154
SV Samson         144
G Gambhir         136
MS Dhoni          134
MK Pandey         133
AB de Villiers    130
F du Plessis      127
PA Patel          126
SR Watson         126
CH Gayle          126
WP Saha           121
KA Pollard        121
MA Agarwal        117
SA Yadav          113
GJ Maxwell        112
Yuvraj Singh      111
YK Pathan         110
RA Jadeja         108
KL Rahul          105
BB McCullum       104
Q de Kock         102
M Vijay           101
V Sehwag           99
SS Iyer            98
JC Buttler         95
N Rana             93
Ishan Kishan       93
RR Pant            93
HH Pandya          89
AD Russell         86
Shubman Gill       85
JH Kallis          85
AJ Finch           84
DR Smith           84
RA Tripathi        83
DA Miller          82
R Dravid       

In [154]:
avg = runs / nouts

avg = avg.reset_index()
avg.rename(columns={'index': 'batter', 0: 'avg'}, inplace=True)

avg = avg.merge(sr, on='batter')
avg.rename(columns={'batsman_runs': 'strike_rate'}, inplace=True)
avg

Unnamed: 0,batter,avg,strike_rate
0,AB de Villiers,39.853846,148.580442
1,AD Russell,28.930233,164.224422
2,AJ Finch,24.904762,123.349057
3,AM Rahane,30.142857,120.32141
4,AT Rayudu,28.051613,124.584527
5,BB McCullum,27.711538,126.848592
6,CH Gayle,39.65873,142.121729
7,DA Miller,35.658537,134.684477
8,DA Warner,40.042683,135.429986
9,DR Smith,28.392857,132.279534


In [155]:
# Plot scatter Plot here
import plotly

trace1 = go.Scatter(x=avg.avg, y=avg.strike_rate, mode='markers', text=avg.batter,
                    marker={'color': '#b51c76', 'size': 8})

data = [trace1]
layout = go.Layout(title='Batsman Avg Vs SR',
                   xaxis={'title': 'Batsman Average'},
                   yaxis={'title': 'Batsman Strike Rate'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)
# pyo.plot(fig, filename='myfile.html')

## Line Chart

<p>It's an extension of Scatter plot. Usually used to show a time series data</p>
![Line Chart]()

In [156]:
# Year by Year batsman performance

single = ipl.query('batter == "V Kohli"')
performance = single.groupby('season')['batsman_runs'].sum().reset_index()
performance.rename(columns={'batsman_runs': 'total_runs'}, inplace=True)
print(performance)

single1 = ipl.query('batter == "MS Dhoni"')
performance1 = single1.groupby('season')['batsman_runs'].sum().reset_index()
performance1.rename(columns={'batsman_runs': 'total_runs'}, inplace=True)
# print(performance1)

     season  total_runs
0   2007/08         165
1      2009         246
2   2009/10         307
3      2011         557
4      2012         364
5      2013         639
6      2014         359
7      2015         505
8      2016         973
9      2017         308
10     2018         530
11     2019         464
12  2020/21         471
13     2021         405
14     2022         341
15     2023         639
16     2024         741


In [157]:
# Plot Line chart here

trace1 = go.Scatter(x=performance.season, y=performance.total_runs, mode='lines+markers',
                    marker={'color': '#b51c76', 'size': 12}, name='Virat Kohli')
trace2 = go.Scatter(x=performance1.season, y=performance1.total_runs, mode='lines',
                    marker={'color': '#bb8fce', 'size': 13}, name='MS Dhoni')

data = [trace1, trace2]
layout = go.Layout(title='Year by Year Performance',
                   xaxis={'title': 'Season'},
                   yaxis={'title': 'Total Runs'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

In [158]:
# Multiple Line Charts

def batsman_comp(*name):
    data = []
    for i in name:
        single = ipl.query(f'batter == "{i}"')
        performance = single.groupby('season')['batsman_runs'].sum().reset_index()

        trace = go.Scatter(x=performance.season, y=performance.batsman_runs, mode='lines + markers', name=i)

        data.append(trace)
    layout = go.Layout(title='Batsman Record Comparator',
                       xaxis={'title': 'Season'},
                       yaxis={'title': 'Runs'})

    fig = go.Figure(data=data, layout=layout)

    plotly.io.show(fig)

# def batsman_comp(*name):
#     data=[]
#     for i in name:
#         single=ipl[ipl['batsman']==i]
#         performance=single.groupby('season')['batsman_runs'].sum().reset_index()
# 
#         trace=go.Scatter(x=performance['season'],y=performance['batsman_runs']
#                          ,mode='lines + markers',name=i)
#         
#         data.append(trace)
#     
#     layout=go.Layout(title='Batsman Record Comparator',
#                 xaxis={'title':'Season'},
#                 yaxis={'title':'Runs'})
# 
#     fig=go.Figure(data=data,layout=layout)
# 
#     pyo.plot(fig,filename='year_by_year')

In [159]:
batsman_comp('V Kohli', 'RG Sharma', 'DA Warner', 'MS Dhoni')

## Bar plot

<p>Used to show relation between one categorical and 1 numerical data</p>
<img src="https://images.ctfassets.net/fevtq3bap7tj/5FSJrJeDIIGAmGCsGcQ8S4/e2fc867a487614b47f72104a36fbcf7f/simple-column.png"/>

In [160]:
top10 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.tolist()
top10

['V Kohli',
 'S Dhawan',
 'RG Sharma',
 'DA Warner',
 'SK Raina',
 'MS Dhoni',
 'AB de Villiers',
 'CH Gayle',
 'RV Uthappa',
 'KD Karthik']

In [161]:
top10_df = ipl.query('batter in @top10')
print(top10_df.shape)
top10_df.head()

(44492, 37)


Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
132,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,1,2,V Kohli,I Sharma,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
133,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,1,3,V Kohli,I Sharma,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
134,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,1,4,V Kohli,I Sharma,W Jaffer,1,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
137,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,2,1,V Kohli,AB Dinda,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
138,335982,2,Royal Challengers Bangalore,Kolkata Knight Riders,2,2,V Kohli,AB Dinda,W Jaffer,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen


In [162]:
top10_score = top10_df.groupby('batter')['batsman_runs'].sum().reset_index()
top10_score

Unnamed: 0,batter,batsman_runs
0,AB de Villiers,5181
1,CH Gayle,4997
2,DA Warner,6567
3,KD Karthik,4843
4,MS Dhoni,5243
5,RG Sharma,6630
6,RV Uthappa,4954
7,S Dhawan,6769
8,SK Raina,5536
9,V Kohli,8014


In [163]:
# Plot Bar Graph

trace = go.Bar(x=top10_score.batter, y=top10_score.batsman_runs, marker={'color': '#3dd4dc'})

data = [trace]
layout = go.Layout(title='Top 10 IPL Batsman',
                   xaxis={'title': 'Batsman'},
                   yaxis={'title': 'Total Runs'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

## There are 3 types of Bar graphs
1. Nested Bar Graph
2. Stacked Bar Graph
3. Overlayed Bar Graph

In [164]:
iw = top10_df.groupby(['batter', 'inning'])['batsman_runs'].sum().reset_index()

one = iw.season_wise_2008('inning==1')
two = iw.season_wise_2008('inning==2')

one.rename(columns={'batsman_runs': '1st Innings'}, inplace=True)
two.rename(columns={'batsman_runs': '2nd Innings'}, inplace=True)

final = one.merge(two, on='batter')[['batter', '1st Innings', '2nd Innings']]
final

AttributeError: 'DataFrame' object has no attribute 'season_wise_2008'

In [None]:
trace1 = go.Bar(x=final.batter, y=final['1st Innings'], name='1st Innings', marker={'color': '#bb8fce'})
trace2 = go.Bar(x=final.batter, y=final['2nd Innings'], name='2nd Innings', marker={'color': '#dca43d'})

data = [trace1, trace2]
layout = go.Layout(title='Inning Wise Scores',
                   xaxis={'title': 'Batsman'},
                   yaxis={'title': 'Runs'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

In [None]:
trace1 = go.Bar(x=final.batter, y=final['1st Innings'], name='1st Innings', marker={'color': '#bb8fce'})
trace2 = go.Bar(x=final.batter, y=final['2nd Innings'], name='2nd Innings', marker={'color': '#dca43d'})

data = [trace1, trace2]
layout = go.Layout(title='Inning Wise Scores',
                   xaxis={'title': 'Batsman'},
                   yaxis={'title': 'Runs'},
                   barmode='stack')

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

In [None]:
trace1 = go.Bar(x=final.batter, y=final['1st Innings'], name='1st Innings', marker={'color': '#bb8fce'})
trace2 = go.Bar(x=final.batter, y=final['2nd Innings'], name='2nd Innings', marker={'color': '#dca43d'})

data = [trace1, trace2]
layout = go.Layout(title='Inning Wise Scores',
                   xaxis={'title': 'Batsman'},
                   yaxis={'title': 'Runs'},
                   barmode='overlay')

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

## Bubble Plot
<p>Again an extension of Scatter plot. with some additional informations</p>
<img src="https://www.data-to-viz.com/graph/bubble_files/figure-html/unnamed-chunk-1-1.png"/>


In [None]:
print(new_ipl.shape)
new_ipl.head()

In [None]:
new_ipl = new_ipl.query('batsman_runs == 6')
new_ipl.head()

In [None]:
six = new_ipl.groupby('batter')['batsman_runs'].count().reset_index()
six.rename(columns={'batsman_runs': 'total_career_sixes'}, inplace=True)
six

In [None]:
avg.head()

In [None]:
x = avg.merge(six, on='batter')
x

In [165]:
# Bubble plot
trace = go.Scatter(x=x.avg, y=x.strike_rate, mode='markers',
                   marker={'size': x.total_career_sixes})

data = [trace]
layout = go.Layout(title='Bubble Chart',
                   xaxis={'title': 'Average'},
                   yaxis={'title': 'Strike_Rate'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

## Box Plot 

<p>A box and whisker plot—also called a box plot—displays the five-number summary of a set of data.</p>
<img src="https://www.simplypsychology.org/wp-content/uploads/boxplot-outliers.png"/>

In [166]:
match_agg = delivery.groupby('match_id')['total_runs'].sum().reset_index()
match_agg

Unnamed: 0,match_id,total_runs
0,335982,304
1,335983,447
2,335984,261
3,335985,331
4,335986,222
...,...,...
1090,1426307,429
1091,1426309,323
1092,1426310,346
1093,1426311,314


In [167]:
season_wise = match_agg.merge(match, left_on='match_id', right_on='id')[['match_id', 'total_runs', 'season']]
print(season_wise.head(1), '\n')
print(season_wise.tail(1))

   match_id  total_runs   season
0    335982         304  2007/08 

      match_id  total_runs season
1094   1426312         227   2024


In [168]:
# box plot

trace = go.Box(x=season_wise.total_runs, name='All Seasons', marker={'color': '#b51c76'})

data = [trace]
layout = go.Layout(title='Total Score Analysis',
                   xaxis={'title': 'Total Score'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

In [169]:
season_wise['season'].unique()

array(['2007/08', '2009', '2009/10', '2011', '2012', '2013', '2014',
       '2015', '2016', '2017', '2018', '2019', '2020/21', '2021', '2022',
       '2023', '2024'], dtype=object)

In [170]:
# 2 box plots of seasons 2024 and 2008
trace1 = go.Box(x=season_wise.query('season == "2024"').total_runs, name='2024', marker={'color': 'orange'})
trace2 = go.Box(x=season_wise.query('season == "2007/08"').total_runs, name='2008', marker={'color': 'yellow'})
trace3 = go.Box(x=season_wise.query('season == "2018"').total_runs, name='2018', marker={'color': 'blue'})

data = [trace1, trace2, trace3]
layout = go.Layout(title='Total Score Analysis',
                   xaxis={'title': 'Total Score'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

## 7. Distplots

<p></p>
<img src="https://plot.ly/~PythonPlotBot/10/customized-distplot.png"/>

In [171]:
# Plot Distplot

import plotly.figure_factory as ff

avg.head()

Unnamed: 0,batter,avg,strike_rate
0,AB de Villiers,39.853846,148.580442
1,AD Russell,28.930233,164.224422
2,AJ Finch,24.904762,123.349057
3,AM Rahane,30.142857,120.32141
4,AT Rayudu,28.051613,124.584527


In [172]:
hist_data = [avg.avg]

group_labels = ['Average']

fig = ff.create_distplot(hist_data, group_labels)

plotly.io.show(fig)

In [173]:
hist_data = [avg.avg, avg.strike_rate]

group_labels = ['Average', 'Strike Rate']

fig = ff.create_distplot(hist_data, group_labels, bin_size=[10, 20])

plotly.io.show(fig)

## 6. Histograms

<p>A histogram is a plot that lets you discover, and show, the underlying frequency distribution (shape) of a set of continuous data.</p>

<img src="https://www.math-only-math.com/images/histogram-problems.png"/>

In [175]:
x = delivery.groupby('batter')['batsman_runs'].count() > 150
x = x[x].index.tolist()
x

['A Ashish Reddy',
 'A Badoni',
 'A Manohar',
 'A Mishra',
 'A Symonds',
 'AA Jhunjhunwala',
 'AB Agarkar',
 'AB de Villiers',
 'AC Gilchrist',
 'AD Mathews',
 'AD Russell',
 'AJ Finch',
 'AK Markram',
 'AL Menaria',
 'AM Nayar',
 'AM Rahane',
 'AP Tare',
 'AR Patel',
 'AS Raut',
 'AT Rayudu',
 'Abdul Samad',
 'Abhishek Sharma',
 'Abishek Porel',
 'Anuj Rawat',
 'Atharva Taide',
 'Azhar Mahmood',
 'B Chipli',
 'B Kumar',
 'B Sai Sudharsan',
 'BA Stokes',
 'BB McCullum',
 'BJ Hodge',
 'C Green',
 'C de Grandhomme',
 'CA Ingram',
 'CA Lynn',
 'CA Pujara',
 'CH Gayle',
 'CH Morris',
 'CJ Anderson',
 'CL White',
 'CM Gautam',
 'D Brevis',
 'D Padikkal',
 'DA Miller',
 'DA Warner',
 'DB Das',
 'DB Ravi Teja',
 'DJ Bravo',
 'DJ Hooda',
 'DJ Hussey',
 'DJ Mitchell',
 'DJG Sammy',
 'DP Conway',
 'DPMD Jayawardene',
 'DR Smith',
 'DT Christian',
 'DW Steyn',
 'Dhruv Jurel',
 'E Lewis',
 'EJG Morgan',
 'F du Plessis',
 'FY Fazal',
 'G Gambhir',
 'GC Smith',
 'GH Vihari',
 'GJ Bailey',
 'GJ Maxwe

In [176]:
new = delivery.query('batter in @x')
new.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,legbyes,0,,,
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,wides,0,,,
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,


In [180]:
runs = new.groupby('batter')['batsman_runs'].sum()
balls = new.groupby('batter')['batsman_runs'].count()

sr = (runs / balls) * 100
sr = sr.reset_index()
sr.rename(columns={'batsman_runs': 'strike_rate'}, inplace=True)
sr

Unnamed: 0,batter,strike_rate
0,A Ashish Reddy,142.857143
1,A Badoni,125.544554
2,A Manohar,127.624309
3,A Mishra,86.590909
4,A Symonds,124.711908
...,...,...
235,Y Venugopal Rao,113.872832
236,YBK Jaiswal,146.757991
237,YK Pathan,138.046272
238,YV Takawale,104.918033


In [188]:
# Plot histogram
trace = go.Histogram(x=sr.strike_rate, xbins={'size': 2, 'start': 50, 'end': 100})

data = [trace]

layout = go.Layout(title='Strike Rate Variations',
                   xaxis={'title': 'Strike Rates'})

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

## 8. Heatmaps

<p>A heat map is a graphical representation of data where the individual values contained in a matrix are represented as colors.</p>

<img src="https://seaborn.pydata.org/_images/spreadsheet_heatmap.png"/>

In [192]:
six = delivery.query('batsman_runs == 6')
six = six.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()
six.rename(columns={'batsman_runs': 'no_of_sixes'}, inplace=True)
six.head()

Unnamed: 0,batting_team,over,no_of_sixes
0,Chennai Super Kings,0,9
1,Chennai Super Kings,1,36
2,Chennai Super Kings,2,67
3,Chennai Super Kings,3,71
4,Chennai Super Kings,4,75


In [196]:
# Plot Heatmap

trace = go.Heatmap(x=six.batting_team, y=six.over, z=six.no_of_sixes)

data = [trace]
layout = go.Layout(title='Six Heatmap')

fig = go.Figure(data=data, layout=layout)

plotly.io.show(fig)

In [198]:
# Side by Side Heatmap

dots = delivery.query('batsman_runs == 0')
dots = dots.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()
dots.rename(columns={'batsman_runs': 'no_of_dots'}, inplace=True)
dots

Unnamed: 0,batting_team,over,no_of_dots
0,Chennai Super Kings,0,927
1,Chennai Super Kings,1,846
2,Chennai Super Kings,2,741
3,Chennai Super Kings,3,697
4,Chennai Super Kings,4,666
...,...,...,...
375,Sunrisers Hyderabad,15,376
376,Sunrisers Hyderabad,16,365
377,Sunrisers Hyderabad,17,340
378,Sunrisers Hyderabad,18,335


In [201]:
from plotly import tools

trace1 = go.Heatmap(x=six.batting_team, y=six.over, z=six.no_of_sixes.values.tolist())
trace2 = go.Heatmap(x=dots.batting_team, y=dots.over, z=dots.no_of_dots.values.tolist())

fig = tools.make_subplots(rows=1, cols=2, subplot_titles=["6's", "0's"], shared_yaxes=True)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)

plotly.io.show(fig)


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead

