![KKR_RR.png](attachment:KKR_RR.png)

In [1]:
#!pip install bqplot
#!pip install ipywidgets
#!pip install ipympl
#!jupyter labextension install @jupyter-widgets/jupyterlab-manager
#!jupyter nbextension install --user --py widgetsnbextension
#!jupyter nbextension enable --user --py widgetsnbextension
#!pip install plotly
#!pip install cufflinks


import operator
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import ipywidgets as widgets
import plotly.express as px
import plotly.graph_objects as go
import bqplot
from bqplot import Pie
from collections import Counter
from bqplot import DateScale, LinearScale, Lines, Axis, Figure
from bqplot import Tooltip
import cufflinks as cf
from matplotlib.patches import ConnectionPatch
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt

from IPython.core.display import display, HTML

def display_side_by_side(dfs:list, captions:list):
    """Display tables side by side to save vertical space
    Input:
        dfs: list of pandas.DataFrame
        captions: list of table captions
    """
    output = ""
    combined = dict(zip(captions, dfs))
    for caption, df in combined.items():
        output += df.style.set_table_attributes("style='display:inline'").set_caption(caption)._repr_html_()
        output += "\xa0\xa0\xa0"
    display(HTML(output))


from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
pd.set_option('display.max_columns', 50)
%matplotlib widget


In [2]:
matches = pd.read_csv("matches.csv")
deliveries =  pd.read_csv("deliveries.csv")
IPL_data = pd.merge(deliveries, matches[['id','season', 'team1', 'team2', 'toss_winner', 'toss_decision','winner','result', 'dl_applied','win_by_runs','win_by_wickets', 'venue' ]], 
                    left_on='match_id', right_on='id')

In [4]:
#KKR batsmen
KKR_batsmen = ['S Gill', 'N Rana', 'RA Tripathi',  'KD Karthik', 'EJG Morgan', 'SP Narine', 'PJ Cummins']
# SP Narine/AD Russell(optional)

#KKR bowlers
KKR_bowlers = ['PJ Cummins',  'P Krishna' , 'LH Ferguson', 'V Chakravarthy', 'SP Narine']
# nagarkoti no data

#RR
RR_bowlers = ['J Archer' , 'AS Rajpoot', 'S Gopal' , 'R Tewatia', 'BA Stokes', 'S Tyagi']
RR_batsmen = ['RV Uthappa','BA Stokes', 'SV Samson', 'SPD Smith', 'JC Buttler', 'R Tewatia','R Parag','J Archer']


![q1.png](attachment:q1.png)

In [165]:

bowling_avg = deliveries[(deliveries.bowler.isin(KKR_bowlers)) | (deliveries.bowler.isin(RR_bowlers))]
bowling_avg.player_dismissed.fillna(0, inplace  = True)
bowling_avg['player_dismissed'].loc[bowling_avg['player_dismissed'] != 0] = 1

b_avg1 = bowling_avg.groupby(['bowler'])['player_dismissed'].sum().reset_index(name  = "wickets")
t_b =  bowling_avg.groupby(['bowler'])['batsman_runs'].count().reset_index(name  = "balls_bowled")
b_avg1["balls_bowled"] = t_b.balls_bowled
b_avg1["bowling_avg"] = b_avg1.wickets/b_avg1.balls_bowled


display_side_by_side([b_avg1[b_avg1.bowler.isin(KKR_bowlers)], b_avg1[b_avg1.bowler.isin(RR_bowlers)]], 
                     [" 1) bowling average of KKR bowlers", " 2) bowling average of RR bowlers"])


## Batting averages of all the batsmen

batsmen_runs_all =deliveries[(deliveries.batsman.isin(KKR_batsmen)) | (deliveries.batsman.isin(RR_batsmen))]
b1_all = batsmen_runs_all.groupby(['batsman'])['batsman_runs'].sum().reset_index(name = "total_runs")
b = [];b_avg = [];tm = [];runs  = []
for i in batsmen_runs_all.batsman.unique():
    x = batsmen_runs_all[batsmen_runs_all.batsman == i]
    x.player_dismissed.fillna(0, inplace = True)
    x['player_dismissed'].loc[x['player_dismissed'] != 0] = 1
    #print(x[['match_id','player_dismissed']])
    y = x.groupby(['match_id'])['player_dismissed'].sum().reset_index(name = "out_count")
    z = x.groupby(['match_id'])['batsman_runs'].sum().reset_index(name = "runs")
    #print(y)
    b.append(i)
    b_avg.append(y.out_count.sum())
    runs.append(z.runs.sum())
    tm.append(y.shape[0])
    
bat_avg = pd.DataFrame(list(zip(b, b_avg, tm, runs)), 
               columns =['batsman','matches_out','matches', 'total_runs'])    

bat_avg["batting_average"] = bat_avg.total_runs/bat_avg.matches_out

display_side_by_side([bat_avg[bat_avg.batsman.isin(KKR_batsmen)], bat_avg[bat_avg.batsman.isin(RR_batsmen)]],
                     [" 1) batting average of KKR batsmen", " 2) batting average of RR batsmen"])

Unnamed: 0,bowler,wickets,balls_bowled,bowling_avg
3,LH Ferguson,5,87,0.057471
4,P Krishna,16,437,0.036613
5,PJ Cummins,19,379,0.050132
9,SP Narine,137,2600,0.052692
10,V Chakravarthy,1,18,0.055556

Unnamed: 0,bowler,wickets,balls_bowled,bowling_avg
0,AS Rajpoot,22,452,0.048673
1,BA Stokes,29,615,0.047154
2,J Archer,27,514,0.052529
6,R Tewatia,16,301,0.053156
7,S Gopal,39,593,0.065767
8,S Tyagi,6,220,0.027273


Unnamed: 0,batsman,matches_out,matches,total_runs,batting_average
1,N Rana,40,41,1104,27.6
4,KD Karthik,140,162,3688,26.342857
6,PJ Cummins,6,9,77,12.833333
8,SP Narine,40,54,803,20.075
9,RA Tripathi,27,33,771,28.555556
10,EJG Morgan,38,45,854,22.473684
12,S Gill,14,23,517,36.928571

Unnamed: 0,batsman,matches_out,matches,total_runs,batting_average
0,JC Buttler,38,45,1431,37.657895
2,SPD Smith,59,72,2047,34.694915
3,BA Stokes,30,32,647,21.566667
5,SV Samson,79,88,2235,28.291139
7,RV Uthappa,157,170,4446,28.318471
11,R Tewatia,7,12,119,17.0
13,J Archer,7,11,85,12.142857
14,R Parag,4,5,169,42.25


In [168]:
fig11 = px.bar(b_avg1, x="bowler", y='bowling_avg',
             hover_data=['bowler', 'bowling_avg', 'balls_bowled','wickets' ],color = 'bowler',
             title='Bowling averages of KKR and RR bowlers')
#fig11.show()

fig22 = px.bar(bat_avg, x="batsman", y='batting_average',
             hover_data=['batsman', 'batting_average', 'total_runs','matches', 'matches_out' ],color = 'batsman',
             title='Batting averages of KKR and RR batsmen')
#fig22.show()

![bowl_avg.png](attachment:bowl_avg.png)

![bat_avg.png](attachment:bat_avg.png)

## Performance indexes of batsmen
![image.png](attachment:image.png)

In [169]:
# Performance indexes of KKR batsmen against RR bowlers
batsman = [];bowler =[]; b_avg= []; runs= []; tm = []
def performance_indexes_batsmen(batsman_list, bowler_list):
    
    for i in batsman_list:
        pi1 = deliveries[(deliveries.batsman == i) & (deliveries.bowler.isin(bowler_list))]
        pi1.player_dismissed.fillna(0, inplace = True)
        pi1['player_dismissed'].loc[pi1['player_dismissed'] != 0] = 1
        for j in pi1.bowler.unique():
            x = pi1[pi1.bowler == j]
            batsman.append(i)
            bowler.append(j)
            y = x.groupby(['match_id'])['player_dismissed'].sum().reset_index(name = "out_count")
            z = x.groupby(['match_id'])['batsman_runs'].sum().reset_index(name = "runs")
            b_avg.append(y.out_count.sum())
            runs.append(z.runs.sum())
            tm.append(y.shape[0])
            
performance_indexes_batsmen(KKR_batsmen, RR_bowlers)
KKR_bat_avg = pd.DataFrame(list(zip(batsman, bowler, b_avg, tm, runs)), 
               columns =['batsman','bowler', 'matches_out','total_matches', 'total_runs'])      
KKR_bat_avg["batting_average"] = KKR_bat_avg.total_runs/KKR_bat_avg.matches_out
KKR_bat_avg.replace([np.inf, -np.inf], np.nan, inplace=True)   

bowl_avg = []
for i in KKR_bat_avg.bowler.values:
    bowl_avg.append(b_avg1[b_avg1.bowler == i].bowling_avg.values[0])
    
KKR_bat_avg["bowling_average_bowler"] = bowl_avg
KKR_bat_avg["PIBij"] = KKR_bat_avg.batting_average/KKR_bat_avg.bowling_average_bowler # not multiplying with 100 as bowling average is not in %

#############################################################################################################33
# RR batsmen's performance indexes
batsman = [];bowler =[]; b_avg= []; runs= []; tm = []
performance_indexes_batsmen(RR_batsmen, KKR_bowlers)

RR_bat_avg = pd.DataFrame(list(zip(batsman, bowler, b_avg, tm, runs)), 
               columns =['batsman','bowler', 'matches_out','total_matches', 'total_runs'])      
RR_bat_avg["batting_average"] = RR_bat_avg.total_runs/RR_bat_avg.matches_out
RR_bat_avg.replace([np.inf, -np.inf], np.nan, inplace=True)   

bowl_avg1 = []
for i in RR_bat_avg.bowler.values:
    bowl_avg1.append(b_avg1[b_avg1.bowler == i].bowling_avg.values[0])
    
RR_bat_avg["bowling_average_bowler"] = bowl_avg1
RR_bat_avg["PIBij"] = RR_bat_avg.batting_average/RR_bat_avg.bowling_average_bowler # not multiplying with 100 as bowling average is not in %
display_side_by_side([RR_bat_avg, KKR_bat_avg],
                     [" 1) Performance indexes of RR batsmen against KKR Bowlers", " 2) Performance indexes of KKR batsmen against RR Bowlers"])

Unnamed: 0,batsman,bowler,matches_out,total_matches,total_runs,batting_average,bowling_average_bowler,PIBij
0,RV Uthappa,PJ Cummins,1,2,18,18.0,0.050132,359.052632
1,RV Uthappa,SP Narine,0,2,2,,0.052692,
2,RV Uthappa,V Chakravarthy,0,1,10,,0.055556,
3,BA Stokes,PJ Cummins,1,2,8,8.0,0.050132,159.578947
4,BA Stokes,SP Narine,1,4,17,17.0,0.052692,322.627737
5,BA Stokes,P Krishna,0,1,6,,0.036613,
6,SV Samson,SP Narine,3,9,39,13.0,0.052692,246.715328
7,SV Samson,P Krishna,0,2,12,,0.036613,
8,SPD Smith,SP Narine,2,6,55,27.5,0.052692,521.89781
9,SPD Smith,P Krishna,0,1,19,,0.036613,

Unnamed: 0,batsman,bowler,matches_out,total_matches,total_runs,batting_average,bowling_average_bowler,PIBij
0,S Gill,R Tewatia,1,2,1,1.0,0.053156,18.8125
1,S Gill,AS Rajpoot,0,1,8,,0.048673,
2,S Gill,S Gopal,0,2,17,,0.065767,
3,S Gill,J Archer,1,2,6,6.0,0.052529,114.222222
4,S Gill,BA Stokes,0,1,6,,0.047154,
5,N Rana,BA Stokes,0,4,17,,0.047154,
6,N Rana,AS Rajpoot,1,1,11,11.0,0.048673,226.0
7,N Rana,R Tewatia,0,2,8,,0.053156,
8,N Rana,S Gopal,1,2,5,5.0,0.065767,76.025641
9,N Rana,J Archer,1,3,9,9.0,0.052529,171.333333


## Performance indexes for both the teams bowlers
![image.png](attachment:image.png)

In [170]:
bowls = []; bats = []; wi = []; bb = []
def performance_index_bowlers(bowlers_list, batsmen_list):
    for i in bowlers_list:
        p = deliveries[(deliveries.bowler == i) & (deliveries.batsman.isin(batsmen_list))]
        p.player_dismissed.fillna(0, inplace  = True)
        p['player_dismissed'].loc[p['player_dismissed'] != 0] = 1
        for j in p.batsman.unique():
            bowls.append(i)
            bats.append(j)
            xy = p[p.batsman == j]
            #print(p[p.batsman == j])
            wi.append(xy.player_dismissed.sum())
            bb.append(xy.batsman_runs.count())
        
performance_index_bowlers(KKR_bowlers, RR_batsmen)
KKR_bowlers_p = pd.DataFrame(list(zip(bowls, bats, wi, bb)), 
               columns =['bowler', 'batsman','wickets','balls_bowled'])  
bat_avg_p = []
for i in KKR_bowlers_p.batsman.values:
    bat_avg_p.append(bat_avg[bat_avg.batsman == i].batting_average.values[0])
    
KKR_bowlers_p["bowling_average"] = KKR_bowlers_p.wickets/KKR_bowlers_p.balls_bowled
KKR_bowlers_p["batting_avg"] = bat_avg_p
KKR_bowlers_p["PIBoij"] = KKR_bowlers_p.batting_avg/KKR_bowlers_p.bowling_average
KKR_bowlers_p.replace([np.inf, -np.inf], np.nan, inplace=True)
###################################################################################################################
bowls = []; bats = []; wi = []; bb = []        
performance_index_bowlers(RR_bowlers, KKR_batsmen)

RR_bowlers_p = pd.DataFrame(list(zip(bowls, bats, wi, bb)), 
               columns =['bowler', 'batsman','wickets','balls_bowled'])  
bat_avg_p = []
for i in RR_bowlers_p.batsman.values:
    bat_avg_p.append(bat_avg[bat_avg.batsman == i].batting_average.values[0])
    
RR_bowlers_p["bowling_average"] = RR_bowlers_p.wickets/RR_bowlers_p.balls_bowled
RR_bowlers_p["batting_avg"] = bat_avg_p
RR_bowlers_p["PIBoij"] = RR_bowlers_p.batting_avg/RR_bowlers_p.bowling_average
RR_bowlers_p.replace([np.inf, -np.inf], np.nan, inplace=True)

display_side_by_side([KKR_bowlers_p, RR_bowlers_p],
                     [" 1) Performance indexes of KKR bowlers against RR batsmen", " 2) Performance indexes of RR bowlers against KKR batsmen"])

Unnamed: 0,bowler,batsman,wickets,balls_bowled,bowling_average,batting_avg,PIBoij
0,PJ Cummins,BA Stokes,1,11,0.090909,21.566667,237.233333
1,PJ Cummins,RV Uthappa,1,11,0.090909,28.318471,311.503185
2,P Krishna,JC Buttler,0,11,0.0,37.657895,
3,P Krishna,SV Samson,0,7,0.0,28.291139,
4,P Krishna,J Archer,0,11,0.0,12.142857,
5,P Krishna,SPD Smith,0,11,0.0,34.694915,
6,P Krishna,BA Stokes,0,7,0.0,21.566667,
7,P Krishna,R Parag,0,6,0.0,42.25,
8,V Chakravarthy,RV Uthappa,0,7,0.0,28.318471,
9,SP Narine,JC Buttler,1,26,0.038462,37.657895,979.105263

Unnamed: 0,bowler,batsman,wickets,balls_bowled,bowling_average,batting_avg,PIBoij
0,J Archer,N Rana,1,12,0.083333,27.6,331.2
1,J Archer,KD Karthik,1,25,0.04,26.342857,658.571429
2,J Archer,S Gill,1,5,0.2,36.928571,184.642857
3,J Archer,SP Narine,0,1,0.0,20.075,
4,AS Rajpoot,N Rana,1,8,0.125,27.6,220.8
5,AS Rajpoot,KD Karthik,0,14,0.0,26.342857,
6,AS Rajpoot,SP Narine,0,2,0.0,20.075,
7,AS Rajpoot,S Gill,0,5,0.0,36.928571,
8,AS Rajpoot,RA Tripathi,0,2,0.0,28.555556,
9,S Gopal,KD Karthik,0,30,0.0,26.342857,


## Conclusion : By comparing above results it seems RR will win today

![q2.png](attachment:q2.png)

## Function to calculate wickets taken

In [38]:
def wickets_taken(wickets_data_matchwise, match_data):
    for i in wickets_data_matchwise.bowler.unique():
        bowler.append(i)
        wi.append(round(wickets_data_matchwise[wickets_data_matchwise.bowler ==i].wickets.mean(),1))
        more_wickets.append(len(wickets_data_matchwise[wickets_data_matchwise.bowler ==i][wickets_data_matchwise.wickets >1].match_id.unique()))
        wicket_matches.append(len(wickets_data_matchwise[wickets_data_matchwise.bowler ==i].match_id.unique()))
        
        t_matches.append(len(match_data[match_data.bowler == i].match_id.unique()))
        
    wickets_data = pd.DataFrame(list(zip(bowler, wi, t_matches, wicket_matches, more_wickets)), 
               columns =['bowler','wickets','matches', 'wicket_matches', "> 1 wicket"])
    wickets_data["wickets %"] = (wickets_data.wicket_matches/wickets_data.matches)*100
    wickets_data["more than 1 wickets %"]= (wickets_data["> 1 wicket"]/wickets_data.wicket_matches)*100

    return wickets_data

## Overall wickets taken by RR and KKR bowlers in IPL

In [172]:
wickets = deliveries[(deliveries.bowler.isin(KKR_bowlers)) | (deliveries.bowler.isin(RR_bowlers))]
wickets.player_dismissed.fillna(0, inplace = True)
wickets_RR_KKR = wickets[(wickets.player_dismissed != 0) & (wickets.dismissal_kind != 'run out')].groupby(['match_id','bowler'])['player_dismissed'].count().reset_index(name = "wickets")

bowler=[];wi=[];wicket_matches=[];t_matches=[];more_wickets=[]
wickets_data_RR_KKR = wickets_taken(wickets_RR_KKR, wickets)
import numpy as np
import cufflinks as cf
cf.set_config_file(theme='pearl',sharing='public',offline=True)
wickets_data_RR_KKR.iplot(kind="bar",x = "bowler",
                  subplots=False,
                  sortbars=True,
                  keys = ["wickets","matches", "wicket_matches", "> 1 wicket", "wickets %", "more than 1 wickets %"],
                  title="Wickets stats for RR and KKR bowlers",
                  theme="henanigans"
                  )


wickets_data_RR_KKR

Unnamed: 0,bowler,wickets,matches,wicket_matches,> 1 wicket,wickets %,more than 1 wickets %
0,BA Stokes,1.7,30,15,8,50.0,53.333333
1,PJ Cummins,1.5,16,11,6,68.75,54.545455
2,AS Rajpoot,1.7,23,13,5,56.521739,38.461538
3,SP Narine,1.7,109,72,30,66.055046,41.666667
4,LH Ferguson,1.5,4,2,1,50.0,50.0
5,R Tewatia,1.4,18,10,3,55.555556,30.0
6,S Tyagi,1.2,14,5,1,35.714286,20.0
7,S Gopal,2.0,30,19,13,63.333333,68.421053
8,J Archer,1.9,21,14,8,66.666667,57.142857
9,P Krishna,1.6,18,9,3,50.0,33.333333


![w1.png](attachment:w1.png)

In [40]:
print("total wickets will be : ",wickets_data_RR_KKR[wickets_data_RR_KKR["wickets %"] >=50]["wickets %"].count() + 1) # adding 1 as Kamalesh nagarkoti's data not available

total wickets will be :  11


## Head to head records

In [173]:
wickets_faceoff = deliveries[((deliveries.bowler.isin(KKR_bowlers)) & (deliveries.batsman.isin(RR_batsmen))) | 
                             ((deliveries.bowler.isin(RR_bowlers)) & (deliveries.batsman.isin(KKR_batsmen)))]
wickets_faceoff.player_dismissed.fillna(0, inplace = True)
wickets_RR_KKR_faceoff = wickets_faceoff[(wickets_faceoff.player_dismissed != 0) & (wickets_faceoff.dismissal_kind != 'run out')].groupby(['match_id','bowler'])['player_dismissed'].count().reset_index(name = "wickets")

bowler=[];wi=[];wicket_matches=[];t_matches=[];more_wickets=[]
wickets_data_RR_KKR_faceoff = wickets_taken(wickets_RR_KKR_faceoff, wickets_faceoff)
import numpy as np
import cufflinks as cf

cf.set_config_file(theme='pearl',sharing='public',offline=True)
wickets_data_RR_KKR_faceoff.iplot(kind="bar",x = "bowler",
                  subplots=False,
                  sortbars=True,
                  keys = ["wickets","matches", "wicket_matches", "> 1 wicket", "wickets %", "more than 1 wickets %"],
                  title="Wickets stats for RR and KKR bowlers head to head records",
                  theme="henanigans"
                  )

wickets_data_RR_KKR_faceoff

Unnamed: 0,bowler,wickets,matches,wicket_matches,> 1 wicket,wickets %,more than 1 wickets %
0,AS Rajpoot,1.0,5,1,0,20.0,0.0
1,PJ Cummins,1.0,4,2,0,50.0,0.0
2,SP Narine,1.0,19,7,0,36.842105,0.0
3,BA Stokes,1.0,8,2,0,25.0,0.0
4,R Tewatia,1.0,5,1,0,20.0,0.0
5,J Archer,2.0,4,1,1,25.0,100.0
6,S Gopal,1.0,5,2,0,40.0,0.0


![w2.png](attachment:w2.png)

## Conclusion :  11-15 wickets will be taken in today's match

![q3.png](attachment:q3.png)

## methods to calculate ratio overall and matchwise average

In [67]:
# Overall performnace in IPL
def runs_off_boundaries(total_score, batsmen_runs, runs_123s):
    off_boundaries = total_score.groupby(['batsman'])['batsman_runs'].sum().reset_index(name = "total_score")
    batsmen_boundaries = batsmen_runs.groupby(['batsman'])['batsman_runs'].sum().reset_index(name = "runs_offboundaries")
    runs_1s_2s_3s = runs_123s.groupby(['batsman'])['batsman_runs'].sum().reset_index(name = "runs_1s2s3s")
    batsmen_balls_faced = total_score.groupby(['batsman'])['batsman_runs'].count().reset_index(name = "balls_faced")
    
    off_boundaries["runs_offboundaries"] = batsmen_boundaries.runs_offboundaries
    off_boundaries["runs_1s2s3s"] = runs_1s_2s_3s.runs_1s2s3s
    
    off_boundaries["BTR"] = (off_boundaries.runs_offboundaries - off_boundaries.runs_1s2s3s)/off_boundaries.total_score
    
    off_boundaries["strike_rate"] = off_boundaries.total_score*100/batsmen_balls_faced.balls_faced
    off_boundaries.sort_values(by=['BTR'], inplace=True, ascending=False)
    return off_boundaries
###############################################################################################################################
# Matchwise average
def matchwise_runs_off_boundaries(m_total_score, m_batsmen_runs, m_runs_123s):
    matchwise_off_boundaries = m_total_score.groupby(['match_id','batsman'])['batsman_runs'].sum().reset_index(name = "total_score")
    matchwise_batsmen_boundaries = m_batsmen_runs.groupby(['match_id','batsman'])['batsman_runs'].sum().reset_index(name = "runs_offboundaries")
    matchwise_runs_1s_2s_3s = m_runs_123s.groupby(['match_id','batsman'])['batsman_runs'].sum().reset_index(name = "runs_1s2s3s")
    matchwise_balls_faced = m_total_score.groupby(['match_id','batsman'])['batsman_runs'].count().reset_index(name = "balls_faced")
    
    matchwise_off_boundaries["runs_offboundaries"] = matchwise_batsmen_boundaries.runs_offboundaries
    matchwise_off_boundaries["runs_1s2s3s"] = matchwise_runs_1s_2s_3s.runs_1s2s3s
    matchwise_off_boundaries["strike_rate"] = matchwise_off_boundaries.total_score*100/matchwise_balls_faced.balls_faced
    matchwise_off_boundaries.runs_offboundaries.fillna(0, inplace = True)
    bats = [];tscore = [];of_boundaries =[];strike_rate = []; s2s3s=[]
    
    for i in matchwise_off_boundaries.batsman.unique():
        batsmen_boundaries_data = matchwise_off_boundaries[matchwise_off_boundaries.batsman == i]
        bats.append(i)
        tscore.append(batsmen_boundaries_data.total_score.mean())
        strike_rate.append(batsmen_boundaries_data.strike_rate.mean())
        of_boundaries.append(batsmen_boundaries_data.runs_offboundaries.mean())
        s2s3s.append(batsmen_boundaries_data.runs_1s2s3s.mean())
        
    BTR =  pd.DataFrame(list(zip(bats, tscore, of_boundaries, s2s3s, strike_rate)), 
               columns =['batsman', 'total_score', 'runs_offboundaries', 'runs_1s2s3s', 'strike_rate'])
    BTR["BTR"] = (BTR.runs_offboundaries - BTR.runs_1s2s3s)/BTR.total_score
    BTR.sort_values(by = ['BTR'], inplace=True, ascending=False)
    return BTR[['batsman', 'total_score', 'runs_offboundaries', 'runs_1s2s3s','BTR','strike_rate']]

################################################################################################################################
# BTR =  Boundaries - runs 1s2s 3s to Totatl Runs Ratio

### Overall, Head to Head performance 

In [68]:
KKR_RR_batsmen  = deliveries[(deliveries.batsman.isin(RR_batsmen)) | (deliveries.batsman.isin(KKR_batsmen))]
batsmen = KKR_RR_batsmen[((KKR_RR_batsmen.batsman_runs == 4) & (KKR_RR_batsmen.extra_runs != 4)) | 
                          ((KKR_RR_batsmen.batsman_runs == 6) & (KKR_RR_batsmen.extra_runs != 6))]

runs_123 = KKR_RR_batsmen[((KKR_RR_batsmen.batsman_runs ==3) & (KKR_RR_batsmen.extra_runs != 3)) | 
                           ((KKR_RR_batsmen.batsman_runs ==2) & (KKR_RR_batsmen.extra_runs != 2)) | 
                           ((KKR_RR_batsmen.batsman_runs ==1) & (KKR_RR_batsmen.extra_runs != 1))]

# overall 
KKR_RR_boundaries = runs_off_boundaries(KKR_RR_batsmen, batsmen, runs_123)

# matchwise
boundaries_runs_ratio = matchwise_runs_off_boundaries(KKR_RR_batsmen, batsmen, runs_123)

display_side_by_side([KKR_RR_boundaries[["batsman",'runs_offboundaries', 'runs_1s2s3s',"BTR", "strike_rate"]][KKR_RR_boundaries.batsman.isin(KKR_batsmen)], 
                      KKR_RR_boundaries[["batsman",'runs_offboundaries', 'runs_1s2s3s',"BTR", "strike_rate"]][KKR_RR_boundaries.batsman.isin(RR_batsmen)],
                      boundaries_runs_ratio[["batsman",'runs_offboundaries', 'runs_1s2s3s',"BTR", "strike_rate"]][boundaries_runs_ratio.batsman.isin(KKR_batsmen)],
                      boundaries_runs_ratio[["batsman",'runs_offboundaries', 'runs_1s2s3s',"BTR", "strike_rate"]][boundaries_runs_ratio.batsman.isin(RR_batsmen)]], 
                     ['Boundaries to Runs Ratio of KKR Batsmen (Overall)',
                      'Boundaries to Runs Ratio of RR Batsmen (Overall)',
                      'Boundaries to Runs Ratio of KKR batsmen(matchwise average)',
                     'Boundaries to Runs Ratio of RR batsmen(matchwise average)'])

################################################################################################################################
# Head to head 
KKR_RR_faceoff = deliveries[((deliveries.bowler.isin(KKR_bowlers)) & (deliveries.batsman.isin(RR_batsmen))) |
                       ((deliveries.bowler.isin(RR_bowlers)) & (deliveries.batsman.isin(KKR_batsmen)))]
faceoff_batsmen = KKR_RR_faceoff[((KKR_RR_faceoff.batsman_runs == 4) & (KKR_RR_faceoff.extra_runs != 4)) |
                      ((KKR_RR_faceoff.batsman_runs == 6) & (KKR_RR_faceoff.extra_runs != 6))]
runs_123_faceoff = KKR_RR_faceoff[((KKR_RR_faceoff.batsman_runs ==3) & (KKR_RR_faceoff.extra_runs != 3)) | 
                           ((KKR_RR_faceoff.batsman_runs ==2) & (KKR_RR_faceoff.extra_runs != 2)) | 
                           ((KKR_RR_faceoff.batsman_runs ==1) & (KKR_RR_faceoff.extra_runs != 1))]


# overall records head to head in IPL
KKR_RR_boundaries_faceoff = runs_off_boundaries(KKR_RR_faceoff, faceoff_batsmen, runs_123_faceoff)

# matchwise records head to head in IPL
boundaries_runs_ratio_faceoff = matchwise_runs_off_boundaries(KKR_RR_faceoff, faceoff_batsmen, runs_123_faceoff)

display_side_by_side([KKR_RR_boundaries_faceoff[["batsman",'runs_offboundaries', 'runs_1s2s3s', "BTR", "strike_rate"]][KKR_RR_boundaries_faceoff.batsman.isin(KKR_batsmen)], 
                      KKR_RR_boundaries_faceoff[["batsman",'runs_offboundaries', 'runs_1s2s3s', "BTR", "strike_rate"]][KKR_RR_boundaries_faceoff.batsman.isin(RR_batsmen)],
                      boundaries_runs_ratio_faceoff[["batsman",'runs_offboundaries', 'runs_1s2s3s',"BTR", "strike_rate"]][boundaries_runs_ratio_faceoff.batsman.isin(KKR_batsmen)],
                      boundaries_runs_ratio_faceoff[["batsman",'runs_offboundaries', 'runs_1s2s3s',"BTR", "strike_rate"]][boundaries_runs_ratio_faceoff.batsman.isin(RR_batsmen)]], 
                     ['Boundaries to Runs Ratio of KKR Batsmen against RR bowlers(Overall)',
                      'Boundaries to Runs Ratio of RR Batsmen against KKR bowlers(Overall)',
                      'Boundaries to Runs Ratio of KKR batsmen against RR bowlers(matchwise average)',
                     'Boundaries to Runs Ratio of RR batsmen against KKR bowlers(matchwise average)'])


Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
12,SP Narine,636,131,0.628892,166.943867
5,N Rana,696,384,0.282609,132.215569
9,RA Tripathi,454,301,0.198444,135.02627
1,EJG Morgan,492,362,0.152225,117.793103
6,PJ Cummins,44,33,0.142857,137.5
4,KD Karthik,2034,1606,0.116052,127.612457
11,S Gill,262,238,0.046422,134.635417

Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
3,JC Buttler,926,456,0.328442,150.0
10,RV Uthappa,2676,1718,0.215475,127.319588
7,R Parag,98,62,0.213018,127.067669
2,J Archer,48,35,0.152941,137.096774
8,R Tewatia,62,49,0.109244,127.956989
14,SV Samson,1214,996,0.097539,129.340278
0,BA Stokes,322,314,0.012365,131.237323
13,SPD Smith,1018,1013,0.002443,126.670792

Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
5,PJ Cummins,18.444444,7.444444,1.285714,125.565731
8,SP Narine,17.222222,11.1,0.411706,131.297658
10,EJG Morgan,18.533333,13.133333,0.284543,102.772298
4,KD Karthik,13.54321,10.129032,0.149972,115.18627
2,N Rana,12.0,9.714286,0.084886,119.394282
9,RA Tripathi,10.969697,10.129032,0.035982,119.436914
12,S Gill,2.347826,9.8,-0.331528,129.469128

Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
7,RV Uthappa,15.647059,10.169697,0.209436,115.017641
11,R Tewatia,8.166667,6.5,0.168067,122.395961
6,SV Samson,14.568182,10.308642,0.167713,111.098474
1,JC Buttler,13.155556,9.906977,0.102157,136.039782
3,SPD Smith,12.416667,10.522388,0.066628,124.470364
0,BA Stokes,12.125,11.068966,0.05223,116.027981
14,R Parag,0.0,3.0,-0.088757,130.035119
13,J Archer,3.272727,7.555556,-0.554248,91.485837


Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
1,EJG Morgan,24.0,7.0,inf,0.0
8,RA Tripathi,22.0,8.0,0.823529,141.666667
6,PJ Cummins,14.0,8.0,0.666667,180.0
11,SP Narine,38.0,30.0,0.363636,104.761905
10,S Gill,14.0,5.0,0.236842,140.740741
5,N Rana,6.0,3.0,0.06,86.206897
4,KD Karthik,26.0,23.0,0.023077,158.536585

Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
3,JC Buttler,94.0,33.0,1.452381,113.513514
2,J Archer,26.0,14.0,0.375,228.571429
9,RV Uthappa,18.0,13.0,0.166667,125.0
7,R Parag,10.0,7.0,0.136364,169.230769
12,SPD Smith,20.0,31.0,-0.148649,134.545455
0,BA Stokes,8.0,20.0,-0.387097,88.571429
13,SV Samson,,,,98.076923

Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
8,EJG Morgan,4.0,1.0,inf,0.0
0,N Rana,7.333333,3.625,0.6675,82.773369
9,RA Tripathi,6.0,4.0,0.352941,175.0
6,KD Karthik,6.444444,2.375,0.281731,152.88494
7,PJ Cummins,4.0,4.0,0.0,180.0
10,S Gill,2.8,3.4,-0.078947,112.0
11,SP Narine,2.8,3.333333,-0.121212,90.857143

Unnamed: 0,batsman,runs_offboundaries,runs_1s2s3s,BTR,strike_rate
2,BA Stokes,5.0,3.0,0.387097,89.814815
1,JC Buttler,6.4,3.2,0.380952,117.0
3,RV Uthappa,4.8,3.2,0.266667,123.68254
5,SPD Smith,4.0,4.25,-0.02027,106.858974
4,SV Samson,3.555556,4.875,-0.232843,88.703704
12,J Archer,0.0,4.0,-0.25,218.181818
13,R Parag,0.0,,,169.230769


## Plots for above results

In [171]:
import numpy as np
import cufflinks as cf

def cufflinks_plot(plot_data, label):
    cf.set_config_file(theme='pearl',sharing='public',offline=True)
    plot_data.iplot(kind="bar",x = "batsman",
                            subplots=True,
                          sortbars=True,
                          keys = ["runs_offboundaries", "runs_1s2s3s", "BTR", "strike_rate"],
                          title= label,
                          theme="henanigans")

cufflinks_plot(KKR_RR_boundaries, "(runs off boundaries - runs 1s 2s 3s)/total runs scored [Overall in IPL]")
cufflinks_plot(boundaries_runs_ratio, "(runs off boundaries - runs 1s 2s 3s)/total runs scored [matchwise average]")
cufflinks_plot(KKR_RR_boundaries_faceoff, "(runs off boundaries - runs 1s 2s 3s)/total runs scored [Head to Head for RR and KKR]")
cufflinks_plot(boundaries_runs_ratio_faceoff, "(runs off boundaries - runs 1s 2s 3s)/total runs scored [Head to Head for RR and KKR matchwise average]")

## Answer: Ben Stokes or N Rana

![q4.png](attachment:q4.png)

## J Archer Death overs stats (as he is a death over bowler)

## Functions to analyse death over performance of J archer

In [136]:
# plot
from bqplot import Pie
from collections import Counter
from bqplot import DateScale, LinearScale, Lines, Axis, Figure
from bqplot import Tooltip


def J_Archer(jarcher_data):
    archer_ER = jarcher_data.groupby(['match_id'])['batsman_runs'].count().reset_index(name = "balls_bowled")
    archer_runs = jarcher_data.groupby(['match_id'])['batsman_runs'].sum().reset_index(name = "runs_conceeded")
    a_boundaries = jarcher_data[((jarcher_data.batsman_runs == 4) & (jarcher_data.extra_runs != 4)) | ((jarcher_data.batsman_runs == 6) & (jarcher_data.extra_runs != 6))]
    
    archer_boundaries = a_boundaries.groupby(['match_id'])['batsman_runs'].count().reset_index(name = "number_of_boundaries")
    archer_ER["runs_conceeded"] = archer_runs.runs_conceeded
    archer_ER["economy_rate"] = (archer_ER.runs_conceeded/archer_ER.balls_bowled)*6
    archer_ER["no_of_boundaries"] = archer_boundaries.number_of_boundaries
    archer_ER.no_of_boundaries.fillna(0, inplace = True)
    
    return archer_ER

def histogram(data, key, label):
    data.iplot(kind="hist",
                    bins=50, colors=["red"],
                    keys=[key],
                    dimensions=(600, 400),
                    title=label)
    
    


def pie_plot(pie_data,pie_labels, title):
    pie = Pie(sizes=list(pie_data), labels=list(pie_labels),
              stroke="black",
              opacities = [0.8, 0.8],
              radius=150, inner_radius=0,
              sort=True,
              display_values=True, values_format='0.0f',
              label_color="white", font_size="10px", font_weight="bolder"
              )
    fig = Figure(marks=[pie],
             legend_location = "top",
             title=title,
             fig_margin= dict(top=60, bottom=40, left=50, right=20),
             background_style = {"fill":"black"}
                )
    fig.layout.width="500px"
    fig.layout.height="500px"
    pie.tooltip = Tooltip(fields=['sizes'], labels=["Count"], formats=["0.1f"])
    return fig
    


## Performance of J Archer in deaths(overall in IPL)

In [137]:
archer = deliveries[(deliveries.bowler == 'J Archer') & (deliveries.over >=16)]
archer_ER_data = J_Archer(archer)
archer_options = [(archer_ER_data[(archer_ER_data.no_of_boundaries <=4) & (archer_ER_data.economy_rate <=7.5)].shape[0]/archer_ER_data.shape[0])*100,
 (archer_ER_data[(archer_ER_data.no_of_boundaries >4) & (archer_ER_data.economy_rate <=7.5)].shape[0]/archer_ER_data.shape[0])*100,
 (archer_ER_data[(archer_ER_data.no_of_boundaries <=4) & (archer_ER_data.economy_rate >7.5)].shape[0]/archer_ER_data.shape[0])*100,
 (archer_ER_data[(archer_ER_data.no_of_boundaries >4) & (archer_ER_data.economy_rate >7.5)].shape[0]/archer_ER_data.shape[0])*100]

archer_labels = ["[ER <= 7.5, boundaries <= 4]", "[ER <= 7.5, boundaries > 4]" , "[ER > 7.5, boundaries <= 4]", "[ER > 7.5, boundaries > 4]"]
histogram(archer_ER_data, "economy_rate", "Economy Rate of J Archer Matchwise in Death Overs") 
histogram(archer_ER_data, "no_of_boundaries", "no_of_boundaries given by J Archer Matchwise in Death Overs")
fig1  =pie_plot(archer_options ,archer_labels, "J Archer Economy and Boundaries events in deaths")
#fig1


![pie1.png](attachment:pie1.png)

## Against KKR batsmen

In [143]:
archer_KKR = archer[archer.batsman.isin(KKR_batsmen)]
archer_ER_against_KKR = J_Archer(archer_KKR)
archer_ER_against_KKR # both record are against KD Karthik

Unnamed: 0,match_id,balls_bowled,runs_conceeded,economy_rate,no_of_boundaries
0,7942,4,11,16.5,2
1,11334,4,14,21.0,2


## Conclusion: event will be : ER > 7.5, boundaries <= 4

![q5.png](attachment:q5.png)

## function to calculate economy rates of RR and KKr bowlers

In [154]:
def economy_rates(bowlers_er):
    for i in bowlers_er.bowler.unique():
        bowl.append(i)
        balls.append(round(bowlers_er[bowlers_er.bowler == i].balls_bowled.mean(),1))
        runs.append(round(bowlers_er[bowlers_er.bowler == i].runs_conceeded.mean(),1))
    
    ER_data = pd.DataFrame(list(zip(bowl, balls, runs)), 
               columns =['bowler','balls_bowled','runs_conceeded'])
    ER_data["economy_rate"] = round(((ER_data.runs_conceeded/ER_data.balls_bowled)*6),1)
    return ER_data

## Overall

In [155]:
bowlers_KKR_RR = deliveries[(deliveries.bowler.isin(KKR_bowlers)) | (deliveries.bowler.isin(RR_bowlers))]
bowlers_ER = bowlers_KKR_RR.groupby(['match_id', 'bowler'])['batsman_runs'].count().reset_index(name ="balls_bowled")
bowlers_runs = bowlers_KKR_RR.groupby(['match_id', 'bowler'])['batsman_runs'].sum().reset_index(name ="runs_conceeded")
bowlers_ER["runs_conceeded"] = bowlers_runs.runs_conceeded
bowlers_ER["economy_rate"] = round(((bowlers_ER.runs_conceeded/bowlers_ER.balls_bowled)*6),1)
bowl=[];balls=[];runs=[]
ER_data_overall = economy_rates(bowlers_ER)
ER_data_overall

Unnamed: 0,bowler,balls_bowled,runs_conceeded,economy_rate
0,BA Stokes,20.5,27.1,7.9
1,SP Narine,23.9,25.9,6.5
2,PJ Cummins,23.7,29.8,7.5
3,AS Rajpoot,19.7,26.7,8.1
4,LH Ferguson,21.8,21.0,5.8
5,R Tewatia,16.7,19.6,7.0
6,S Tyagi,15.7,20.3,7.8
7,S Gopal,19.8,24.7,7.5
8,J Archer,24.5,30.4,7.4
9,P Krishna,24.3,36.4,9.0


In [162]:
print("Number of bowlers with Economy rate < 8 :", ER_data_overall[ER_data_overall.balls_bowled >6][ER_data_overall.economy_rate<8].shape[0])

Number of bowlers with Economy rate > 8 : 8


## Head to Head

In [157]:
bowlers_ER_faceoff = KKR_RR_faceoff.groupby(['match_id', 'bowler'])['batsman_runs'].count().reset_index(name ="balls_bowled")
bowlers_runs_faceoff = KKR_RR_faceoff.groupby(['match_id', 'bowler'])['batsman_runs'].sum().reset_index(name ="runs_conceeded")
bowlers_ER_faceoff["runs_conceeded"] = bowlers_runs_faceoff.runs_conceeded
bowlers_ER_faceoff["economy_rate"] = round(((bowlers_ER_faceoff.runs_conceeded/bowlers_ER_faceoff.balls_bowled)*6),1)
bowl=[];balls=[];runs=[]
ER_data_overall_faceoff = economy_rates(bowlers_ER_faceoff)
ER_data_overall_faceoff

Unnamed: 0,bowler,balls_bowled,runs_conceeded,economy_rate
0,BA Stokes,6.1,6.8,6.7
1,AS Rajpoot,6.2,10.8,10.5
2,SP Narine,7.8,8.4,6.5
3,PJ Cummins,5.5,6.5,7.1
4,R Tewatia,3.8,4.0,6.3
5,S Gopal,12.8,16.6,7.8
6,J Archer,10.8,13.8,7.7
7,P Krishna,17.7,28.7,9.7
8,V Chakravarthy,7.0,10.0,8.6


In [164]:
print("Number of bowlers with economy rate <8 :", ER_data_overall_faceoff[ER_data_overall_faceoff.balls_bowled >6][ER_data_overall_faceoff.economy_rate<8].shape[0])

Number of bowlers with economy rate <8 : 4


## Conclusion from above results number of Bowlers with ER less than 8 will be 4-8.<br> Henc, option 6 or more would be right answer by taking centrality measure