# Problem Statement:
  Using the latest data, find out the best **Pinch Hitters** inside **Powerplay** in the IPL so far (min of 20 ings)

# Keywords: Pinch hitter, Powerplay.

  * Pinch Hitter = High SR, Low balls_per_boundary, Decent Runs, Decent Balls Faced
  * Powerplay = 1-6 overs






In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
#to display all rows columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 0)

In [None]:
df = pd.read_csv('/content/All_matches.csv')

In [None]:
df.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,total_runs,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed
0,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.1,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,BB McCullum,P Kumar,0,1,,,,1.0,2,,,,,
1,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.2,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,0,,,,,0,,,,,
2,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.3,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,1,1.0,,,,2,,,,,
3,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.4,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,0,,,,,0,,,,,
4,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.5,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,0,,,,,0,,,,,


In [None]:
df.columns

Index(['match_id', 'season', 'start_date', 'venue', 'innings', 'ball',
       'batting_team', 'bowling_team', 'striker', 'non_striker', 'bowler',
       'runs_off_bat', 'extras', 'wides', 'noballs', 'byes', 'legbyes',
       'total_runs', 'penalty', 'wicket_type', 'player_dismissed',
       'other_wicket_type', 'other_player_dismissed'],
      dtype='object')

In [None]:
# Rename  'striker' to 'batsman'
df.rename(columns = {'striker':'batsman'},inplace=True)

# Rename 'runs_off_bat' column to 'batsman_runs'
df.rename(columns={'runs_off_bat': 'batsman_runs'}, inplace=True)

In [None]:
def balls_per_dismissal(Balls, Dismissals):
    if Dismissals > 0:
        return Balls/Dismissals
    else:
        return Balls/1

def balls_per_boundary(Balls, Boundaries):
    if Boundaries > 0:
        return Balls/Boundaries
    else:
        return Balls/1

In [None]:
def phase(ball):
    if ball <= 5.6:
        return 'Powerplay'
    elif ball <= 14.6:
        return 'Middle'
    else:
        return 'Death'

In [None]:
df['phase'] = df['ball'].apply(lambda x: phase(x))

In [None]:
df.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,batsman,non_striker,bowler,batsman_runs,extras,wides,noballs,byes,legbyes,total_runs,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed,phase
0,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.1,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,BB McCullum,P Kumar,0,1,,,,1.0,2,,,,,,Powerplay
1,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.2,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,0,,,,,0,,,,,,Powerplay
2,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.3,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,1,1.0,,,,2,,,,,,Powerplay
3,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.4,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,0,,,,,0,,,,,,Powerplay
4,335982,2007/08,18-04-2008,M Chinnaswamy Stadium,1,0.5,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,SC Ganguly,P Kumar,0,0,,,,,0,,,,,,Powerplay


In [None]:
def phasesOfplay(df, current_phase):

    df = df[df.phase == current_phase]
    df.reset_index(inplace=True, drop=True)


    df['isDot'] = df['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['batsman_runs'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['batsman_runs'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['batsman_runs'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['batsman_runs'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['batsman_runs'].apply(lambda x: 1 if x == 6 else 0)

    runs = pd.DataFrame(df.groupby(['batsman', 'match_id'])['batsman_runs'].sum().reset_index()).groupby(['batsman'])['batsman_runs'].sum().reset_index().rename(columns={'batsman_runs':'runs'})
    innings = pd.DataFrame(df.groupby(['batsman'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index()).rename(columns = {'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['batsman'])['match_id'].count()).reset_index().rename(columns = {'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['batsman'])['player_dismissed'].count()).reset_index().rename(columns = {'player_dismissed':'dismissals'})

    dots = pd.DataFrame(df.groupby(['batsman'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['batsman'])['isOne'].sum()).reset_index().rename(columns = {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['batsman'])['isTwo'].sum()).reset_index().rename(columns = {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['batsman'])['isThree'].sum()).reset_index().rename(columns = {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['batsman'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['batsman'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})

    df = pd.merge(innings, runs, on = 'batsman').merge(balls, on = 'batsman').merge(dismissals, on = 'batsman').merge(dots, on = 'batsman').merge(ones, on = 'batsman').merge(twos, on = 'batsman').merge(threes, on = 'batsman').merge(fours, on = 'batsman').merge(sixes, on = 'batsman')

    #StrikeRate
    df['SR'] = df.apply(lambda x: 100*(x['runs']/x['balls']), axis = 1)

    #runs per innings
    df['RPI'] = df.apply(lambda x: x['runs']/x['innings'], axis = 1)

    #balls per dismissals
    df['BPD'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)

    #balls per boundary
    df['BPB'] = df.apply(lambda x: balls_per_boundary(x['balls'], (x['fours'] + x['sixes'])), axis = 1)

    return df

In [None]:
pp_df = phasesOfplay(df, 'Powerplay')
mid_df = phasesOfplay(df, 'Middle')
dth_df = phasesOfplay(df, 'Death')

In [None]:
pp_df.head()

Unnamed: 0,batsman,innings,runs,balls,dismissals,dots,ones,twos,threes,fours,sixes,SR,RPI,BPD,BPB
0,A Ashish Reddy,1,5,6,0,4,1,0,0,1,0,83.333333,5.0,6.0,6.0
1,A Badoni,7,18,38,3,27,8,1,0,2,0,47.368421,2.571429,12.666667,19.0
2,A Chopra,5,23,38,1,24,11,0,0,3,0,60.526316,4.6,38.0,12.666667
3,A Flintoff,1,14,14,0,10,1,0,1,1,1,100.0,14.0,14.0,7.0
4,A Manohar,2,12,18,1,12,4,0,0,2,0,66.666667,6.0,18.0,9.0


## Pair wise comparision matrices using Saaty scale & then arriving at weights for each parameter

In [None]:
wt_sr, wt_rpi, wt_bpd, wt_bpb = 0.38, 0.25, 0.12, 0.26

In [None]:
# filter by min 5 innings
pp_df = pp_df[pp_df.innings >= 20]
pp_df.reset_index(inplace = True, drop = True)

## Calculation - Normalizing all values to same dimensions

In [None]:
pp_df['calc_SR'] = pp_df['SR'].apply(lambda x: x*x)
pp_df['calc_RPI'] = pp_df['RPI'].apply(lambda x: x*x)
pp_df['calc_BPD'] = pp_df['BPD'].apply(lambda x: x*x)
pp_df['calc_BPB'] = pp_df['BPB'].apply(lambda x: x*x)

sq_sr, sq_rpi, sq_bpd, sq_bpb = np.sqrt(pp_df[['calc_SR','calc_RPI', 'calc_BPD', 'calc_BPB']].sum(axis = 0))

pp_df['calc_SR'] = pp_df['calc_SR'].apply(lambda x: x/sq_sr)
pp_df['calc_RPI'] = pp_df['calc_RPI'].apply(lambda x: x/sq_rpi)
pp_df['calc_BPD'] = pp_df['calc_BPD'].apply(lambda x: x/sq_bpd)
pp_df['calc_BPB'] = pp_df['calc_BPB'].apply(lambda x: x/sq_bpb)

pp_df['calc_SR'] = pp_df['calc_SR'].apply(lambda x: x*wt_sr)
pp_df['calc_RPI'] = pp_df['calc_RPI'].apply(lambda x: x*wt_rpi)
pp_df['calc_BPD'] = pp_df['calc_BPD'].apply(lambda x: x*wt_bpd)
pp_df['calc_BPB'] = pp_df['calc_BPB'].apply(lambda x: x*wt_bpb)

best_sr, worst_sr = max(pp_df['calc_SR']), min(pp_df['calc_SR'])
best_rpi, worst_rpi = max(pp_df['calc_RPI']), min(pp_df['calc_RPI'])
best_bpd, worst_bpd = max(pp_df['calc_BPD']), min(pp_df['calc_BPD'])
best_bpb, worst_bpb = min(pp_df['calc_BPB']), max(pp_df['calc_BPB'])

## Calculation - Comparision against the best & worst values

In [None]:
pp_df['dev_best_SR'] = pp_df['calc_SR'].apply(lambda x: (x-best_sr)*(x-best_sr))
pp_df['dev_best_RPI'] = pp_df['calc_RPI'].apply(lambda x: (x-best_rpi)*(x-best_rpi))
pp_df['dev_best_BPD'] = pp_df['calc_BPD'].apply(lambda x: (x-best_bpd)*(x-best_bpd))
pp_df['dev_best_BPB'] = pp_df['calc_BPB'].apply(lambda x: (x-best_bpb)*(x-best_bpb))

pp_df['dev_best_sqrt'] = pp_df.apply(lambda x: x['dev_best_SR'] + x['dev_best_RPI'] + x['dev_best_BPD'] + x['dev_best_BPB'], axis = 1)

pp_df['dev_worst_SR'] = pp_df['calc_SR'].apply(lambda x: (x-worst_sr)*(x-worst_sr))
pp_df['dev_worst_RPI'] = pp_df['calc_RPI'].apply(lambda x: (x-worst_rpi)*(x-worst_rpi))
pp_df['dev_worst_BPD'] = pp_df['calc_BPD'].apply(lambda x: (x-worst_bpd)*(x-worst_bpd))
pp_df['dev_worst_BPB'] = pp_df['calc_BPB'].apply(lambda x: (x-worst_bpb)*(x-worst_bpb))

pp_df['dev_worst_sqrt'] = pp_df.apply(lambda x: x['dev_worst_SR'] + x['dev_worst_RPI'] + x['dev_worst_BPD'] + x['dev_worst_BPB'], axis = 1)

In [None]:
pp_df['score'] = pp_df.apply(lambda x: x['dev_worst_sqrt']/x['dev_best_sqrt'], axis = 1)

In [None]:
pp_df[['batsman', 'score']].head()

Unnamed: 0,batsman,score
0,AB de Villiers,0.105725
1,AC Gilchrist,0.220523
2,AJ Finch,0.056044
3,AK Markram,0.007108
4,AM Rahane,0.08669


# End Result: The best pinch hitters inside Power Play (PP) in the IPL so far (min of 20 ings).

In [None]:
pp_df[[ 'batsman','innings', 'runs', 'balls', 'dismissals', 'fours', 'sixes', 'SR', 'BPB', 'score']].sort_values(['score'], ascending = False).reset_index(drop = True).head(25)

Unnamed: 0,batsman,innings,runs,balls,dismissals,fours,sixes,SR,BPB,score
0,TM Head,20,411,209,8,53,22,196.650718,2.786667,19.107634
1,SP Narine,57,945,573,41,119,55,164.921466,3.293103,2.358584
2,Abhishek Sharma,40,757,500,22,90,35,151.4,4.0,0.988767
3,YBK Jaiswal,52,1009,676,25,149,36,149.260355,3.654054,0.873706
4,PP Shaw,79,1345,945,51,193,37,142.328042,4.108696,0.541475
5,JM Bairstow,47,854,603,22,112,33,141.625207,4.158621,0.531104
6,CA Lynn,40,777,551,17,99,37,141.016334,4.051471,0.522077
7,E Lewis,22,419,297,13,49,21,141.077441,4.242857,0.508319
8,P Simran Singh,31,493,349,20,63,22,141.260745,4.105882,0.502743
9,YK Pathan,31,351,251,12,45,16,139.840637,4.114754,0.456649


In [None]:
import plotly.graph_objects as go

# Extracting top pinch hitters dataframe
top_pinch_hitters = pp_df[['batsman', 'score']].sort_values('score', ascending=False).head(5)

# Create diverging colorscale
colorscale = [[0, 'blue'], [0.5, 'lightblue'], [0.5, 'red'], [1, 'green']]

# Create bar plot
fig = go.Figure(data=[go.Bar(
    x=top_pinch_hitters['batsman'],
    y=top_pinch_hitters['score'],
    marker=dict(color=top_pinch_hitters['score'], coloraxis="coloraxis"),
    text=top_pinch_hitters['score'],
    textposition='inside',)])

# Update layout
fig.update_layout(
    title=dict(text='Top Pinch Hitters Inside Power Play in IPL', font=dict(color='orange')),
    xaxis_title=dict(text='Batsman', font=dict(color='orange', size=14)),
    yaxis_title=dict(text='Score', font=dict(color='orange', size=14)),
    xaxis=dict(tickangle=45, tickfont=dict(color='red', size=12)),
    yaxis=dict(tickformat=".2f", tickfont=dict(color='orange', size=12)),
    coloraxis=dict(colorscale=colorscale),
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='black', size=10))

# Show the plot
fig.show()


# The top three pinch hitters in IPL Power Play (min. 20 innings) displayed remarkable batting powers:



1.   Travis Head: Travis Head, the Australian left-hander, has been setting the IPL stage on fire with his explosive batting with a striking 196.65 Strike Rate, scoring an immpressive 411 runs.
1.   SP Narine:
2.   Abhishek Sharma:
