# Bowling Strategy based on Venues

In [1]:
import math
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

# Show every column and row, keep wide frames on one line, and never
# truncate cell text when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
# None is the supported "no limit" value (pandas >= 1.0); the old -1 sentinel
# was deprecated in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)

import matplotlib.pyplot as plt

In [2]:
# Load the deliveries and matches tables from CSV files; the paths are
# relative, so both files must be in the working directory.
deliveries = pd.read_csv('deliveries_updated_mens_ipl.csv')
matches = pd.read_csv('matches_updated_mens_ipl.csv')

In [3]:
# Work on copies so the frames as loaded from disk stay untouched.
df = deliveries.copy()
mdf = matches.copy()

In [4]:
def is_bowler_wicket(player_dismissed, dismissal_kind):
    """Return 1 when a delivery's dismissal is credited to the bowler, else 0.

    Args:
        player_dismissed: Name of the dismissed player. Any non-string value
            (for example the NaN pandas uses for a missing cell) means that
            nobody was dismissed.
        dismissal_kind: How the player got out.

    Returns:
        1 if a player was dismissed and the dismissal kind is not one of
        'run out', 'retired hurt' or 'obstructing the field'; otherwise 0.
    """
    # Dismissal kinds that are not counted as the bowler's wicket.
    not_credited = ('run out', 'retired hurt', 'obstructing the field')

    if not isinstance(player_dismissed, str):
        return 0
    return 0 if dismissal_kind in not_credited else 1

In [5]:
# Flag every delivery row with 1 when its dismissal counts for the bowler.
df['isBowlerWk'] = [
    is_bowler_wicket(dismissed, kind)
    for dismissed, kind in zip(df['player_dismissed'], df['dismissal_kind'])
]

In [6]:
# Runs added by each delivery: runs off the bat plus extras.
df['total_runs'] = df['batsman_runs'].add(df['extras'])

In [7]:
# Attach the match-level columns to every delivery; the left join keeps
# each delivery row even if its matchId has no entry in mdf.
cdf = pd.merge(df, mdf, how='left', on='matchId')

In [8]:
def phase(over):
    """Map an over number to the name of the innings phase it belongs to.

    Overs below 6 are 'Powerplay Overs', overs below 16 are 'Middle Overs'
    and everything else is 'Death Overs'.

    NOTE(review): these cut-offs read as if ``over`` is 0-indexed - confirm
    against the deliveries data.
    """
    # Exclusive upper bounds, checked in ascending order.
    phase_limits = ((6, 'Powerplay Overs'), (16, 'Middle Overs'))
    for upper_bound, label in phase_limits:
        if over < upper_bound:
            return label
    return 'Death Overs'

In [9]:
# Label every delivery with the phase of the innings its over falls in.
cdf['Phase'] = cdf['over'].map(phase)

In [10]:
# Keep only the deliveries bowled at the Chinnaswamy ground, which appears in
# the data under two spellings. The .copy() makes cdf1 an independent frame,
# so assigning columns to it later is not a chained assignment on a filtered
# selection of cdf.
cdf1 = cdf[cdf['venue'].isin(['M.Chinnaswamy Stadium', 'M Chinnaswamy Stadium'])].copy()

In [11]:
# Collapse both spellings of the ground into one venue label.
cdf1['venue'] = cdf1['venue'].replace({
    'M.Chinnaswamy Stadium': 'Chinnaswamy Bangalore',
    'M Chinnaswamy Stadium': 'Chinnaswamy Bangalore',
})

In [12]:
def SR(Balls, Dismissals):
    """Return the bowling strike rate: balls bowled per dismissal.

    With no dismissals the ball count is divided by 1, so the result is the
    number of balls bowled (as a float).
    """
    divisor = Dismissals if Dismissals > 0 else 1
    return Balls / divisor
    
def Eco(Runs_Conceded, Balls):
    """Return the runs conceded per ball.

    The value is per ball, not per over; a caller that wants runs per over
    has to scale it. When no balls were bowled the sentinel 1000 is returned.
    """
    if not Balls > 0:
        return 1000
    return Runs_Conceded / Balls
    
def Avg(Runs_Conceded, Dismissals):
    """Return the bowling average: runs conceded per dismissal.

    Without any dismissal the result is the sentinel 1000 when no runs were
    conceded either, and otherwise the runs conceded divided by 1.
    """
    if Dismissals > 0:
        return Runs_Conceded / Dismissals
    return 1000 if Runs_Conceded == 0 else Runs_Conceded / 1

In [13]:
def BowlerStats(df, current_Phase, current_venue):
    """Aggregate per-bowler statistics for one phase of play at one venue.

    Args:
        df: Delivery-level frame with at least the columns 'Phase', 'venue',
            'bowler', 'matchId', 'total_runs' and 'isBowlerWk'. It is not
            modified.
        current_Phase: Value of the 'Phase' column to keep.
        current_venue: Value of the 'venue' column to keep.

    Returns:
        A DataFrame with one row per bowler, ordered by bowler, with the
        columns bowler, Innings (distinct matchId values), Runs, Balls
        (delivery rows), Dismissals, Fours, Sixes, Dots, Ones, Twos, Threes
        (deliveries whose 'total_runs' equals 4, 6, 0, 1, 2, 3), Dot%, SR,
        Eco (runs per six balls) and Avg. When no delivery matches the phase
        and venue, the result is an empty frame with the same columns.
    """
    # Copy the selection so the indicator columns below are written to an
    # independent frame rather than to a filtered selection of the input.
    selected = df[(df['Phase'] == current_Phase) & (df['venue'] == current_venue)].copy()

    # One 0/1 indicator column per scoring outcome of a delivery.
    outcome_runs = {
        'isDot': 0,
        'isOne': 1,
        'isTwo': 2,
        'isThree': 3,
        'isFour': 4,
        'isSix': 6,
    }
    for indicator, runs_scored in outcome_runs.items():
        selected[indicator] = (selected['total_runs'] == runs_scored).astype(int)

    # A single grouped aggregation replaces ten separate groupbys and merges.
    stats = selected.groupby('bowler').agg(
        Innings=('matchId', 'nunique'),
        Runs=('total_runs', 'sum'),
        Balls=('matchId', 'count'),
        Dismissals=('isBowlerWk', 'sum'),
        Fours=('isFour', 'sum'),
        Sixes=('isSix', 'sum'),
        Dots=('isDot', 'sum'),
        Ones=('isOne', 'sum'),
        Twos=('isTwo', 'sum'),
        Threes=('isThree', 'sum'),
    ).reset_index()

    stats['Dot%'] = 100 * (stats['Dots'] / stats['Balls'])

    # Build the ratio columns from plain lists: a row-wise DataFrame.apply on
    # an empty frame returns a DataFrame, which cannot be assigned to a
    # single column, so an unmatched phase/venue used to raise.
    stats['SR'] = pd.Series(
        [SR(balls, wickets) for balls, wickets in zip(stats['Balls'], stats['Dismissals'])],
        index=stats.index, dtype='float64',
    )
    stats['Eco'] = pd.Series(
        [6 * Eco(runs, balls) for runs, balls in zip(stats['Runs'], stats['Balls'])],
        index=stats.index, dtype='float64',
    )
    stats['Avg'] = pd.Series(
        [Avg(runs, wickets) for runs, wickets in zip(stats['Runs'], stats['Dismissals'])],
        index=stats.index, dtype='float64',
    )

    return stats

In [14]:
# One per-bowler stats table for each phase at the Chinnaswamy ground:
# df5 = powerplay, df6 = middle overs, df7 = death overs.
df5, df6, df7 = (
    BowlerStats(cdf1, phase_name, 'Chinnaswamy Bangalore')
    for phase_name in ('Powerplay Overs', 'Middle Overs', 'Death Overs')
)

In [15]:
# Preview the first rows of the powerplay table.
df5.head()

Unnamed: 0,bowler,Innings,Runs,Balls,Dismissals,Fours,Sixes,Dots,Ones,Twos,Threes,Dot%,SR,Eco,Avg
0,A Chandila,1,4,6,0,0,0,3,2,1,0,50.0,6.0,4.0,4.0
1,A Choudhary,2,20,18,1,2,1,10,4,1,0,55.555556,18.0,6.666667,20.0
2,A Kumble,3,16,18,0,2,0,9,6,1,0,50.0,18.0,5.333333,16.0
3,A Mithun,3,30,25,0,4,0,7,14,0,0,28.0,25.0,7.2,30.0
4,A Nehra,6,98,85,4,9,5,44,22,5,0,51.764706,21.25,6.917647,24.5


In [16]:
# Preview the first rows of the middle-overs table.
df6.head()

Unnamed: 0,bowler,Innings,Runs,Balls,Dismissals,Fours,Sixes,Dots,Ones,Twos,Threes,Dot%,SR,Eco,Avg
0,A Ashish Reddy,4,73,42,1,5,5,14,14,3,1,33.333333,42.0,10.428571,73.0
1,A Chandila,1,11,12,0,0,1,6,5,0,0,50.0,12.0,5.5,11.0
2,A Choudhary,2,26,25,1,2,1,12,8,2,0,48.0,25.0,6.24,26.0
3,A Kumble,12,306,246,6,26,9,78,121,11,0,31.707317,41.0,7.463415,51.0
4,A Mishra,5,159,104,4,13,7,26,51,7,0,25.0,26.0,9.173077,39.75


In [17]:
# Preview the first rows of the death-overs table.
df7.head()

Unnamed: 0,bowler,Innings,Runs,Balls,Dismissals,Fours,Sixes,Dots,Ones,Twos,Threes,Dot%,SR,Eco,Avg
0,A Ashish Reddy,1,13,6,1,1,1,1,3,0,0,16.666667,6.0,13.0,13.0
1,A Chandila,1,6,6,0,0,0,1,4,1,0,16.666667,6.0,6.0,6.0
2,A Choudhary,1,11,12,1,1,0,6,3,2,0,50.0,12.0,5.5,11.0
3,A Kumble,5,41,27,3,1,4,9,13,0,0,33.333333,9.0,9.111111,13.666667
4,A Mithun,5,75,44,2,6,5,14,17,2,0,31.818182,22.0,10.227273,37.5


In [18]:
# Mean economy rate per phase (powerplay, middle, death). The mean is taken
# over bowlers, so each bowler counts once regardless of balls bowled.
np.mean(df5['Eco']),np.mean(df6['Eco']),np.mean(df7['Eco'])

(8.140861917724017, 8.441590025867663, 11.24405110463114)

In [19]:
# Mean strike rate per phase, unweighted across bowlers. Bowlers without a
# dismissal contribute their ball count (see SR), which affects the mean.
np.mean(df5['SR']),np.mean(df6['SR']),np.mean(df7['SR'])

(18.600346320346322, 21.326092853687197, 10.919135802469135)

In [20]:
# Mean bowling average per phase, unweighted across bowlers. Bowlers without
# a dismissal contribute their runs conceded, or 1000 if they conceded none
# (see Avg).
np.mean(df5['Avg']),np.mean(df6['Avg']),np.mean(df7['Avg'])

(24.310317460317457, 30.26602117745986, 20.381035141329264)

In [21]:
# Mean dot-ball percentage per phase, unweighted across bowlers.
np.mean(df5['Dot%']),np.mean(df6['Dot%']),np.mean(df7['Dot%'])

(42.12999081627968, 29.182536712704596, 26.105667671208717)

## Normalization

In [42]:
# Drop bowlers with fewer than 24 balls in the powerplay table. The .copy()
# makes the result an independent frame, so adding columns to it afterwards
# is not a chained assignment on a filtered selection.
df5 = df5[df5['Balls'] >= 24].copy()

In [43]:
def _min_max_scale(values, low, high):
    """Rescale ``values`` linearly so that ``low`` maps to 0 and ``high`` to 1.

    When ``high - low`` is not positive (a constant or an empty column) every
    row gets 0.0 instead of the NaN/inf that dividing by a zero range gives.
    """
    spread = high - low
    if not spread > 0:
        return pd.Series(0.0, index=values.index)
    return (values - low) / spread


# Column extremes. Series.max()/min() skip NaN, whereas the builtin
# max()/min() return a result that depends on where a NaN sits in the column.
max_val, min_val = df5['Dot%'].max(), df5['Dot%'].min()
max_val1, min_val1 = df5['SR'].max(), df5['SR'].min()
max_val2, min_val2 = df5['Eco'].max(), df5['Eco'].min()
max_val3, min_val3 = df5['Avg'].max(), df5['Avg'].min()

# assign() returns a new frame, so the normalised columns are never written
# onto a filtered selection of an earlier frame.
df5 = df5.assign(**{
    'Dot% Norm': _min_max_scale(df5['Dot%'], min_val, max_val),
    'SR Norm': _min_max_scale(df5['SR'], min_val1, max_val1),
    'Eco Norm': _min_max_scale(df5['Eco'], min_val2, max_val2),
    'Avg Norm': _min_max_scale(df5['Avg'], min_val3, max_val3),
})

In [45]:
# Preview the first three rows of the powerplay table with the normalised columns.
df5.head(3)

Unnamed: 0,bowler,Innings,Runs,Balls,Dismissals,Fours,Sixes,Dots,Ones,Twos,Threes,Dot%,SR,Eco,Avg,Eco Norm,Dot% Norm,SR Norm,Avg Norm
3,A Mithun,3,30,25,0,4,0,7,14,0,0,28.0,25.0,7.2,30.0,0.431718,0.0,0.200351,0.148073
4,A Nehra,6,98,85,4,9,5,44,22,5,0,51.764706,21.25,6.917647,24.5,0.387147,0.578955,0.154218,0.109026
8,AB Dinda,8,113,109,5,9,2,44,44,9,1,40.366972,21.8,6.220183,22.6,0.277048,0.301284,0.160984,0.095538


In [47]:
# Weighted composite of the four normalised metrics, computed column-wise.
# (A row-wise apply on an empty frame returns a DataFrame, which cannot be
# assigned to a single column.)
# NOTE(review): the weights add up to 0.99, not 1.0 - confirm this is intended.
# NOTE(review): all four terms are added with positive weights. If a higher
# Dot% is good for a bowler while a lower SR/Eco/Avg is good, the terms pull
# the score in opposite directions - confirm whether some should be inverted.
df5['Score'] = (
    df5['Dot% Norm'] * 0.13
    + df5['SR Norm'] * 0.35
    + df5['Eco Norm'] * 0.16
    + df5['Avg Norm'] * 0.35
)

# STANDARDIZATION

### Mean_value = np.mean(df5['Eco'])
### Standard_Deviation = np.std(df5['Eco'])

## df5['Z_Eco'] = df5['Eco'].apply(lambda x : (x - Mean_value)/Standard_Deviation)

## For the bowling strategy, split the innings into the same phases used in the batting analysis and build a DataFrame for each phase. From those per-phase DataFrames, extract the required data, then apply techniques such as normalization, standardization and the TOPSIS method for a deeper analysis.