In [1]:
import math
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

# Show every column and row, keep wide frames on one line, and never truncate
# cell text when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
# None is the supported "no limit" value; the old -1 sentinel is deprecated.
pd.set_option('display.max_colwidth', None)

import matplotlib.pyplot as plt

In [2]:
# Load the raw delivery-level CSV; the path is relative to the working directory.
deliveries = pd.read_csv(filepath_or_buffer='T20I-csv-files.csv')

In [3]:
# Work on an independent copy so the raw `deliveries` frame stays untouched.
df = deliveries.copy(deep=True)

## Some rows repeat the CSV header (the column names appear as values). The cell below drops those rows and fills missing run values with 0

In [4]:
# Rows whose `runs_off_bat` holds the literal text 'NaN' or the column name
# itself are stray header/placeholder rows, not deliveries: filter them out.
# (The old `df1 = df.drop(..., inplace=True)` only ever bound `df1` to None.)
is_header_row = df['runs_off_bat'].isin(['NaN', 'runs_off_bat'])
df = df.loc[~is_header_row].copy()

# Treat missing run values as zero so the columns can be cast to integers.
df['runs_off_bat'] = df['runs_off_bat'].fillna(0)
df['extras'] = df['extras'].fillna(0)

## Converting the run columns to an integer dtype

In [5]:
# Cast both run columns to 64-bit integers in one pass.
df = df.astype(dict.fromkeys(["runs_off_bat", "extras"], "int64"))

In [6]:
# Runs added to the team total on a delivery: off the bat plus extras.
df['Total_Runs'] = df['runs_off_bat'].add(df['extras'])

In [7]:
# Keep the original "over.ball" value before `ball` is overwritten below.
df['overs'] = df['ball']

# Split on the text form, e.g. 0.1 -> '0.1', 19.6 -> '19.6'.
# A missing value becomes the text 'nan', which yields over == 'na'.
# NOTE(review): if `ball` was parsed as a float, a tenth delivery ('0.10')
# is indistinguishable from the first ('0.1') -- confirm against the raw CSV.
ball_text = df['ball'].astype(str)

# Over = everything before the last character, with the decimal point removed.
# regex=False makes '.' a literal dot on every pandas version (as a regex it
# would match any character).
df['over'] = ball_text.str[:-1].str.replace('.', '', regex=False)

# Ball within the over = the last character.
df['ball'] = ball_text.str[-1:]

## Dropping rows whose `over` is the text 'na' (what remains of the string 'nan' produced by a missing `ball` value) and converting the `over` column to int

In [8]:
# Rows with over == 'na' come from a missing `ball` value and cannot be cast
# to int, so keep only the other rows. (The previous version tested the same
# condition twice and bound `df2` to the None returned by an in-place drop.)
df = df.loc[df['over'] != 'na'].copy()

In [9]:
# `over` still holds digit strings at this point; store it as 64-bit integers.
df = df.assign(over=df['over'].astype('int64'))

# Parameters

In [10]:
def BPD(balls, dismissals):
    """Balls per dismissal.

    Returns ``balls / dismissals``; when there are no dismissals the ball
    count itself is returned (as a float), i.e. the divisor falls back to 1.
    """
    divisor = dismissals if dismissals > 0 else 1
    return balls / divisor
    
def BPB(balls, boundaries):
    """Balls per boundary.

    Returns ``balls / boundaries``; with no boundaries the ball count itself
    is returned (as a float).
    """
    if not boundaries > 0:
        # No boundaries: divide by 1 so the result is still a float.
        return balls / 1
    return balls / boundaries

def phase(over, powerplay_end=5, middle_end=14):
    """Classify an over number into an innings phase.

    Parameters
    ----------
    over : int
        Over index as stored in the ``over`` column (the first over is 0).
    powerplay_end : int, default 5
        Last over (inclusive) labelled 'Powerplay'.
    middle_end : int, default 14
        Last over (inclusive) labelled 'Middle'.

    Returns
    -------
    str
        'Powerplay', 'Middle' or 'Death'.
    """
    if over <= powerplay_end:
        return 'Powerplay'
    if over <= middle_end:
        return 'Middle'
    return 'Death'

In [11]:
# Label every delivery with the innings phase derived from its over number.
df['Phase'] = df['over'].map(phase)

## By Venue, By Phase and by Opposition

In [12]:
def ByCustom(df,current_venue, current_phase, current_opposition):
    """Per-striker batting summary at one venue, in one phase, against one team.

    Parameters
    ----------
    df : pandas.DataFrame
        Delivery-level frame with at least the columns ``venue``, ``Phase``,
        ``bowling_team``, ``striker``, ``match_id``, ``runs_off_bat`` and
        ``player_dismissed``. It is not modified.
    current_venue, current_phase, current_opposition
        Values matched exactly against ``venue``, ``Phase`` and
        ``bowling_team`` respectively.

    Returns
    -------
    pandas.DataFrame
        One row per striker (sorted by name) with the columns ``striker,
        Innings, Runs, Balls, Dismissals, Fours, Sixes, Dots, RPI, SR, BPD,
        BPB, Dot%``. If no delivery matches, an empty frame with the same
        columns is returned.

    Notes
    -----
    ``Balls`` counts every matching row for the striker and ``Dismissals``
    counts every non-null ``player_dismissed`` on those rows.
    NOTE(review): this presumably includes wides and dismissals of the
    non-striker -- confirm that this is intended.
    """
    selected = (
        (df['venue'] == current_venue)
        & (df['Phase'] == current_phase)
        & (df['bowling_team'] == current_opposition)
    )
    # .loc with a column list yields a fresh frame, so the caller's data is
    # never written to (no SettingWithCopy issue).
    faced = df.loc[selected, ['striker', 'match_id', 'runs_off_bat', 'player_dismissed']].assign(
        isDot=lambda d: (d['runs_off_bat'] == 0).astype('int64'),
        isFour=lambda d: (d['runs_off_bat'] == 4).astype('int64'),
        isSix=lambda d: (d['runs_off_bat'] == 6).astype('int64'),
    )

    summary = (
        faced.groupby('striker')
        .agg(
            Innings=('match_id', 'nunique'),
            Runs=('runs_off_bat', 'sum'),
            Balls=('match_id', 'count'),
            Dismissals=('player_dismissed', 'count'),
            Fours=('isFour', 'sum'),
            Sixes=('isSix', 'sum'),
            Dots=('isDot', 'sum'),
        )
        .reset_index()
    )

    # Column arithmetic instead of row-wise apply: apply(axis=1) on an empty
    # frame returns a DataFrame, which cannot be assigned to a single column.
    boundaries = summary['Fours'] + summary['Sixes']
    summary['RPI'] = summary['Runs'] / summary['Innings']
    summary['SR'] = 100 * (summary['Runs'] / summary['Balls'])
    # With no dismissals / boundaries the divisor falls back to 1.
    summary['BPD'] = summary['Balls'] / summary['Dismissals'].where(summary['Dismissals'] > 0, 1)
    summary['BPB'] = summary['Balls'] / boundaries.where(boundaries > 0, 1)
    summary['Dot%'] = summary['Dots'] / summary['Balls']

    return summary

In [13]:
# ByCustom(df,'Melbourne Cricket Ground', 'Powerplay', 'Sri Lanka')

## By Striker, Phase and Opposition

In [14]:
def ByStriker(df,current_striker, current_phase, current_opposition):
    """Batting summary for one striker, in one phase, against one team.

    Parameters
    ----------
    df : pandas.DataFrame
        Delivery-level frame with at least the columns ``striker``, ``Phase``,
        ``bowling_team``, ``match_id``, ``runs_off_bat`` and
        ``player_dismissed``. It is not modified.
    current_striker, current_phase, current_opposition
        Values matched exactly against ``striker``, ``Phase`` and
        ``bowling_team`` respectively.

    Returns
    -------
    pandas.DataFrame
        A single-row frame (or an empty one when nothing matches) with the
        columns ``striker, Innings, Runs, Balls, Dismissals, Fours, Sixes,
        Dots, RPI, SR, BPD, BPB, Dot%``.

    Notes
    -----
    ``Balls`` counts every matching row and ``Dismissals`` every non-null
    ``player_dismissed`` on those rows.
    NOTE(review): this presumably includes wides and dismissals of the
    non-striker -- confirm that this is intended.
    """
    selected = (
        (df['striker'] == current_striker)
        & (df['Phase'] == current_phase)
        & (df['bowling_team'] == current_opposition)
    )
    # Fresh frame: the caller's data is never written to.
    faced = df.loc[selected, ['striker', 'match_id', 'runs_off_bat', 'player_dismissed']].assign(
        isDot=lambda d: (d['runs_off_bat'] == 0).astype('int64'),
        isFour=lambda d: (d['runs_off_bat'] == 4).astype('int64'),
        isSix=lambda d: (d['runs_off_bat'] == 6).astype('int64'),
    )

    summary = (
        faced.groupby('striker')
        .agg(
            Innings=('match_id', 'nunique'),
            Runs=('runs_off_bat', 'sum'),
            Balls=('match_id', 'count'),
            Dismissals=('player_dismissed', 'count'),
            Fours=('isFour', 'sum'),
            Sixes=('isSix', 'sum'),
            Dots=('isDot', 'sum'),
        )
        .reset_index()
    )

    # Column arithmetic instead of row-wise apply: apply(axis=1) on an empty
    # frame returns a DataFrame, which cannot be assigned to a single column.
    boundaries = summary['Fours'] + summary['Sixes']
    summary['RPI'] = summary['Runs'] / summary['Innings']
    summary['SR'] = 100 * (summary['Runs'] / summary['Balls'])
    # With no dismissals / boundaries the divisor falls back to 1.
    summary['BPD'] = summary['Balls'] / summary['Dismissals'].where(summary['Dismissals'] > 0, 1)
    summary['BPB'] = summary['Balls'] / boundaries.where(boundaries > 0, 1)
    summary['Dot%'] = summary['Dots'] / summary['Balls']

    return summary

In [15]:
# ByStriker(df,'DA Warner','Middle','India')

In [16]:
# Preview the first ten rows of the cleaned frame, including the derived
# Total_Runs, overs, over and Phase columns.
df.head(10)

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed,Total_Runs,overs,over,Phase
0,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,1,Australia,Sri Lanka,AJ Finch,M Klinger,SL Malinga,0,0,,,,,,,,,,0,0.1,0,Powerplay
1,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,2,Australia,Sri Lanka,AJ Finch,M Klinger,SL Malinga,0,0,,,,,,,,,,0,0.2,0,Powerplay
2,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,3,Australia,Sri Lanka,AJ Finch,M Klinger,SL Malinga,1,0,,,,,,,,,,1,0.3,0,Powerplay
3,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,4,Australia,Sri Lanka,M Klinger,AJ Finch,SL Malinga,2,0,,,,,,,,,,2,0.4,0,Powerplay
4,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,5,Australia,Sri Lanka,M Klinger,AJ Finch,SL Malinga,0,0,,,,,,,,,,0,0.5,0,Powerplay
5,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,6,Australia,Sri Lanka,M Klinger,AJ Finch,SL Malinga,3,0,,,,,,,,,,3,0.6,0,Powerplay
6,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,1,Australia,Sri Lanka,M Klinger,AJ Finch,KMDN Kulasekara,0,0,,,,,,,,,,0,1.1,1,Powerplay
7,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,2,Australia,Sri Lanka,M Klinger,AJ Finch,KMDN Kulasekara,1,0,,,,,,,,,,1,1.2,1,Powerplay
8,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,3,Australia,Sri Lanka,AJ Finch,M Klinger,KMDN Kulasekara,0,0,,,,,,,,,,0,1.3,1,Powerplay
9,1001349,2016/17,2017-02-17,Melbourne Cricket Ground,1,4,Australia,Sri Lanka,AJ Finch,M Klinger,KMDN Kulasekara,0,0,,,,,,,,,,0,1.4,1,Powerplay


In [17]:
def ByTeams(df,current_team,same_team):
    """Per-striker batting summary for one batting side against one bowling side.

    Parameters
    ----------
    df : pandas.DataFrame
        Delivery-level frame with at least the columns ``bowling_team``,
        ``batting_team``, ``striker``, ``match_id``, ``runs_off_bat`` and
        ``player_dismissed``. It is not modified.
    current_team
        Value matched exactly against ``bowling_team`` (the opposition).
    same_team
        Value matched exactly against ``batting_team`` (the side whose
        batters are summarised).

    Returns
    -------
    pandas.DataFrame
        One row per striker (sorted by name) with the columns ``striker,
        Innings, Runs, Balls, Dismissals, Fours, Sixes, Dots, RPI, SR, BPD,
        BPB, Dot%``. If no delivery matches, an empty frame with the same
        columns is returned.

    Notes
    -----
    ``Balls`` counts every matching row for the striker and ``Dismissals``
    counts every non-null ``player_dismissed`` on those rows.
    NOTE(review): this presumably includes wides and dismissals of the
    non-striker -- confirm that this is intended.
    """
    selected = (df['bowling_team'] == current_team) & (df['batting_team'] == same_team)
    # Fresh frame: the caller's data is never written to.
    faced = df.loc[selected, ['striker', 'match_id', 'runs_off_bat', 'player_dismissed']].assign(
        isDot=lambda d: (d['runs_off_bat'] == 0).astype('int64'),
        isFour=lambda d: (d['runs_off_bat'] == 4).astype('int64'),
        isSix=lambda d: (d['runs_off_bat'] == 6).astype('int64'),
    )

    summary = (
        faced.groupby('striker')
        .agg(
            Innings=('match_id', 'nunique'),
            Runs=('runs_off_bat', 'sum'),
            Balls=('match_id', 'count'),
            Dismissals=('player_dismissed', 'count'),
            Fours=('isFour', 'sum'),
            Sixes=('isSix', 'sum'),
            Dots=('isDot', 'sum'),
        )
        .reset_index()
    )

    # Column arithmetic instead of row-wise apply: apply(axis=1) on an empty
    # frame returns a DataFrame, which cannot be assigned to a single column.
    boundaries = summary['Fours'] + summary['Sixes']
    summary['RPI'] = summary['Runs'] / summary['Innings']
    summary['SR'] = 100 * (summary['Runs'] / summary['Balls'])
    # With no dismissals / boundaries the divisor falls back to 1.
    summary['BPD'] = summary['Balls'] / summary['Dismissals'].where(summary['Dismissals'] > 0, 1)
    summary['BPB'] = summary['Balls'] / boundaries.where(boundaries > 0, 1)
    summary['Dot%'] = summary['Dots'] / summary['Balls']

    return summary

In [18]:
# England's batters against Ireland: the first team argument filters
# `bowling_team`, the second filters `batting_team`.
ByTeams(df, 'Ireland', 'England')

Unnamed: 0,striker,Innings,Runs,Balls,Dismissals,Fours,Sixes,Dots,RPI,SR,BPD,BPB,Dot%
0,C Kieswetter,1,13,17,1,0,0,10,13.0,76.470588,17.0,17.0,0.588235
1,EJG Morgan,1,45,37,1,5,0,13,45.0,121.621622,37.0,7.4,0.351351
2,GP Swann,1,7,6,1,0,0,1,7.0,116.666667,6.0,6.0,0.166667
3,KP Pietersen,1,9,18,1,0,0,9,9.0,50.0,18.0,18.0,0.5
4,LJ Wright,1,20,25,1,0,1,14,20.0,80.0,25.0,25.0,0.56
5,MJ Lumb,1,14,11,1,3,0,6,14.0,127.272727,11.0,3.666667,0.545455
6,PD Collingwood,1,0,3,1,0,0,3,0.0,0.0,3.0,3.0,1.0
7,TT Bresnan,1,5,6,1,1,0,4,5.0,83.333333,6.0,6.0,0.666667


## Same for Bowling

In [19]:
def is_bowler_wicket(player_dismissed, dismissal_kind):
    """Return 1 if the wicket on a delivery is credited to the bowler, else 0.

    Parameters
    ----------
    player_dismissed : str or float
        Name of the dismissed player; any non-string value (e.g. NaN) means
        no wicket fell on this delivery.
    dismissal_kind : str or float
        The wicket type recorded for the delivery.

    Returns
    -------
    int
        0 when nobody was dismissed or the dismissal is one of the kinds that
        is not credited to the bowler, otherwise 1.
    """
    # Dismissal kinds that do not count towards the bowler's wickets.
    not_credited_to_bowler = (
        'run out',
        'retired hurt',
        'obstructing the field',
        'retired out',
        'retired not out',
        'timed out',
        'handled the ball',
        'hit the ball twice',
    )
    if not isinstance(player_dismissed, str):
        return 0
    return 0 if dismissal_kind in not_credited_to_bowler else 1

# Flag each delivery on which the bowler is credited with a wicket.
# Walking the two columns in parallel avoids building a row Series for every
# delivery (as DataFrame.apply(axis=1) does) and also works on an empty frame.
df['isBowlerWk'] = [
    is_bowler_wicket(dismissed, kind)
    for dismissed, kind in zip(df['player_dismissed'], df['wicket_type'])
]

In [20]:
def Bowl_SR(Balls,Dismissals):
    """Bowling strike rate: balls bowled per wicket.

    With no wickets the ball count itself is returned (as a float).
    """
    wickets = Dismissals if Dismissals > 0 else 1
    return Balls / wickets
    
def Eco(Runs_Conceded,Balls):
    """Runs conceded per ball.

    Returns the sentinel 1000 when no balls were bowled.
    """
    return Runs_Conceded / Balls if Balls > 0 else 1000
    
def Bowl_Avg(Runs_Conceded,Dismissals):
    """Bowling average: runs conceded per wicket.

    Without wickets, returns the sentinel 1000 if no runs were conceded
    either, otherwise the runs conceded (as a float).
    """
    if Dismissals > 0:
        return Runs_Conceded / Dismissals
    # No wickets from here on.
    return 1000 if Runs_Conceded == 0 else Runs_Conceded / 1
    
def BPD(Balls, Dismissals):
    """Balls per dismissal (the ball count itself when there are none).

    This redefines the ``BPD`` helper from the earlier "Parameters" cell with
    the same behaviour; only the parameter names are capitalised.
    """
    if not Dismissals > 0:
        return Balls / 1
    return Balls / Dismissals
    
def BPB(Balls, Boundaries):
    """Balls per boundary (the ball count itself when there are none).

    This redefines the ``BPB`` helper from the earlier "Parameters" cell with
    the same behaviour; only the parameter names are capitalised.
    """
    per = Boundaries if Boundaries > 0 else 1
    return Balls / per

## By Opposition

In [21]:
def ByOpposition(df,current_team,same_team):
    """Per-bowler summary for one bowling side against one batting side.

    Parameters
    ----------
    df : pandas.DataFrame
        Delivery-level frame with at least the columns ``bowling_team``,
        ``batting_team``, ``bowler``, ``match_id``, ``Total_Runs`` and
        ``isBowlerWk``. It is not modified.
    current_team
        Value matched exactly against ``bowling_team``.
    same_team
        Value matched exactly against ``batting_team``.

    Returns
    -------
    pandas.DataFrame
        One row per bowler, ordered by ``Innings`` descending (ties keep
        alphabetical bowler order), with the columns ``bowler, Innings, Runs,
        Balls, Dismissals, Fours, Sixes, Dots, Eco, Bowl_SR, Bowl_Avg,
        Bowl_Dot%``. If no delivery matches, an empty frame with the same
        columns is returned.

    Notes
    -----
    ``Balls`` counts every matching row, and fours/sixes/dots are derived from
    ``Total_Runs`` (runs off the bat plus extras).
    NOTE(review): this presumably counts wides/no-balls as balls and e.g.
    four byes as a four conceded -- confirm that this is intended.
    """
    selected = (df['bowling_team'] == current_team) & (df['batting_team'] == same_team)
    # Fresh frame: the caller's data is never written to.
    bowled = df.loc[selected, ['bowler', 'match_id', 'Total_Runs', 'isBowlerWk']].assign(
        isDot=lambda d: (d['Total_Runs'] == 0).astype('int64'),
        isFour=lambda d: (d['Total_Runs'] == 4).astype('int64'),
        isSix=lambda d: (d['Total_Runs'] == 6).astype('int64'),
    )

    summary = (
        bowled.groupby('bowler')
        .agg(
            Innings=('match_id', 'nunique'),
            Runs=('Total_Runs', 'sum'),
            Balls=('match_id', 'count'),
            Dismissals=('isBowlerWk', 'sum'),
            Fours=('isFour', 'sum'),
            Sixes=('isSix', 'sum'),
            Dots=('isDot', 'sum'),
        )
        .reset_index()
        # A stable sort keeps the ordering of ties deterministic.
        .sort_values(['Innings'], ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )

    # Column arithmetic instead of row-wise apply: apply(axis=1) on an empty
    # frame returns a DataFrame, which cannot be assigned to a single column.
    has_wicket = summary['Dismissals'] > 0
    wicket_divisor = summary['Dismissals'].where(has_wicket, 1)
    # Runs per over; 6000 (6 x the 1000 sentinel) when no balls were counted.
    summary['Eco'] = (6 * (summary['Runs'] / summary['Balls'])).where(summary['Balls'] > 0, 6000.0)
    summary['Bowl_SR'] = summary['Balls'] / wicket_divisor
    # No wickets and no runs conceded -> sentinel 1000; no wickets -> runs conceded.
    summary['Bowl_Avg'] = (summary['Runs'] / wicket_divisor).mask(~has_wicket & (summary['Runs'] == 0), 1000.0)
    summary['Bowl_Dot%'] = 100 * (summary['Dots'] / summary['Balls'])

    return summary

In [22]:
# ByOpposition(df,'Australia','Sri Lanka')

## By Phase and Opposition

In [23]:
def ByPhase(df,current_phase,current_team,same_team):
    """Per-bowler summary for one phase, one bowling side and one batting side.

    Parameters
    ----------
    df : pandas.DataFrame
        Delivery-level frame with at least the columns ``Phase``,
        ``bowling_team``, ``batting_team``, ``bowler``, ``match_id``,
        ``Total_Runs`` and ``isBowlerWk``. It is not modified.
    current_phase
        Value matched exactly against ``Phase``.
    current_team
        Value matched exactly against ``bowling_team``.
    same_team
        Value matched exactly against ``batting_team``.

    Returns
    -------
    pandas.DataFrame
        One row per bowler, ordered by ``Innings`` descending (ties keep
        alphabetical bowler order), with the columns ``bowler, Innings, Runs,
        Balls, Dismissals, Fours, Sixes, Dots, Eco, Bowl_SR, Bowl_Avg,
        Bowl_Dot%``. If no delivery matches, an empty frame with the same
        columns is returned.

    Notes
    -----
    ``Balls`` counts every matching row, and fours/sixes/dots are derived from
    ``Total_Runs`` (runs off the bat plus extras).
    NOTE(review): this presumably counts wides/no-balls as balls and e.g.
    four byes as a four conceded -- confirm that this is intended.
    """
    selected = (
        (df['Phase'] == current_phase)
        & (df['bowling_team'] == current_team)
        & (df['batting_team'] == same_team)
    )
    # Fresh frame: the caller's data is never written to.
    bowled = df.loc[selected, ['bowler', 'match_id', 'Total_Runs', 'isBowlerWk']].assign(
        isDot=lambda d: (d['Total_Runs'] == 0).astype('int64'),
        isFour=lambda d: (d['Total_Runs'] == 4).astype('int64'),
        isSix=lambda d: (d['Total_Runs'] == 6).astype('int64'),
    )

    summary = (
        bowled.groupby('bowler')
        .agg(
            Innings=('match_id', 'nunique'),
            Runs=('Total_Runs', 'sum'),
            Balls=('match_id', 'count'),
            Dismissals=('isBowlerWk', 'sum'),
            Fours=('isFour', 'sum'),
            Sixes=('isSix', 'sum'),
            Dots=('isDot', 'sum'),
        )
        .reset_index()
        # A stable sort keeps the ordering of ties deterministic.
        .sort_values(['Innings'], ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )

    # Column arithmetic instead of row-wise apply: apply(axis=1) on an empty
    # frame returns a DataFrame, which cannot be assigned to a single column.
    has_wicket = summary['Dismissals'] > 0
    wicket_divisor = summary['Dismissals'].where(has_wicket, 1)
    # Runs per over; 6000 (6 x the 1000 sentinel) when no balls were counted.
    summary['Eco'] = (6 * (summary['Runs'] / summary['Balls'])).where(summary['Balls'] > 0, 6000.0)
    summary['Bowl_SR'] = summary['Balls'] / wicket_divisor
    # No wickets and no runs conceded -> sentinel 1000; no wickets -> runs conceded.
    summary['Bowl_Avg'] = (summary['Runs'] / wicket_divisor).mask(~has_wicket & (summary['Runs'] == 0), 1000.0)
    summary['Bowl_Dot%'] = 100 * (summary['Dots'] / summary['Balls'])

    return summary

In [24]:
# ByPhase(df,'Death','Australia','Sri Lanka')

## Venue Analysis

In [25]:
# def isOut(player_dismissed):
#     try:
#         x = math.isnan(player_dismissed)
#         return 0
#     except:
#         return 1

In [26]:
# vdf_runs = 0.5 * pd.DataFrame(df.groupby('venue')['Total_Runs'].sum())/df.groupby('venue')['match_id'].apply(lambda x :(len(list(np.unique(x)))))
# vdf_rpw = pd.DataFrame((df.groupby('venue')['Total_Runs'].sum()/df.groupby('venue')['player_dismissed'].count())

In [27]:
# fdf = pd.merge(vdf_runs,)