In [108]:
%matplotlib inline
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Show complete frames when results are displayed: no row or column truncation,
# no wrapping of wide frames across several blocks, and no clipping of long cells.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# None is the supported "unlimited width" value; the old -1 sentinel was
# deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [109]:
# Location of the ODI ball-by-ball CSV that feeds the whole analysis.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# will not run elsewhere until it is pointed at a local copy of the file.
path="C:\\Users\\SHETTS60\\Documents\\Python scripts\\Cricket analytics Materials\\Special Bonus-20210617T153758Z-001\\Special Bonus\\Data Files\\ODI_ball_by_ball.csv"
# Read the CSV into a DataFrame (presumably one row per delivery - confirm against the file).
df = pd.read_csv(path)

In [110]:
# Names of the bowlers whose deliveries are analysed; matched against the 'bowler' column.
bowlers = [
    'JJ Bumrah',
    'TA Boult',
    'K Rabada',
    'JC Archer',
    'PJ Cummins',
    'MA Starc',
    'Rashid Khan',
]


In [111]:
# Keep only the deliveries bowled by the selected bowlers.
# .copy() makes this an independent frame, so the columns added to it in later
# cells are written to bowlersdf itself rather than to a slice of df
# (which is what triggers pandas' SettingWithCopyWarning).
bowlersdf = df[df['bowler'].isin(bowlers)].copy()

In [112]:
# Dismissal kinds that are NOT credited to the bowler. Any other kind recorded
# against a dismissed batter is counted as the bowler's wicket.
_NON_BOWLER_DISMISSALS = frozenset({
    'run out',
    'retired hurt',
    'retired out',
    'retired not out',
    'obstructing the field',
    'timed out',
    'handled the ball',
    'hit the ball twice',
})


def is_bowlers_wicket(player_dismissed, dismissal_kind):
    """Return 1 if the delivery produced a wicket credited to the bowler, else 0.

    Parameters
    ----------
    player_dismissed : str or float
        Name of the dismissed batter. When nobody was dismissed the CSV value is
        missing, which pandas reads as a float NaN, so only ``str`` values are
        treated as a dismissal.
    dismissal_kind : str or float
        How the batter was dismissed (e.g. 'bowled', 'run out').

    Returns
    -------
    int
        1 when a batter was dismissed by a kind not listed in
        ``_NON_BOWLER_DISMISSALS``, otherwise 0.
    """
    # A missing value (NaN) is a float, so a non-str value means no dismissal.
    if not isinstance(player_dismissed, str):
        return 0
    return 0 if dismissal_kind in _NON_BOWLER_DISMISSALS else 1

In [119]:
def balls_per_dismissal(balls, dismissals):
    """Bowling strike rate: balls bowled per wicket taken.

    With no wickets the ball count is divided by 1, so the result is simply the
    number of balls bowled (as a float) instead of infinity.
    """
    divisor = dismissals if dismissals > 0 else 1
    return balls / divisor
    
def runs_per_ball(runs_conceeded, balls):
    """Runs conceded per ball bowled.

    When no balls were bowled there is nothing to divide by, so the sentinel
    value 1000 is returned instead.
    """
    if balls <= 0:
        return 1000
    return runs_conceeded / balls
    
def runs_per_dismissal(runs_conceeded, dismissals):
    """Bowling average: runs conceded per wicket taken.

    Without any wicket the function falls back to:
    * 1000 when no runs were conceded either, and
    * the runs conceded themselves (divided by 1, hence a float) otherwise.
    """
    if dismissals > 0:
        return runs_conceeded / dismissals
    # No wickets from here on.
    return 1000 if runs_conceeded == 0 else runs_conceeded / 1

In [120]:
# Flag every delivery with 1/0 according to is_bowlers_wicket, evaluated row by
# row on the dismissed player's name and the wicket type.
bowlersdf['isBowlerWk'] = bowlersdf.apply(
    lambda row: is_bowlers_wicket(row["player_dismissed"], row["wicket_type"]),
    axis=1,
)

In [121]:
# Total runs on each delivery: runs off the bat plus extras, both cast to int first.
bowlersdf['total_runs'] = (
    bowlersdf['runs_off_bat'].astype(int)
    .add(bowlersdf['extras'].astype(int))
)

In [133]:
def bowlerStats(df, current_innings=None):
    """Summarise per-bowler figures from ball-by-ball rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'bowler', 'match_id', 'total_runs', 'runs_off_bat'
        and 'isBowlerWk' (plus 'innings' when ``current_innings`` is given).
        The frame is not modified.
    current_innings : optional
        When given, only rows whose 'innings' value equals it are summarised.
        The default ``None`` uses every row.

    Returns
    -------
    pandas.DataFrame
        One row per bowler, ordered by bowler name, with the columns
        bowler, innings (distinct match ids), balls (rows bowled), runs,
        dismissals, dots, ones, twos, threes, fours, sixes, dot%,
        SR (balls per dismissal), Eco (runs per six balls) and
        Avg (runs per dismissal).

    Notes
    -----
    NOTE(review): every row counts as a ball and 'total_runs' is used as runs
    conceded; if the data contains wides/no-balls or byes they are included -
    confirm that this is intended.
    """
    rows = df if current_innings is None else df[df["innings"] == current_innings]

    # assign() returns a new frame, so the caller's data is left untouched.
    # Dots/ones/twos/threes look at all runs on the ball; boundaries only at runs off the bat.
    rows = rows.assign(
        isDot=rows["total_runs"].eq(0).astype(int),
        isOne=rows["total_runs"].eq(1).astype(int),
        isTwo=rows["total_runs"].eq(2).astype(int),
        isThree=rows["total_runs"].eq(3).astype(int),
        isFour=rows["runs_off_bat"].eq(4).astype(int),
        isSix=rows["runs_off_bat"].eq(6).astype(int),
    )

    # One grouped pass produces every counting column, in display order.
    stats = (
        rows.groupby("bowler")
        .agg(
            innings=("match_id", "nunique"),
            balls=("match_id", "count"),
            runs=("total_runs", "sum"),
            dismissals=("isBowlerWk", "sum"),
            dots=("isDot", "sum"),
            ones=("isOne", "sum"),
            twos=("isTwo", "sum"),
            threes=("isThree", "sum"),
            fours=("isFour", "sum"),
            sixes=("isSix", "sum"),
        )
        .reset_index()
    )

    stats["dot%"] = 100 * (stats["dots"] / stats["balls"])
    # The rate helpers hold the zero-wicket / zero-ball fallbacks, so they are
    # applied per bowler rather than re-implemented here.
    stats["SR"] = [
        balls_per_dismissal(balls, wickets)
        for balls, wickets in zip(stats["balls"], stats["dismissals"])
    ]
    # runs_per_ball gives runs per ball; multiplying by 6 turns it into runs per over.
    stats["Eco"] = [
        6 * runs_per_ball(runs, balls)
        for runs, balls in zip(stats["runs"], stats["balls"])
    ]
    stats["Avg"] = [
        runs_per_dismissal(runs, wickets)
        for runs, wickets in zip(stats["runs"], stats["dismissals"])
    ]

    return stats

In [134]:
# Per-bowler summary computed from every row of bowlersdf.
df3 = bowlerStats(bowlersdf)

In [130]:
# Per-bowler summaries requested with a second argument of 1 and of 2.
df1, df2 = bowlerStats(bowlersdf, 1), bowlerStats(bowlersdf, 2)

In [131]:
# Display the summary that was requested with argument 1.
df1

Unnamed: 0,bowler,innings,balls,runs,dismissals,dots,ones,twos,threes,fours,sixes,dot%,SR,Eco,Avg
0,JC Archer,10,561,470,16,309,165,31,4,39,11,55.080214,35.0625,5.026738,29.375
1,JJ Bumrah,40,2272,1734,72,1276,686,107,12,165,16,56.161972,31.555556,4.579225,24.083333
2,K Rabada,38,2207,1853,76,1235,616,114,12,190,24,55.958314,29.039474,5.037608,24.381579
3,MA Starc,39,2207,1884,84,1201,632,145,16,177,29,54.417762,26.27381,5.121885,22.428571
4,PJ Cummins,32,1850,1621,49,984,580,76,11,165,29,53.189189,37.755102,5.257297,33.081633
5,Rashid Khan,31,1793,1301,59,1005,577,83,6,91,26,56.051311,30.389831,4.353597,22.050847
6,TA Boult,44,2623,2196,80,1469,719,154,21,217,31,56.004575,32.7875,5.023256,27.45


In [132]:
# Display the summary that was requested with argument 2.
df2

Unnamed: 0,bowler,innings,balls,runs,dismissals,dots,ones,twos,threes,fours,sixes,dot%,SR,Eco,Avg
0,JC Archer,7,381,279,14,230,98,16,6,25,3,60.367454,27.214286,4.393701,19.928571
1,JJ Bumrah,27,1348,1144,36,741,401,48,11,123,10,54.970326,37.444444,5.091988,31.777778
2,K Rabada,37,1937,1566,43,1102,548,88,7,157,24,56.892101,45.046512,4.8508,36.418605
3,MA Starc,57,2952,2510,100,1641,810,167,34,261,29,55.589431,29.52,5.101626,25.1
4,PJ Cummins,37,1932,1640,62,1068,541,109,19,169,19,55.279503,31.16129,5.093168,26.451613
5,Rashid Khan,36,1838,1254,79,1036,605,80,8,89,12,56.365615,23.265823,4.09358,15.873418
6,TA Boult,49,2665,2170,89,1535,721,117,14,241,29,57.598499,29.94382,4.885553,24.382022


In [140]:
# Order the overall summary by dot%, then SR, Eco and Avg (all ascending) and renumber the rows.
(
    df3
    .sort_values(by=['dot%', 'SR', 'Eco', 'Avg'], ascending=True)
    .reset_index(drop=True)
)

Unnamed: 0,bowler,innings,balls,runs,dismissals,dots,ones,twos,threes,fours,sixes,dot%,SR,Eco,Avg
0,PJ Cummins,69,3782,3261,111,2052,1121,185,30,334,48,54.257007,34.072072,5.173453,29.378378
1,MA Starc,96,5159,4394,184,2842,1442,312,50,438,58,55.088195,28.038043,5.110293,23.880435
2,JJ Bumrah,67,3620,2878,108,2017,1087,155,23,288,26,55.718232,33.518519,4.770166,26.648148
3,Rashid Khan,67,3631,2555,138,2041,1182,163,14,180,38,56.21041,26.311594,4.221977,18.514493
4,K Rabada,75,4144,3419,119,2337,1164,202,19,347,48,56.394788,34.823529,4.95029,28.731092
5,TA Boult,93,5294,4381,169,3004,1442,272,36,460,60,56.743483,31.325444,4.965244,25.923077
6,JC Archer,17,949,764,30,539,266,50,10,64,15,56.796628,31.633333,4.830348,25.466667
