In [1]:
        import pandas as pd
        import numpy as np
        import math
        import matplotlib.pyplot as plt
        
        import warnings
        warnings.filterwarnings('ignore')
        
        pd.set_option('display.max_columns',None)
        pd.set_option('display.max_row',None)
        pd.set_option('display.expand_frame_repr',False)
        pd.set_option('display.max_colwidth',None)

In [2]:
deliveries = pd.read_csv('ipl_2024_deliveries.csv')
matches = pd.read_csv('ipl_2024_matches.csv')

In [3]:
def ByInnings(df,current_innings):
    
    df = df[df.innings == current_innings]
    df.reset_index(inplace = True,drop = True)
    
    df['isDot'] = df['runs_of_bat'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['runs_of_bat'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['runs_of_bat'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['runs_of_bat'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['runs_of_bat'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['runs_of_bat'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['batsman'])['runs_of_bat'].sum()).reset_index().rename(columns={'runs_of_bat':'runs'})
    innings = pd.DataFrame(df.groupby(['batsman'])['match_id'].apply(lambda x: len(list(np.unique(x))))).reset_index().rename(columns={'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['batsman'])['match_id'].count()).reset_index().rename(columns={'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['batsman'])['player_dismissed'].count()).reset_index().rename(columns={'player_dismissed':'dismissals'})
    sixes = pd.DataFrame(df.groupby(['batsman'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})
    fours = pd.DataFrame(df.groupby(['batsman'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    
    df = pd.merge(innings,runs,on = 'batsman').merge( balls, on = 'batsman').merge( dismissals, on = 'batsman').merge( fours, on = 'batsman').merge( sixes, on = 'batsman')
    df['RPI'] =df.apply(lambda x: (x['runs']/x['innings']),axis = 1)
    return df


In [53]:
df.head()

Unnamed: 0,batsman,innings,runs,balls,dismissals,fours,sixes,dots,RPI,SR,BPD,BPB,dot_percentage,calc_SR,calc_RPI,calc_BPD,calc_dot_percentage,dev_best_SR,dev_best_RPI,dev_best_BPD,dev_best_dot_percentage,dev_best_sqrt,dev_worst_SR,dev_worst_RPI,dev_worst_BPD,dev_worst_dot_percentage,dev_worst_sqrt,score
0,Abdul Samad,1,13,6,1,1,1,1,13.0,216.666667,6.0,3.0,0.166667,7.546051,0.441679,0.110585,0.004757,79.462312,34.02745,2.293422,2.3e-05,115.783206,56.942887,0.195081,0.011559,0.027719,57.177246,0.33058
1,Anshul Kamboj,1,2,2,0,0,0,0,2.0,100.0,2.0,2.0,0.0,1.607443,0.010454,0.012287,0.0,220.604831,39.244345,2.600811,0.0,262.449987,2.583872,0.000109,8.5e-05,0.029326,2.613392,0.009859
2,Arshad Khan,1,5,4,1,1,0,2,5.0,125.0,4.0,4.0,0.5,2.511629,0.065337,0.049149,0.042812,194.56303,38.559722,2.483275,0.001833,235.60786,6.308282,0.004269,0.002123,0.016496,6.33117,0.026168
3,Bairstow,1,11,6,0,0,1,1,11.0,183.333333,6.0,6.0,0.166667,5.402794,0.316232,0.110585,0.004757,122.266552,35.506733,2.293422,2.3e-05,160.06673,29.190183,0.100003,0.011559,0.027719,29.329464,0.154858
4,Boult,1,0,1,0,0,0,1,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.003072,0.171247,270.938667,39.375433,2.630619,0.029326,312.974044,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
def balls_per_dismissal(balls, dismissals):
    if dismissals > 0:
        return balls/dismissals
    else:
        return balls/1 
    
def balls_per_boundary(balls, boundaries):
    if boundaries > 0:
        return balls/boundaries
    else:
        return balls/1 

In [5]:
def ByCustom(df, current_venue, current_phase, current_opposition):
    
    df = df[df.venue == current_venue]
    df = df[df.phase == current_phase]
    df = df[df.bowling_team == current_opposition]
   
  
    df.reset_index(inplace = True,drop = True)
    
    df['isDot'] = df['runs_of_bat'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['runs_of_bat'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['runs_of_bat'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['runs_of_bat'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['runs_of_bat'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['runs_of_bat'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['batsman'])['runs_of_bat'].sum()).reset_index().rename(columns={'runs_of_bat':'runs'})
    innings = pd.DataFrame(df.groupby(['batsman'])['match_id'].apply(lambda x: len(list(np.unique(x))))).reset_index().rename(columns={'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['batsman'])['match_id'].count()).reset_index().rename(columns={'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['batsman'])['player_dismissed'].count()).reset_index().rename(columns={'player_dismissed':'dismissals'})
    sixes = pd.DataFrame(df.groupby(['batsman'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})
    fours = pd.DataFrame(df.groupby(['batsman'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})

    dots = pd.DataFrame(df.groupby(['batsman'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})

    
    df = pd.merge(innings,runs,on = 'batsman').merge( balls, on = 'batsman').merge( dismissals, on = 'batsman').merge( fours, on = 'batsman').merge( sixes, on = 'batsman').merge( dots, on = 'batsman')
    df['RPI'] = df.apply(lambda x: x['runs']/x['innings'], axis = 1)
    df['SR'] = df.apply(lambda x: 100*(x['runs']/x['balls']),axis = 1)
    df['BPD'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)
    df['BPB'] = df.apply(lambda x: balls_per_boundary(x['balls'], (x['fours'] + x['sixes'])), axis = 1)

    df['dot_percentage'] = df.apply(lambda x: x['dots']/x['balls'], axis = 1)

    return df

In [None]:
df=deliveries.copy()

mdf=matches.copy()

comb=pd.merge(df,mdf,on = 'match_id',how ='left')

comb = comb.rename(columns={'striker':'batsman'})

comb = comb.rename(columns={'venue_x':'venue'})

In [11]:
def get_phase(over_no):
    if over_no < 6:
        return'PowerPlay'
        
    elif over_no < 15:
        return'Middle Over'
    else:
        return'Death Over'

In [12]:
comb['phase'] = comb['over'].apply(lambda x: get_phase(x))

In [13]:
df = ByCustom(comb ,'Eden Gardens, Kolkata','PowerPlay','KKR')

In [14]:
df.head(100)

Unnamed: 0,batsman,innings,runs,balls,dismissals,fours,sixes,dots,RPI,SR,BPD,BPB,dot_percentage
0,Abhishek Sharma,1,27,14,0,3,2,6,27.0,192.857143,14.0,2.8,0.428571
1,Abishek Porel,1,18,13,0,2,1,6,18.0,138.461538,13.0,4.333333,0.461538
2,Ayush Badoni,1,5,6,0,1,0,4,5.0,83.333333,6.0,6.0,0.666667
3,Bairstow,1,36,16,1,5,2,5,36.0,225.0,16.0,2.285714,0.3125
4,Buttler,1,20,14,0,2,1,5,20.0,142.857143,14.0,4.666667,0.357143
5,Fraser-McGurk,1,12,7,1,1,1,3,12.0,171.428571,7.0,3.5,0.428571
6,Hooda,1,8,10,1,1,0,5,8.0,80.0,10.0,10.0,0.5
7,Ishan Kishan,1,38,20,0,5,2,9,38.0,190.0,20.0,2.857143,0.45
8,Jaiswal,1,19,10,1,3,1,5,19.0,190.0,10.0,2.5,0.5
9,Kohli,1,18,7,1,1,2,2,18.0,257.142857,7.0,2.333333,0.285714


In [15]:
df = ByCustom(comb ,'Eden Gardens, Kolkata','Middle Over','KKR')

In [16]:
df.head(15)

Unnamed: 0,batsman,innings,runs,balls,dismissals,fours,sixes,dots,RPI,SR,BPD,BPB,dot_percentage
0,Abdul Samad,1,2,5,0,0,0,3,2.0,40.0,5.0,5.0,0.6
1,Abhishek Sharma,1,5,5,1,1,0,3,5.0,100.0,5.0,5.0,0.6
2,Abishek Porel,1,0,3,1,0,0,3,0.0,0.0,3.0,3.0,1.0
3,Ashwin,1,8,12,1,1,0,7,8.0,66.666667,12.0,12.0,0.583333
4,Axar,1,15,21,1,2,0,13,15.0,71.428571,21.0,10.5,0.619048
5,Ayush Badoni,1,24,21,1,1,1,5,24.0,114.285714,21.0,10.5,0.238095
6,Bairstow,1,61,26,0,3,6,6,61.0,234.615385,26.0,2.888889,0.230769
7,Buttler,1,38,28,0,5,0,9,38.0,135.714286,28.0,5.6,0.321429
8,Dhruv Jurel,1,2,4,1,0,0,2,2.0,50.0,4.0,4.0,0.5
9,Green,1,6,4,1,0,1,3,6.0,150.0,4.0,4.0,0.75


In [17]:
df = ByCustom(comb ,'Eden Gardens, Kolkata','Death Over','KKR')

In [18]:
df.head(15)

Unnamed: 0,batsman,innings,runs,balls,dismissals,fours,sixes,dots,RPI,SR,BPD,BPB,dot_percentage
0,Abdul Samad,1,13,6,1,1,1,1,13.0,216.666667,6.0,3.0,0.166667
1,Anshul Kamboj,1,2,2,0,0,0,0,2.0,100.0,2.0,2.0,0.0
2,Arshad Khan,1,5,4,1,1,0,2,5.0,125.0,4.0,4.0,0.5
3,Bairstow,1,11,6,0,0,1,1,11.0,183.333333,6.0,6.0,0.166667
4,Boult,1,0,1,0,0,0,1,0.0,0.0,1.0,1.0,1.0
5,Buttler,1,49,23,1,2,5,8,49.0,213.043478,23.0,3.285714,0.347826
6,Chawla,1,1,2,0,0,0,1,1.0,50.0,2.0,2.0,0.5
7,Cummins,1,0,1,0,0,0,1,0.0,0.0,1.0,1.0,1.0
8,Ferguson,1,1,1,1,0,0,0,1.0,100.0,1.0,1.0,0.0
9,Karn Sharma,1,20,8,1,0,3,3,20.0,250.0,8.0,2.666667,0.375


In [19]:
comb['venue'].unique()

array(['MA Chidambaram Stadium, Chennai',
       'Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur, Chandigarh',
       'Eden Gardens, Kolkata', 'Sawai Mansingh Stadium, Jaipur',
       'Narendra Modi Stadium, Ahmedabad',
       'M.Chinnaswamy Stadium, Bengaluru',
       'Rajiv Gandhi International Stadium, Hyderabad',
       'Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
       'Wankhede Stadium, Mumbai', 'Arun Jaitley Stadium, Delhi',
       'Himachal Pradesh Cricket Association Stadium, Dharamsala',
       'Barsapara Cricket Stadium, Guwahati'], dtype=object)

In [20]:
wt_sr, wt_rpi, wt_bpd, wt_dot_percentage = 0.13, 0.27, 0.16, 0.45

In [21]:
#step1: square of all values
df['calc_SR'] = df['SR'].apply(lambda x: x*x) 
df['calc_RPI'] = df['RPI'].apply(lambda x: x*x) 
df['calc_BPD'] = df['BPD'].apply(lambda x: x*x) 
df['calc_dot_percentage'] = df['dot_percentage'].apply(lambda x: x*x)

sq_sr, sq_rpi, sq_bpd, sq_dot_percentage = np.sqrt(df[['calc_SR','calc_RPI', 'calc_BPD', 'calc_dot_percentage']].sum(axis = 0))

df['calc_SR'] = df['calc_SR'].apply(lambda x: x/sq_sr) 
df['calc_RPI'] = df['calc_RPI'].apply(lambda x: x/sq_rpi) 
df['calc_BPD'] = df['calc_BPD'].apply(lambda x: x/sq_bpd) 
df['calc_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: x/sq_dot_percentage)

df['calc_SR'] = df['calc_SR'].apply(lambda x: x*wt_sr) 
df['calc_RPI'] = df['calc_RPI'].apply(lambda x: x*wt_rpi) 
df['calc_BPD'] = df['calc_BPD'].apply(lambda x: x*wt_bpd) 
df['calc_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: x*wt_dot_percentage)

best_sr, worst_sr = max(df['calc_SR']), min(df['calc_SR'])
best_rpi, worst_rpi = max(df['calc_RPI']), min(df['calc_RPI'])
best_bpd, worst_bpd = max(df['calc_BPD']), min(df['calc_BPD'])
best_dot_percentage, worst_dot_percentage = min(df['calc_dot_percentage']), max(df['calc_dot_percentage'])

In [22]:
df['dev_best_SR'] = df['calc_SR'].apply(lambda x: (x-best_sr)*(x-best_sr)) 
df['dev_best_RPI'] = df['calc_RPI'].apply(lambda x: (x-best_rpi)*(x-best_rpi)) 
df['dev_best_BPD'] = df['calc_BPD'].apply(lambda x: (x-best_bpd)*(x-best_bpd)) 
df['dev_best_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: (x-best_dot_percentage)*(x-best_dot_percentage))

df['dev_best_sqrt'] = df.apply(lambda x: x['dev_best_SR'] + x['dev_best_RPI'] + x['dev_best_BPD'] + x['dev_best_dot_percentage'], axis = 1) 

df['dev_worst_SR'] = df['calc_SR'].apply(lambda x: (x-worst_sr)*(x-worst_sr)) 
df['dev_worst_RPI'] = df['calc_RPI'].apply(lambda x: (x-worst_rpi)*(x-worst_rpi)) 
df['dev_worst_BPD'] = df['calc_BPD'].apply(lambda x: (x-worst_bpd)*(x-worst_bpd)) 
df['dev_worst_dot_percentage'] = df['calc_dot_percentage'].apply(lambda x: (x-worst_dot_percentage)*(x-worst_dot_percentage))

df['dev_worst_sqrt'] = df.apply(lambda x: x['dev_worst_SR'] + x['dev_worst_RPI'] + x['dev_worst_BPD'] + x['dev_worst_dot_percentage'], axis = 1) 

In [23]:
df['score'] = df.apply(lambda x: x['dev_worst_sqrt']/(x['dev_worst_sqrt'] + x['dev_best_sqrt']), axis = 1)

In [24]:
df[[ 'batsman','innings', 'runs', 'balls', 'dismissals', 'dot_percentage', 'score']].sort_values(['score'], ascending = False).reset_index(drop = True).head(50)

Unnamed: 0,batsman,innings,runs,balls,dismissals,dot_percentage,score
0,Shashank Singh,1,49,17,0,0.235294,0.955453
1,Shahbaz Ahmed,1,16,5,1,0.4,0.889187
2,Powell,1,24,8,1,0.25,0.880348
3,Klaasen,1,45,18,1,0.277778,0.753562
4,Karn Sharma,1,20,8,1,0.375,0.591419
5,Buttler,1,49,23,1,0.347826,0.531437
6,Abdul Samad,1,13,6,1,0.166667,0.33058
7,Bairstow,1,11,6,0,0.166667,0.154858
8,Pooran,1,34,21,1,0.428571,0.151707
9,Karthik,1,20,15,1,0.4,0.043736
