In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import warnings
# Suppresses every warning for the rest of the session (no category or module is given,
# so the filter matches all of them). NOTE(review): this also hides pandas warnings such as
# SettingWithCopyWarning; consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

In [2]:
# Load the complete IPL ball-by-ball dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where that file exists.
ipl_csv_path = "C:/Users/utkar/OneDrive/Desktop/Analytics/Data/IPL Ball-by-Ball 2008-2020.csv"
deliveres = pd.read_csv(ipl_csv_path)

In [3]:
# Goal
# For the target batsman, find the bowlers against whom he does well and those against whom he struggles.
# We compare the batting strike rate (runs per 100 balls) with the bowling strike rate (balls per wicket).
# Most favourite bowler - the bowler against whom the target batsman has the best batting strike rate
# Least favourite bowler - the bowler who has the best bowling strike rate against the target batsman
# For example -
# targetBatsman = 'AB de Villiers'

In [4]:
# Work on an independent copy restricted to matches whose id is 980901 or higher.
# The original note describes this as "post 2016" data.
# NOTE(review): confirm which season match id 980901 belongs to.
recent_matches = deliveres["id"] >= 980901
del_df = deliveres.loc[recent_matches].copy()

In [5]:
def createStrikeRateAnalysis(del_df, batsman, min_balls=5):
    """Summarise how ``batsman`` has fared against each bowler.

    Rows of ``del_df`` where ``batsman`` matches are grouped by ``bowler``.
    Bowlers with ``min_balls`` deliveries or fewer are dropped; of the remaining
    bowlers only those at or above the mean delivery count are kept.

    Parameters
    ----------
    del_df : pandas.DataFrame
        Ball-by-ball data with ``batsman``, ``bowler``, ``batsman_runs``,
        ``ball`` and ``is_wicket`` columns. The frame is not modified.
    batsman : str
        Value matched against the ``batsman`` column.
    min_balls : int, default 5
        A bowler must have bowled strictly more than this many deliveries to
        the batsman to be considered.

    Returns
    -------
    pandas.DataFrame
        One row per retained bowler, indexed 0..n-1, with columns ``bowler``,
        ``runsScored``, ``ballsFaced``, ``numWickets``, ``BattingStrikeRate``
        (runs per 100 balls) and ``BowlingStrikeRate`` (balls per wicket,
        ``numpy.inf`` when the bowler never took the wicket). An empty frame
        with the same columns is returned when no bowler qualifies.

    Notes
    -----
    ``ballsFaced`` counts every non-null ``ball`` row and ``numWickets`` sums
    ``is_wicket`` as-is. NOTE(review): if the data contains wides/no-balls or
    run-outs, they are presumably included in these totals -- confirm.
    """
    columns = ['bowler', 'runsScored', 'ballsFaced', 'numWickets',
               'BattingStrikeRate', 'BowlingStrikeRate']

    # Filter once and aggregate once instead of three separate groupby passes.
    faced = del_df[del_df.batsman == batsman]
    if faced.empty:
        return pd.DataFrame(columns=columns)

    comb = (
        faced.groupby('bowler')
        .agg(runsScored=('batsman_runs', 'sum'),
             ballsFaced=('ball', 'count'),
             numWickets=('is_wicket', 'sum'))
        .reset_index()
    )

    # Keep only bowlers who bowled more than `min_balls` deliveries to this batsman.
    comb = comb[comb.ballsFaced > min_balls]
    if comb.empty:
        # Nothing left to average over; avoid dividing by a zero row count.
        return pd.DataFrame(columns=columns)

    # Of those, keep bowlers at or above the average number of deliveries bowled.
    average_balls_faced = comb['ballsFaced'].sum() / len(comb)
    # Explicit copy so the new columns are written to an independent frame,
    # not to a slice of `comb`.
    final_df = comb[comb.ballsFaced >= average_balls_faced].copy()

    balls = final_df['ballsFaced']
    wickets = final_df['numWickets']
    final_df['BattingStrikeRate'] = 100 * final_df['runsScored'] / balls
    # A bowler with no wickets gets an infinite bowling strike rate.
    final_df['BowlingStrikeRate'] = (balls / wickets).mask(wickets == 0, np.inf)

    return final_df.reset_index(drop=True)

In [6]:
# Run the per-bowler strike-rate analysis for the chosen target batsman.
batsman = 'AB de Villiers'
final_df = createStrikeRateAnalysis(del_df=del_df, batsman=batsman)

In [7]:
def plot_df(final_df):
    """Scatter batting strike rate against bowling strike rate, one labelled point per bowler.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Frame with ``bowler``, ``BattingStrikeRate`` and ``BowlingStrikeRate``
        columns. Any index is accepted.

    Rows whose batting or bowling strike rate is not finite (for example the
    ``inf`` bowling strike rate of a bowler with no wickets) are left out,
    because they cannot be placed on the axes.
    """
    batting = final_df['BattingStrikeRate'].astype(float)
    bowling = final_df['BowlingStrikeRate'].astype(float)
    plottable = final_df[np.isfinite(batting) & np.isfinite(bowling)]

    plt.figure(figsize=(16, 8))

    # Iterate over values rather than looking rows up by integer label, so the
    # function does not depend on the index being exactly 0..n-1.
    for batting_sr, bowling_sr, bowler in zip(plottable['BattingStrikeRate'],
                                              plottable['BowlingStrikeRate'],
                                              plottable['bowler']):
        plt.text(batting_sr, bowling_sr, bowler)

    plt.scatter(plottable['BattingStrikeRate'], plottable['BowlingStrikeRate'])
    plt.xlabel("Batting Strike Rate")
    plt.ylabel("Bowling Strike Rate")
    plt.title("Batting vs Bowling Strike Rate by Bowler")
    plt.show()

In [8]:
# plot_df(final_df)
# We could plot this if we removed or ignored the infinite "Bowling Strike Rate" values.
# However, we are not looking to compare the bowlers against each other here;
# we are only looking for the extremes (most and least favourite bowlers).

In [9]:
# Favourite bowlers: the five rows with the highest batting strike rate.
(
    final_df
    .sort_values('BattingStrikeRate', ascending=False)
    .iloc[:5]
)

Unnamed: 0,bowler,runsScored,ballsFaced,numWickets,BattingStrikeRate,BowlingStrikeRate
25,SL Malinga,53,20,1,265.0,20.0
15,MM Sharma,69,27,0,255.555556,inf
17,P Kumar,44,18,0,244.444444,inf
7,HV Patel,53,23,0,230.434783,inf
1,AD Russell,46,20,0,230.0,inf


In [10]:
# Least favourite bowlers: the five rows with the lowest (best) bowling strike rate.
(
    final_df
    .sort_values('BowlingStrikeRate', ascending=True)
    .iloc[:5]
)

Unnamed: 0,bowler,runsScored,ballsFaced,numWickets,BattingStrikeRate,BowlingStrikeRate
23,S Gopal,30,34,4,88.235294,8.5
27,SP Narine,34,21,2,161.904762,10.5
12,KH Pandya,51,49,4,104.081633,12.25
8,Harbhajan Singh,44,25,2,176.0,12.5
9,Imran Tahir,50,32,2,156.25,16.0
