In [1]:
# pip install "numpy<2.0" "pymc>=5" arviz pytensor

In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import pymc as pm
import numpy as np
import arviz as az
import statsmodels.api as sm

# load + clean the data

In [27]:
df = pd.read_csv('IPL_Bowler_Detailed_Data.csv')
# df

In [4]:
df.shape

(4800, 11)

In [5]:
df.dtypes

Match_ID           int64
Match_Date        object
Pitch_Type        object
Phase             object
Over               int64
Ball               int64
Bowler            object
Batter_Avg       float64
Batter_SR        float64
Runs_Conceded      int64
Is_Wicket          int64
dtype: object

In [6]:
df.isnull().sum()

Match_ID         0
Match_Date       0
Pitch_Type       0
Phase            0
Over             0
Ball             0
Bowler           0
Batter_Avg       0
Batter_SR        0
Runs_Conceded    0
Is_Wicket        0
dtype: int64

In [7]:
df['Phase'].value_counts()

Phase
Powerplay    2400
Death        2400
Name: count, dtype: int64

In [8]:
df['Runs_Conceded'].value_counts()

Runs_Conceded
0    2187
1    1072
2     538
4     490
6     314
3     199
Name: count, dtype: int64

# Convert Columns To Numeric

In [9]:
df['Bowler'].value_counts()

Bowler
Bowler B    2400
Bowler A    2400
Name: count, dtype: int64

In [10]:
df.columns

Index(['Match_ID', 'Match_Date', 'Pitch_Type', 'Phase', 'Over', 'Ball',
       'Bowler', 'Batter_Avg', 'Batter_SR', 'Runs_Conceded', 'Is_Wicket'],
      dtype='object')

In [28]:
label = LabelEncoder()

df['Pitch_Type'] = label.fit_transform(df['Pitch_Type'])

df['Phase'] = df['Phase'].map({'Powerplay':0, 'Death':1})

df['Bowler'] = df['Bowler'].map({'Bowler A':0,'Bowler B':1})

# df

In [29]:
df.drop('Match_Date',axis=1,inplace=True)
# df

# Filter only death overs

In [13]:
df['Phase'].value_counts()

Phase
0    2400
1    2400
Name: count, dtype: int64

In [14]:
df = df[df['Phase'] == 1]

In [30]:
# df.head()

# Feature: IS_DOT

In [16]:
df['Is_Dot'] = (df['Runs_Conceded'] == 0).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Is_Dot'] = (df['Runs_Conceded'] == 0).astype(int)


In [31]:
# df.head()

In [18]:
df.shape

(2400, 11)

# Feature: PRESSURE_APPLIED

In [19]:
df = df.reset_index(drop=True) 

df['Pressure_Applied'] = 0

for i in range(1, len(df)):
    same_match = df.loc[i, 'Match_ID'] == df.loc[i-1, 'Match_ID']
    same_bowler = df.loc[i, 'Bowler'] == df.loc[i-1, 'Bowler']
    same_over = df.loc[i, 'Over'] == df.loc[i-1, 'Over']
    
    if same_match and same_bowler and same_over:
        if df.loc[i-1, 'Ball'] == 6:
            continue
        if df.loc[i-1, 'Is_Dot'] == 1:
            df.loc[i, 'Pressure_Applied'] = 1

In [26]:
# df.head()

In [21]:
df['Pressure_Applied'].value_counts()

Pressure_Applied
0    2398
1       2
Name: count, dtype: int64

In [22]:
df_model = df[['Is_Wicket','Pitch_Type','Batter_Avg',
               'Pressure_Applied','Bowler']].copy()

df_model['Interaction'] = df_model['Pressure_Applied'] * df_model['Bowler']

y = df_model['Is_Wicket']

x = df_model[['Pitch_Type','Batter_Avg',
              'Pressure_Applied','Bowler','Interaction']]

x = sm.add_constant(x)

In [32]:
model = sm.Logit(y, x).fit()
# print(model.summary())

         Current function value: 0.291375
         Iterations: 35




In [24]:
conf = model.conf_int()
conf['coef'] = model.params
conf.columns = ['lower_94','upper_94','coef']
killer_instinct = conf.loc['Interaction']
print("94% Interval for Killer Instinct (Pressure × Bowler):")
print(killer_instinct)

94% Interval for Killer Instinct (Pressure × Bowler):
lower_94   -3.380092e+08
upper_94    3.380092e+08
coef       -5.684564e+00
Name: Interaction, dtype: float64


In [33]:
low = killer_instinct['lower_94']
high = killer_instinct['upper_94']
if low > 0:
    print("\nVerdict: Bowler B shows statistically significant Killer Instinct.")
elif high < 0:
    print("\nVerdict: Bowler A shows stronger mental pressure conversion.")
else:
    print("Verdict: No clear evidence either bowler converts pressure better.")

Verdict: No clear evidence either bowler converts pressure better.
