In [1]:
import pandas as pd
import numpy as np

# Location of the input CSV, relative to the notebook's working directory.
DATA_PATH = "../data/ipl_with_pressure_flag.csv"

# Load the dataset into the frame every later cell works from.
df = pd.read_csv(DATA_PATH)

# Preview the first rows as a quick sanity check on the parsed columns.
df.head()


Unnamed: 0,Match_ID,Match_Date,Pitch_Type,Phase,Over,Ball,Bowler,Batter_Avg,Batter_SR,Runs_Conceded,Is_Wicket,is_dot,is_death,pressure_applied
0,11935,2023-01-28,Batting,Powerplay,2,1,Bowler B,33.46,131.68,0,0,True,False,0
1,11935,2023-01-28,Batting,Powerplay,2,2,Bowler B,41.41,138.41,3,0,False,False,0
2,11935,2023-01-28,Batting,Powerplay,2,3,Bowler B,26.8,107.22,0,0,True,False,0
3,11935,2023-01-28,Batting,Powerplay,2,4,Bowler B,9.31,136.44,0,0,True,False,0
4,11935,2023-01-28,Batting,Powerplay,2,5,Bowler B,19.52,105.85,0,1,True,False,0


In [2]:
# --- Integer-encode the categorical columns -------------------------------
# Each distinct label is mapped to an integer code via pandas' categorical
# dtype; the codes are written back onto df as new columns.
pitch_as_category = df['Pitch_Type'].astype('category')
bowler_as_category = df['Bowler'].astype('category')
df['Pitch_Code'] = pitch_as_category.cat.codes
df['Bowler_Code'] = bowler_as_category.cat.codes

# --- Standardise Batter_Avg (z-score) -------------------------------------
# Subtract the column mean and divide by the column standard deviation
# (pandas' default .std(), i.e. the sample standard deviation).
batter_avg_mean = df['Batter_Avg'].mean()
batter_avg_std = df['Batter_Avg'].std()
df['Batter_Avg_Scaled'] = df['Batter_Avg'].sub(batter_avg_mean).div(batter_avg_std)

# Show each raw column next to its derived counterpart.
preview_columns = [
    'Pitch_Type', 'Pitch_Code',
    'Bowler', 'Bowler_Code',
    'Batter_Avg', 'Batter_Avg_Scaled',
]
df[preview_columns].head()


Unnamed: 0,Pitch_Type,Pitch_Code,Bowler,Bowler_Code,Batter_Avg,Batter_Avg_Scaled
0,Batting,0,Bowler B,1,33.46,0.204695
1,Batting,0,Bowler B,1,41.41,1.018946
2,Batting,0,Bowler B,1,26.8,-0.477432
3,Batting,0,Bowler B,1,9.31,-2.268784
4,Batting,0,Bowler B,1,19.52,-1.22306


In [3]:
import pymc as pm
import arviz as az




In [8]:
# Pull the model inputs out of df as plain NumPy arrays.

# Response: per-row wicket indicator.
y = df["Is_Wicket"].to_numpy()

# Predictors.
pressure = df["pressure_applied"].to_numpy()
pitch = df["Pitch_Code"].to_numpy()
bowler = df["Bowler_Code"].to_numpy()

# The standardised batting average is exposed under two names; both refer
# to the same column, and the model cell reads it as `avg`.
batter_avg = df["Batter_Avg_Scaled"].to_numpy()
avg = batter_avg


In [9]:
# List df's column labels to confirm the derived columns
# (Pitch_Code, Bowler_Code, Batter_Avg_Scaled) are present.
df.columns


Index(['Match_ID', 'Match_Date', 'Pitch_Type', 'Phase', 'Over', 'Ball',
       'Bowler', 'Batter_Avg', 'Batter_SR', 'Runs_Conceded', 'Is_Wicket',
       'is_dot', 'is_death', 'pressure_applied', 'Pitch_Code', 'Bowler_Code',
       'Batter_Avg_Scaled'],
      dtype='object')

In [10]:
with pm.Model() as pressure_model:

    # Priors: Normal(0, 1) on every coefficient of the linear predictor,
    # which lives on the logit scale (it is passed through a sigmoid below).
    intercept = pm.Normal("intercept", 0, 1)
    beta_pressure = pm.Normal("beta_pressure", 0, 1)
    # NOTE(review): `pitch` is an integer category code, so a single slope
    # treats the codes as equally spaced numeric values. If Pitch_Type has
    # more than two levels, consider one coefficient per level -- confirm.
    beta_pitch = pm.Normal("beta_pitch", 0, 1)
    beta_avg = pm.Normal("beta_avg", 0, 1)

    # Per-bowler intercept offsets, one per distinct bowler code.
    # The prior scale is fixed at 1 (no hyperprior on sigma), so the amount
    # of pooling across bowlers is set by the prior rather than learned;
    # this is a varying-intercept term, not a fully hierarchical one.
    n_bowlers = len(np.unique(bowler))
    bowler_effect = pm.Normal("bowler_effect", mu=0, sigma=1, shape=n_bowlers)

    # Linear predictor (log-odds of a wicket), one value per observation.
    eta = (
        intercept
        + beta_pressure * pressure
        + beta_pitch * pitch
        + beta_avg * avg
        + bowler_effect[bowler]
    )

    # Probability of a wicket, kept as a named variable so it stays
    # available in the trace under "p".
    # NOTE(review): this has one entry per observation, so the stored trace
    # grows with the dataset size.
    p = pm.Deterministic("p", pm.math.sigmoid(eta))

    # Likelihood. Parameterised directly by the log-odds instead of by `p`:
    # this is the same model, but avoids evaluating log(p) / log(1 - p) on a
    # sigmoid output that can saturate to exactly 0 or 1 in floating point.
    y_obs = pm.Bernoulli("y_obs", logit_p=eta, observed=y)
