In [17]:
import pandas as pd
import os

current_dir = os.getcwd()
data_path = os.path.join(current_dir, 'batsman_df.csv')

batsman_df = pd.read_csv(data_path)
batsman_df

Unnamed: 0,name,country,role,bat_style,t20_no,t20_runs,t20_avg,t20_sr,t20_50,t20_4s,t20_6s,ipl_no,ipl_runs,ipl_avg,ipl_sr,ipl_50,ipl_4s,ipl_6s
0,ajinkya rahane,india,batsman,right-handed-bat,2,375,20.83,113.3,1,32,6,17,4642,30.14,123.43,30,478,103
1,philip salt,england,wk-batsman,right-handed-bat,5,1193,34.09,164.33,5,116,61,2,653,34.37,175.54,6,74,34
2,rilee rossouw,south africa,batsman,left-handed-bat,5,767,34.86,159.8,3,69,39,2,473,23.65,153.58,2,45,25
3,srikar bharat,india,wk-batsman,right-handed-bat,0,0,0.0,0.0,0,0,0,2,199,28.43,122.09,1,12,8
4,n jagadeesan,india,wk-batsman,right-handed-bat,0,0,0.0,0.0,0,0,0,1,162,18.0,110.21,0,21,2
5,mandeep singh,india,batsman,right-handed-bat,1,87,43.5,119.18,1,11,1,16,1706,20.8,122.92,6,176,38
6,litton das,bangladesh,wk-batsman,right-handed-bat,3,2021,22.46,124.84,11,210,58,0,4,4.0,100.0,0,1,0
7,nicholas pooran,west indies,wk-batsman,left-handed-bat,10,2275,26.15,136.4,13,152,149,18,1769,32.16,162.3,9,112,127
8,vishnu vinod,india,wk-batsman,right-handed-bat,0,0,0.0,0.0,0,0,0,0,56,9.33,98.25,0,3,3
9,harpreet singh bhatia,india,batsman,left-handed-bat,0,0,0.0,0.0,0,0,0,1,123,17.57,103.37,0,10,3


In [18]:
import numpy as np

df = batsman_df.copy()

df['ipl_matches'] = (df['ipl_runs'] / df['ipl_avg']).replace([np.inf, -np.inf], 0).fillna(0)
df['t20_matches'] = (df['t20_runs'] / df['t20_avg']).replace([np.inf, -np.inf], 0).fillna(0)

# IPL Level
df['ipl_boundary_runs'] = df['ipl_4s']*4 + df['ipl_6s']*6
df['ipl_boundary_percent'] = df['ipl_boundary_runs'] / df['ipl_runs']
df['ipl_50_rate'] = df['ipl_50'] / df['ipl_matches']
df['ipl_exp_score'] = df['ipl_runs'] / df['ipl_matches']

# T20 Level
df['t20_boundary_runs'] = df['t20_4s']*4 + df['t20_6s']*6
df['t20_boundary_percent'] = df['t20_boundary_runs'] / df['t20_runs']
df['t20_50_rate'] = df['t20_50'] / df['t20_matches']
df['t20_exp_score'] = df['t20_runs'] / df['t20_matches']


def classify_batting_style(row):
    if row['ipl_avg'] >= 30 and row['ipl_sr'] < 135 and row['ipl_50_rate'] >= 0.2:
        return 'anchor'
    elif row['t20_avg'] >= 30 and row['t20_sr'] < 135 and row['t20_50_rate'] >= 0.2:
        return 'anchor'
    elif row['ipl_sr'] >= 140 and row['ipl_boundary_percent'] >= 0.6:
        return 'aggressive'
    elif row['t20_sr'] >= 140 and row['t20_boundary_percent'] >= 0.6:
        return 'aggressive'
    else:
        return 'balanced'

df['position'] = df.apply(classify_batting_style, axis=1)

df['is_anchor'] = (df['position'] == 'anchor').astype(int)
df['is_aggressive'] = (df['position'] == 'aggressive').astype(int)
df['is_balanced'] = (df['position'] == 'balanced').astype(int)

df = df.drop(columns=['position'])

In [19]:
batting_roles = {
    "ajinkya rahane": "Top-order",
    "philip salt": "Top-order",
    "rilee rossouw": "Top-order",
    "srikar bharat": "Middle-order",
    "n jagadeesan": "Top-order",
    "mandeep singh": "Middle-order",
    "litton das": "Top-order",
    "nicholas pooran": "Top-order",
    "vishnu vinod": "Top-order",
    "harpreet singh bhatia": "Middle-order",
    "donovan ferreira": "Finisher",
    "harry brook": "Middle-order",
    "heinrich klaasen": "Finisher",
    "anmolpreet singh": "Top-order",
    "mayank agarawal": "Top-order",
    "sameer rizvi": "Middle-order",
    "ricky bhui": "Top-order",
    "shai hope": "Top-order",
    "kumar kushagra": "Middle-order",
    "tristan stubbs": "Finisher",
    "angkrish raghuvanshi": "Top-order",
    "naman dhir": "Middle-order",
    "shubham dubey": "Middle-order",
    "rovman powell": "Finisher",
    "tom kohler-cadmore": "Top-order",
    "saurav chauhan": "Middle-order",
    "travis head": "Top-order",
    "devon conway": "Top-order",
    "rahul tripathi": "Top-order",
    "kl rahul": "Top-order",
    "jake fraser-mcgurk": "Top-order",
    "faf du plessis": "Top-order",
    "karun nair": "Middle-order",
    "jos buttler": "Top-order",
    "anuj rawat": "Top-order",
    "quinton de kock": "Top-order",
    "rahmanullah gurbaz": "Top-order",
    "rishabh pant": "Middle-order",
    "david miller": "Finisher",
    "aiden markram": "Middle-order",
    "matthew breetzke": "Top-order",
    "ryan rickelton": "Top-order",
    "josh inglis": "Middle-order",
    "nitish rana": "Middle-order",
    "jitesh sharma": "Finisher",
    "tim david": "Finisher",
    "devdutt padikkal": "Top-order",
    "ishan kishan": "Top-order",
    "abhinav manohar": "Middle-order",
    "sachin baby": "Middle-order",
    "atharva taide": "Top-order"
}

df['batting_position'] = df['name'].map(batting_roles)

In [20]:
df

Unnamed: 0,name,country,role,bat_style,t20_no,t20_runs,t20_avg,t20_sr,t20_50,t20_4s,...,ipl_50_rate,ipl_exp_score,t20_boundary_runs,t20_boundary_percent,t20_50_rate,t20_exp_score,is_anchor,is_aggressive,is_balanced,batting_position
0,ajinkya rahane,india,batsman,right-handed-bat,2,375,20.83,113.3,1,32,...,0.194787,30.14,164,0.437333,0.055547,20.83,0,0,1,Top-order
1,philip salt,england,wk-batsman,right-handed-bat,5,1193,34.09,164.33,5,116,...,0.315804,34.37,830,0.695725,0.142875,34.09,0,1,0,Top-order
2,rilee rossouw,south africa,batsman,left-handed-bat,5,767,34.86,159.8,3,69,...,0.1,23.65,510,0.664928,0.136349,34.86,0,1,0,Top-order
3,srikar bharat,india,wk-batsman,right-handed-bat,0,0,0.0,0.0,0,0,...,0.142864,28.43,0,,,,0,0,1,Middle-order
4,n jagadeesan,india,wk-batsman,right-handed-bat,0,0,0.0,0.0,0,0,...,0.0,18.0,0,,,,0,0,1,Top-order
5,mandeep singh,india,batsman,right-handed-bat,1,87,43.5,119.18,1,11,...,0.073154,20.8,50,0.574713,0.5,43.5,1,0,0,Middle-order
6,litton das,bangladesh,wk-batsman,right-handed-bat,3,2021,22.46,124.84,11,210,...,0.0,4.0,1188,0.587828,0.122246,22.46,0,0,1,Top-order
7,nicholas pooran,west indies,wk-batsman,left-handed-bat,10,2275,26.15,136.4,13,152,...,0.163618,32.16,1502,0.66022,0.149429,26.15,0,1,0,Top-order
8,vishnu vinod,india,wk-batsman,right-handed-bat,0,0,0.0,0.0,0,0,...,0.0,9.33,0,,,,0,0,1,Top-order
9,harpreet singh bhatia,india,batsman,left-handed-bat,0,0,0.0,0.0,0,0,...,0.0,17.57,0,,,,0,0,1,Middle-order
