This file was used to create batter_runs.csv from ball_by_ball_ipl.csv

In [7]:
import pandas as pd

data = pd.read_csv('ball_by_ball_it20.csv')

In [8]:
# Add column to data which keeps track of how many wickets have occurred in the innings so far
data['wickets'] = data.groupby(['Match ID', 'Innings'])['Wicket'].cumsum()
# Offset by 1 so that the wicket is only counted after the ball has been bowled
data['wickets'] = data['wickets'] - data['Wicket']

In [9]:
# Loop through each row in data

# Get all the batters
batters = data['Batter'].unique()

selected_cols = ['Match ID', 'Date', 'Innings', 'Over', 'wickets', 'Batter Runs', 'Extra Runs', 'Wicket', 'Bowler']

# Filter with selected cols
df_selected = data[selected_cols]

# Split Batter Runs into 7 columns, one for each run
df_selected = pd.concat([df_selected, pd.DataFrame(df_selected['Batter Runs'].apply(lambda x: [1 if i == x else 0 for i in range(7)]).tolist(), columns=[str(i) for i in range(7)])], axis=1)
df_selected.drop('Batter Runs', axis=1, inplace=True)
df_selected.head()

df_selected = df_selected.groupby(['Match ID', 'Date', 'Innings', 'Over', 'wickets', 'Bowler'], as_index=False).sum()

df_selected[df_selected['Match ID'] == data.iloc[0]['Match ID']].head()

Unnamed: 0,Match ID,Date,Innings,Over,wickets,Bowler,Extra Runs,Wicket,0,1,2,3,4,5,6
76178,1339605,2023-03-26,1,1,0,WD Parnell,0,1,1,2,0,0,0,0,0
76179,1339605,2023-03-26,1,1,1,WD Parnell,0,0,1,0,0,0,2,0,0
76180,1339605,2023-03-26,1,2,1,AK Markram,0,0,5,1,0,0,0,0,0
76181,1339605,2023-03-26,1,3,1,WD Parnell,1,0,1,2,1,0,3,0,0
76182,1339605,2023-03-26,1,4,1,K Rabada,0,0,4,1,0,0,1,0,0


In [10]:
# Give bowlers ids
bowlers = data['Bowler'].unique()
bowler_id = {bowler: i+1 for i, bowler in enumerate(bowlers)}
df_selected['Bowler ID'] = df_selected['Bowler'].apply(lambda x: bowler_id[x])

In [11]:
min(bowler_id.values())

1

In [12]:
# Convert col names to lowercase
df_selected.columns = [col.lower() for col in df_selected.columns]

# Save to csv
df_selected.to_csv('bowler_runs_it20.csv', index=False)

In [13]:
bowler_runs_ipl = pd.read_csv('bowler_runs_ipl.csv')
bowler_runs_it20 = pd.read_csv('bowler_runs_it20.csv')

In [14]:
# Add column ipl-it20 to each df
bowler_runs_ipl['ipl-it20'] = 'ipl'
bowler_runs_it20['ipl-it20'] = 'it20'

In [15]:
# Concatenate the two dataframes
bowler_runs = pd.concat([bowler_runs_ipl, bowler_runs_it20], ignore_index=True)
bowler_runs

Unnamed: 0,match id,date,innings,over,wickets,bowler,extra runs,wicket,0,1,2,3,4,5,6,bowler id,ipl-it20
0,335982,2008-04-18,1,1,0,P Kumar,3,0,7,0,0,0,0,0,0,174,ipl
1,335982,2008-04-18,1,2,0,Z Khan,0,0,2,0,0,0,3,0,1,69,ipl
2,335982,2008-04-18,1,3,0,P Kumar,1,0,4,1,0,0,1,0,0,174,ipl
3,335982,2008-04-18,1,4,0,AA Noffke,6,0,3,1,0,0,1,0,2,457,ipl
4,335982,2008-04-18,1,5,0,P Kumar,0,0,2,2,0,0,2,0,0,174,ipl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136478,1393328,2023-08-22,2,13,2,E Rukiriza,0,0,3,1,0,0,0,0,0,1134,it20
136479,1393328,2023-08-22,2,14,2,E Kubwimana,0,1,3,2,0,0,0,0,0,1918,it20
136480,1393328,2023-08-22,2,14,3,E Kubwimana,0,0,1,0,0,0,0,0,0,1918,it20
136481,1393328,2023-08-22,2,15,3,K Irakoze,0,0,2,2,1,0,1,0,0,285,it20


In [16]:
# Drop the batter id column
bowler_runs.drop('bowler id', axis=1, inplace=True)

In [27]:
# Give batters ids
bowlers = bowler_runs['bowler'].unique()
bowler_id = {bowler: i+1 for i, bowler in enumerate(bowlers)}
bowler_runs['bowler id'] = bowler_runs['bowler'].apply(lambda x: bowlers[x])

In [28]:
batter_runs

Unnamed: 0,match id,date,innings,over,wickets,batter,extra runs,wicket,0,1,2,3,4,5,6,ipl-it20,batter id
0,335982,2008-04-18,1,1,0,BB McCullum,2,0,6,0,0,0,0,0,0,ipl,1
1,335982,2008-04-18,1,1,0,SC Ganguly,1,0,1,0,0,0,0,0,0,ipl,2
2,335982,2008-04-18,1,2,0,BB McCullum,0,0,2,0,0,0,3,0,1,ipl,1
3,335982,2008-04-18,1,3,0,BB McCullum,0,0,0,1,0,0,1,0,0,ipl,1
4,335982,2008-04-18,1,3,0,SC Ganguly,1,0,4,0,0,0,0,0,0,ipl,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237288,1393328,2023-08-22,2,14,3,S Thakor,0,0,1,0,0,0,0,0,0,it20,2312
237289,1393328,2023-08-22,2,15,3,AP Rajeevan,0,0,2,1,1,0,0,0,0,it20,2754
237290,1393328,2023-08-22,2,15,3,S Thakor,0,0,0,1,0,0,1,0,0,it20,2312
237291,1393328,2023-08-22,2,16,3,AP Rajeevan,0,0,1,0,0,0,3,0,1,it20,2754


In [29]:
# Save
batter_runs.to_csv('batter_runs.csv', index=False)