In [234]:
import numpy as np # for Linear algebra
import pandas as pd # for data manipulation/CSV I/O
pd.options.mode.chained_assignment = None    # To avoid the SettingWithCopyWarning

In [235]:
deliveries=pd.read_csv("dataset/deliveries.csv")
matches =pd.read_csv("dataset/matches.csv")

In [236]:
deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


In [237]:
print(deliveries.columns)

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')


In [238]:
column_heads = list(set(list(deliveries.columns)).difference(set(['inning', 'non_striker', 'is_super_over', 'legbye_runs', 'bye_runs'])))

column_heads

['extra_runs',
 'batsman_runs',
 'ball',
 'noball_runs',
 'fielder',
 'penalty_runs',
 'over',
 'batting_team',
 'dismissal_kind',
 'player_dismissed',
 'bowler',
 'batsman',
 'bowling_team',
 'total_runs',
 'match_id',
 'wide_runs']

In [239]:
deliveries_bowlers = deliveries[column_heads]
deliveries_bowlers.head()

Unnamed: 0,extra_runs,batsman_runs,ball,noball_runs,fielder,penalty_runs,over,batting_team,dismissal_kind,player_dismissed,bowler,batsman,bowling_team,total_runs,match_id,wide_runs
0,0,0,1,0,,0,1,Sunrisers Hyderabad,,,TS Mills,DA Warner,Royal Challengers Bangalore,0,1,0
1,0,0,2,0,,0,1,Sunrisers Hyderabad,,,TS Mills,DA Warner,Royal Challengers Bangalore,0,1,0
2,0,4,3,0,,0,1,Sunrisers Hyderabad,,,TS Mills,DA Warner,Royal Challengers Bangalore,4,1,0
3,0,0,4,0,,0,1,Sunrisers Hyderabad,,,TS Mills,DA Warner,Royal Challengers Bangalore,0,1,0
4,2,0,5,0,,0,1,Sunrisers Hyderabad,,,TS Mills,DA Warner,Royal Challengers Bangalore,2,1,2


In [240]:
deliveries_bowlers['bowler'].nunique()

378

In [241]:
deliveries_bowlers.fillna("0", inplace=True)
deliveries_bowlers["Fours"] = np.where(deliveries_bowlers["batsman_runs"] == 4, 1, 0)
deliveries_bowlers["Sixes"] = np.where(deliveries_bowlers["batsman_runs"] == 6, 1, 0)

deliveries_bowlers['dismissal_kind'].unique()

array(['0', 'caught', 'bowled', 'run out', 'lbw', 'caught and bowled',
       'stumped', 'retired hurt', 'hit wicket', 'obstructing the field'],
      dtype=object)

In [242]:
dismissal_kind = ['caught', 'bowled', 'lbw', 'stumped', 'caught and bowled', 'hit wicket']
deliveries_bowlers["Wickets"] = np.where(deliveries_bowlers['dismissal_kind'].isin(dismissal_kind), 1, 0)

deliveries_bowlers.sample()

Unnamed: 0,extra_runs,batsman_runs,ball,noball_runs,fielder,penalty_runs,over,batting_team,dismissal_kind,player_dismissed,bowler,batsman,bowling_team,total_runs,match_id,wide_runs,Fours,Sixes,Wickets
21466,0,1,1,0,0,0,17,Kolkata Knight Riders,0,0,WPUJC Vaas,DJ Hussey,Deccan Chargers,1,92,0,0,0,0


In [243]:
bowlers_info = deliveries_bowlers.groupby(['bowler', 'match_id'], as_index=False).agg(
    {'over': lambda x: x.nunique(), 'total_runs': 'sum', 'Fours': 'sum', 'Sixes': 'sum', 'Wickets': 'sum'}).reset_index()

recent_form = bowlers_info.copy()
bowlers_info.sample()

Unnamed: 0,index,bowler,match_id,over,total_runs,Fours,Sixes,Wickets
894,894,Ankit Sharma,596,3,26,0,2,0


In [244]:
bowlers_info['3WI'] = np.where(bowlers_info['Wickets']>2, 1, 0)
career_stats = bowlers_info.groupby('bowler', as_index=False).agg(
    {'match_id': 'count', 'total_runs': 'sum', 'over': 'sum', 'Fours': 'sum', 'Sixes': 'sum', 
     'Wickets': 'sum', '3WI': 'sum'}).reset_index()

most_wickets_in_match = bowlers_info.groupby(['bowler']).agg({'Wickets': 'max'}).reset_index()
bowlers_info = pd.merge(bowlers_info, most_wickets_in_match, on=['bowler', 'Wickets'])

least_runs_for_most_wickets = bowlers_info.groupby(['bowler']).agg({'total_runs': 'min'}).reset_index()
bowlers_info = pd.merge(bowlers_info, least_runs_for_most_wickets, on=['bowler', 'total_runs'])

bowlers_info.head()

Unnamed: 0,index,bowler,match_id,over,total_runs,Fours,Sixes,Wickets,3WI
0,8,A Ashish Reddy,376,4,25,1,1,3,1
1,21,A Chandila,365,4,13,1,0,4,1
2,35,A Choudhary,42,4,17,1,0,2,0
3,37,A Dananjaya,7902,4,48,2,4,0,0
4,39,A Flintoff,122,3,11,1,0,1,0


In [245]:
bowlers_info['BBI'] = ( bowlers_info['Wickets'].map(str)) + '/' + (bowlers_info['total_runs'].map(str) )

bowlers_info.head()

Unnamed: 0,index,bowler,match_id,over,total_runs,Fours,Sixes,Wickets,3WI,BBI
0,8,A Ashish Reddy,376,4,25,1,1,3,1,3/25
1,21,A Chandila,365,4,13,1,0,4,1,4/13
2,35,A Choudhary,42,4,17,1,0,2,0,2/17
3,37,A Dananjaya,7902,4,48,2,4,0,0,0/48
4,39,A Flintoff,122,3,11,1,0,1,0,1/11


In [246]:
bowlers_info['bowler'].count()

384

In [247]:
recent_form.head(30)
recent_form = recent_form.groupby('bowler').tail(4)
recent_form.drop(['match_id','Fours','Sixes'],axis=1,inplace=True)
recent_form = recent_form.groupby(['bowler']).agg({'over':'sum' , 'total_runs':'sum', 'Wickets':'sum'}).reset_index()
recent_form['Form_Average'] = np.where(recent_form['total_runs']/recent_form['Wickets'] == np.inf ,
                                  recent_form['total_runs'],
                                  recent_form['total_runs']/recent_form['Wickets'])
recent_form['Form_Econ'] = np.where(recent_form['total_runs']/recent_form['over'] == np.inf,
                                   0,recent_form['total_runs']/recent_form['over'])

recent_form.rename(columns={'bowler':'Bowler','total_runs':'Recent_runs', 'Wickets':'Recent_wickets'},inplace=True)
recent_form.drop(['over'],axis=1,inplace=True)
recent_form.head()

Unnamed: 0,Bowler,Recent_runs,Recent_wickets,Form_Average,Form_Econ
0,A Ashish Reddy,75,3,25.0,9.375
1,A Chandila,99,3,33.0,7.615385
2,A Choudhary,89,4,22.25,6.846154
3,A Dananjaya,48,0,48.0,12.0
4,A Flintoff,106,2,53.0,9.636364


In [248]:
bowlers_info[bowlers_info.duplicated(['bowler'], keep='first')]

Unnamed: 0,index,bowler,match_id,over,total_runs,Fours,Sixes,Wickets,3WI,BBI
161,3410,K Rabada,41,4,28,2,1,2,0,2/28
229,4679,N Rana,7908,2,11,1,0,2,0,2/11
248,5002,PJ Cummins,55,4,22,1,0,2,0,2/22
287,6195,Rashid Khan,7918,4,19,2,0,3,1,3/19
288,6204,Rashid Khan,7952,4,19,2,0,3,1,3/19
316,6605,SE Bond,230,4,24,2,0,2,0,2/24


In [249]:
bowlers_info[bowlers_info.duplicated(['bowler'], keep='last')]

Unnamed: 0,index,bowler,match_id,over,total_runs,Fours,Sixes,Wickets,3WI,BBI
160,3407,K Rabada,31,4,28,3,0,2,0,2/28
228,4678,N Rana,7896,1,11,1,1,2,0,2/11
247,4996,PJ Cummins,24,4,22,1,0,2,0,2/22
286,6176,Rashid Khan,6,4,19,1,0,3,1,3/19
287,6195,Rashid Khan,7918,4,19,2,0,3,1,3/19
315,6601,SE Bond,196,4,24,2,1,2,0,2/24


In [250]:
bowlers_info = bowlers_info.drop_duplicates('bowler', keep='last')
career_stats = pd.merge(career_stats, bowlers_info[['bowler', 'BBI']], on='bowler')

career_stats.head()

Unnamed: 0,index,bowler,match_id,total_runs,over,Fours,Sixes,Wickets,3WI,BBI
0,0,A Ashish Reddy,20,400,45,26,20,18,1,3/25
1,1,A Chandila,12,245,39,17,10,11,1,4/13
2,2,A Choudhary,5,144,17,13,6,5,0,2/17
3,3,A Dananjaya,1,48,4,2,4,0,0,0/48
4,4,A Flintoff,3,106,11,7,7,2,0,1/11


In [251]:
career_stats = career_stats.rename(columns={'bowler': 'Bowler', 'total_runs': 'Runs', 'match_id': 'Matches', 'over': 'Overs'})
career_stats['Economy'] = np.around(career_stats['Runs'] / career_stats['Overs'], 2)
career_stats['Average'] = np.where(career_stats['Wickets'] == 0 , career_stats['Runs'], 
                                   np.around(career_stats['Runs'] / career_stats['Wickets'], 2) )
career_stats['SR'] = np.where(career_stats['Wickets'] == 0 , 0, 
                              np.around((career_stats['Overs']*6) / career_stats['Wickets'], 2))

# Reordering columns for presentation


In [255]:
career_stats = pd.merge(career_stats, recent_form, on='Bowler')

sequence = ['Bowler', 'Matches', 'Wickets', 'Runs', 'Overs' , 'Average' ,'Economy', 'SR', 'BBI',  '3WI', 'Fours', 'Sixes',
           'Recent_runs','Recent_wickets','Form_Average','Form_Econ']

career_stats = career_stats[sequence]
career_stats.head()

Unnamed: 0,Bowler,Matches,Wickets,Runs,Overs,Average,Economy,SR,BBI,3WI,Fours,Sixes,Recent_runs,Recent_wickets,Form_Average,Form_Econ
0,A Ashish Reddy,20,18,400,45,22.22,8.89,15.0,3/25,1,26,20,75,3,25.0,9.375
1,A Chandila,12,11,245,39,22.27,6.28,21.27,4/13,1,17,10,99,3,33.0,7.615385
2,A Choudhary,5,5,144,17,28.8,8.47,20.4,2/17,0,13,6,89,4,22.25,6.846154
3,A Dananjaya,1,0,48,4,48.0,12.0,0.0,0/48,0,2,4,48,0,48.0,12.0
4,A Flintoff,3,2,106,11,53.0,9.64,33.0,1/11,0,7,7,106,2,53.0,9.636364


In [None]:
career_stats.sort_values(by='Wickets', ascending=False).reset_index(drop='T')[:10]


In [253]:
export_csv = career_stats.to_csv ('dataset/bowlers.csv', index = None, header=True) #Don't forget to add '.csv' at the end of the path