In [20]:
import pandas as pd 
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import lazypredict

In [21]:
data = pd.read_csv('data/final_score_data.csv')

In [22]:
data.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'total_runs', 'player_dismissed', 'Cumsum_Total', 'wickets_lost',
       'total_wickets', 'Last_5overs_runs', 'Last_5overs_wickets',
       'Total_Score', 'id', 'season', 'city', 'venue'],
      dtype='object')

In [23]:
data.head(5)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,total_runs,player_dismissed,Cumsum_Total,wickets_lost,total_wickets,Last_5overs_runs,Last_5overs_wickets,Total_Score,id,season,city,venue
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,0,,0,0,0,0.0,0.0,207,1,2017,Hyderabad,"Rajiv Gandhi International Stadium, Uppal"
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,0,,0,0,0,0.0,0.0,207,1,2017,Hyderabad,"Rajiv Gandhi International Stadium, Uppal"
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,4,,4,0,0,4.0,0.0,207,1,2017,Hyderabad,"Rajiv Gandhi International Stadium, Uppal"
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,0,,4,0,0,4.0,0.0,207,1,2017,Hyderabad,"Rajiv Gandhi International Stadium, Uppal"
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,2,,6,0,0,6.0,0.0,207,1,2017,Hyderabad,"Rajiv Gandhi International Stadium, Uppal"


In [24]:
cols_remove = ['total_runs', 'player_dismissed', 'Cumsum_Total', 'wickets_lost','id']
data.drop(labels = cols_remove,axis=1,inplace=True)

In [25]:
data['batting_team'].unique()

array(['Sunrisers Hyderabad', 'Royal Challengers Bangalore',
       'Mumbai Indians', 'Rising Pune Supergiant', 'Gujarat Lions',
       'Kolkata Knight Riders', 'Kings XI Punjab', 'Delhi Daredevils',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants',
       'Delhi Capitals'], dtype=object)

In [26]:
data.groupby(by='batting_team')['season'].unique()

batting_team
Chennai Super Kings            [2008, 2009, 2010, 2011, 2012, 2013, 2014, 201...
Deccan Chargers                                   [2008, 2009, 2010, 2011, 2012]
Delhi Capitals                                                            [2019]
Delhi Daredevils               [2017, 2008, 2009, 2010, 2011, 2012, 2013, 201...
Gujarat Lions                                                       [2017, 2016]
Kings XI Punjab                [2017, 2008, 2009, 2010, 2011, 2012, 2013, 201...
Kochi Tuskers Kerala                                                      [2011]
Kolkata Knight Riders          [2017, 2008, 2009, 2010, 2011, 2012, 2013, 201...
Mumbai Indians                 [2017, 2008, 2009, 2010, 2011, 2012, 2013, 201...
Pune Warriors                                                 [2011, 2012, 2013]
Rajasthan Royals               [2008, 2009, 2010, 2011, 2012, 2013, 2014, 201...
Rising Pune Supergiant                                                    [2017]
Rising Pune Sup

In [27]:
## dropping the below Teams
drop_teams = ['Pune Warriors','Kochi Tuskers Kerala']
drop_teams_rowId = data[data['batting_team'].isin(drop_teams) | data['bowling_team'].isin(drop_teams)].index

data.drop(labels=drop_teams_rowId,axis=0,inplace=True)

In [28]:
## Replacing some teams with other teams and also replacing with Shortnames
rename_map = {'Rising Pune Supergiant':'CSK','Chennai Super Kings':'CSK','Rising Pune Supergiants':'CSK',
              'Deccan Chargers':'SRH', 'Sunrisers Hyderabad':'SRH',
              'Delhi Daredevils' : 'DC','Delhi Capitals':'DC',
              'Gujarat Lions' : 'RR', 'Rajasthan Royals':'RR',
              'Kings XI Punjab' : 'KXIP',
              'Royal Challengers Bangalore' : 'RCB',
              'Mumbai Indians' : 'MI',
              'Kolkata Knight Riders' : 'KKR'}
              
data['batting_team'] = data['batting_team'].map(rename_map).fillna(data['batting_team'])
data['bowling_team'] = data['bowling_team'].map(rename_map).fillna(data['bowling_team'])

In [29]:
## Location and venues
print(data['venue'].nunique())
data['venue'].unique()

40


array(['Rajiv Gandhi International Stadium, Uppal',
       'Maharashtra Cricket Association Stadium',
       'Saurashtra Cricket Association Stadium', 'Holkar Cricket Stadium',
       'M Chinnaswamy Stadium', 'Wankhede Stadium', 'Eden Gardens',
       'Feroz Shah Kotla',
       'Punjab Cricket Association IS Bindra Stadium, Mohali',
       'Green Park', 'Punjab Cricket Association Stadium, Mohali',
       'Sawai Mansingh Stadium', 'MA Chidambaram Stadium, Chepauk',
       'Dr DY Patil Sports Academy', 'Newlands', "St George's Park",
       'Kingsmead', 'SuperSport Park', 'Buffalo Park',
       'New Wanderers Stadium', 'De Beers Diamond Oval',
       'OUTsurance Oval', 'Brabourne Stadium',
       'Sardar Patel Stadium, Motera', 'Barabati Stadium',
       'Vidarbha Cricket Association Stadium, Jamtha',
       'Himachal Pradesh Cricket Association Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Subrata Roy Sahara Stadium',
       'Shaheed Veer Narayan Singh

In [34]:
## Mapping the Stadium names(venue), same stadium name is written in different ways
rename_stadium = {'Rajiv Gandhi International Stadium, Uppal' : 'RGIS',
                    'Maharashtra Cricket Association Stadium':'MCAS',
                    'Saurashtra Cricket Association Stadium' : 'SCAS' , 
                    'Holkar Cricket Stadium': 'HCS',
                    'M Chinnaswamy Stadium' : 'MCS',
                    'Wankhede Stadium' : 'WS', 
                    'Eden Gardens':'Eden',
                    'Feroz Shah Kotla' : 'FSK',
                    'Punjab Cricket Association IS Bindra Stadium, Mohali' : 'PCAS',
                    'Green Park' : 'GreenPark', 
                    'Punjab Cricket Association Stadium, Mohali' : 'PCAS',
                    'Sawai Mansingh Stadium' : 'SMS', 
                    'MA Chidambaram Stadium, Chepauk' : 'MACS',
                    'Dr DY Patil Sports Academy' : 'DYPSA', 
                    'Newlands' : 'Newlands', 
                    "St George's Park" : 'GeorgePark',
                    'Kingsmead' : 'Kingsmead', 
                    'SuperSport Park' : 'SuperSport', 
                    'Buffalo Park' : 'BuffaloPark',
                    'New Wanderers Stadium' : 'NewWanderers', 
                    'De Beers Diamond Oval' : 'DeBeersDiamond',
                    'OUTsurance Oval' : 'OUTsurance', 
                    'Brabourne Stadium' : 'Brabourne',
                    'Sardar Patel Stadium, Motera' : 'SPS',
                    'Barabati Stadium' : 'Barabati',
                    'Vidarbha Cricket Association Stadium, Jamtha' : 'VCAS',
                    'Himachal Pradesh Cricket Association Stadium' : 'HPCAS',
                    'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium' : 'YSRACACS',
                    'Subrata Roy Sahara Stadium' : 'SRSS',
                    'Shaheed Veer Narayan Singh International Stadium' : 'SVNSIS',
                    'JSCA International Stadium Complex' : 'JSCAISC', 
                    'Sheikh Zayed Stadium' : 'SZS',
                    'Sharjah Cricket Stadium' : 'Sharjah', 
                    'Dubai International Cricket Stadium' : 'DICS',
                    'M. A. Chidambaram Stadium' : 'MACS', 
                    'Feroz Shah Kotla Ground' : 'FSKG',
                    'M. Chinnaswamy Stadium' : 'MCS', 
                    'Rajiv Gandhi Intl. Cricket Stadium' : 'RGIS',
                    'IS Bindra Stadium' : 'PCAS', 
                    'ACA-VDCA Stadium' : 'YSRACACS' }

data['venue1'] = data['venue'].map(rename_stadium).fillna(data['venue'])
print(data['venue1'].nunique())

34


city
Abu Dhabi                          [SZS]
Ahmedabad                          [SPS]
Bangalore                          [MCS]
Bengaluru                          [MCS]
Bloemfontein                [OUTsurance]
Cape Town                     [Newlands]
Centurion                   [SuperSport]
Chandigarh                        [PCAS]
Chennai                           [MACS]
Cuttack                       [Barabati]
Delhi                        [FSK, FSKG]
Dharamsala                       [HPCAS]
Durban                       [Kingsmead]
East London                [BuffaloPark]
Hyderabad                         [RGIS]
Indore                             [HCS]
Jaipur                             [SMS]
Johannesburg              [NewWanderers]
Kanpur                       [GreenPark]
Kimberley               [DeBeersDiamond]
Kolkata                           [Eden]
Mohali                            [PCAS]
Mumbai            [WS, DYPSA, Brabourne]
Nagpur                            [VCAS]
Port Elizab