In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import tensorflow as tf

In [3]:
import glob

In [4]:
all_files = glob.glob('./ipl_csv2/*.csv')
df_from_each_file = (pd.read_csv(f) for f in all_files)
df = pd.concat(df_from_each_file, ignore_index=True)

In [5]:
df.dtypes

match_id                    int64
season                     object
start_date                 object
venue                      object
innings                     int64
ball                      float64
batting_team               object
bowling_team               object
striker                    object
non_striker                object
bowler                     object
runs_off_bat                int64
extras                      int64
wides                     float64
noballs                   float64
byes                      float64
legbyes                   float64
penalty                   float64
wicket_type                object
player_dismissed           object
other_wicket_type         float64
other_player_dismissed    float64
dtype: object

In [6]:
df['season'].unique()

array([2017, 2018, 2019, '2020/21', 2021, '2007/08', 2009, '2009/10',
       2011, 2012, 2013, 2014, 2015, 2016], dtype=object)

In [7]:
df.drop(['other_player_dismissed','other_wicket_type', 'penalty','player_dismissed','start_date'], axis=1, inplace=True)

In [8]:
df.dtypes

match_id          int64
season           object
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
runs_off_bat      int64
extras            int64
wides           float64
noballs         float64
byes            float64
legbyes         float64
wicket_type      object
dtype: object

In [9]:
df['total_runs'] = df.iloc[:,-7:-2].sum(axis=1)

In [10]:
df.drop(['runs_off_bat','wides','noballs','extras','byes','legbyes'],axis=1, inplace=True)

In [11]:
df.dtypes

match_id          int64
season           object
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
wicket_type      object
total_runs      float64
dtype: object

In [12]:
df['year'] = df['season'].astype(str).str.slice(0,4).astype(int)

In [13]:
df['year'].unique()

array([2017, 2018, 2019, 2020, 2021, 2007, 2009, 2011, 2012, 2013, 2014,
       2015, 2016], dtype=int64)

In [14]:
df.drop('season',inplace=True,axis=1)

In [15]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
wicket_type      object
total_runs      float64
year              int32
dtype: object

In [16]:
df['innings'].unique()

array([1, 2, 3, 4, 5, 6], dtype=int64)

In [17]:
df['batting_team'].unique()

array(['Sunrisers Hyderabad', 'Royal Challengers Bangalore',
       'Mumbai Indians', 'Rising Pune Supergiant', 'Gujarat Lions',
       'Kolkata Knight Riders', 'Kings XI Punjab', 'Delhi Daredevils',
       'Chennai Super Kings', 'Rajasthan Royals', 'Delhi Capitals',
       'Deccan Chargers', 'Kochi Tuskers Kerala', 'Pune Warriors',
       'Rising Pune Supergiants'], dtype=object)

In [18]:
df['batting_team'] = df['batting_team'].replace(['Delhi Daredevils'],'Delhi Capitals')
df['bowling_team'] = df['bowling_team'].replace(['Delhi Daredevils'],'Delhi Capitals')
df['batting_team'] = df['batting_team'].replace(['Kings XI Punjab'],'Punjab Kings')
df['bowling_team'] = df['bowling_team'].replace(['Kings XI Punjab'],'Punjab Kings')

In [19]:
current_teams = ['Sunrisers Hyderabad', 'Royal Challengers Bangalore', 'Mumbai Indians', 'Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals', 'Delhi Capitals', 'Punjab Kings']

In [20]:
df = df[(df['batting_team'].isin(current_teams)) &(df['bowling_team'].isin(current_teams))]

In [21]:
data1 = df.groupby(['match_id','innings'])['striker','non_striker'].apply(lambda x: list(np.unique(x)))
data1 = data1.to_frame('batsmen')
#data1['batsmen'] = str(data1['batsmen'])[1:-1]

In [22]:
data2 = df.groupby(['match_id','innings'])['bowler'].apply(lambda x: list(np.unique(x)))
data2 = data2.to_frame('bowlers')
#data2['bowlers'] = str(data2['bowlers'])[1:-1]

In [23]:
len(data2)

1293

In [24]:
#current_players= pd.read_csv('./playersList.csv')

In [25]:
#current_players.head()

In [26]:
#current_players['Players'][0]

In [27]:
#df = df[(df['current_players'].isin(current_players))]

In [28]:
df = df[df['ball'] <=6.0]

#

In [29]:
len(data1)

1293

In [30]:
len(df)

47466

In [31]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
wicket_type      object
total_runs      float64
year              int32
dtype: object

In [32]:
#df.drop('venue', axis=1, inplace=True)

In [33]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
wicket_type      object
total_runs      float64
year              int32
dtype: object

In [34]:
df['innings'].unique()

array([1, 2, 3, 4, 5, 6], dtype=int64)

In [35]:
df['venue'].unique()

array(['Rajiv Gandhi International Stadium, Uppal',
       'M.Chinnaswamy Stadium', 'Wankhede Stadium',
       'Holkar Cricket Stadium', 'Eden Gardens', 'M Chinnaswamy Stadium',
       'Feroz Shah Kotla',
       'Punjab Cricket Association IS Bindra Stadium, Mohali',
       'Punjab Cricket Association IS Bindra Stadium',
       'Rajiv Gandhi International Stadium', 'MA Chidambaram Stadium',
       'Sawai Mansingh Stadium',
       'Maharashtra Cricket Association Stadium', 'Arun Jaitley Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Sheikh Zayed Stadium', 'Dubai International Cricket Stadium',
       'Sharjah Cricket Stadium',
       'MA Chidambaram Stadium, Chepauk, Chennai',
       'Wankhede Stadium, Mumbai',
       'Punjab Cricket Association Stadium, Mohali',
       'MA Chidambaram Stadium, Chepauk', 'Dr DY Patil Sports Academy',
       'Newlands', "St George's Park", 'Kingsmead', 'SuperSport Park',
       'Buffalo Park', 'New Wanderers Stadium', 'De

In [36]:
df['venue'] = df['venue'].replace(['MA Chidambaram Stadium, Chepauk, Chennai'],'MA Chidambaram Stadium')
df['venue'] = df['venue'].replace(['MA Chidambaram Stadium, Chepauk'],'MA Chidambaram Stadium')
df['venue'] = df['venue'].replace(['M.Chinnaswamy Stadium'],'M Chinnaswamy Stadium')

In [37]:
current_stadium = ['Wankhede Stadium, Mumbai', 'MA Chidambaram Stadium', 'M Chinnaswamy Stadium', 'Eden Gardens', 'Arun Jaitley Stadium', 'Narendra Modi']

In [38]:
df = df[df['venue'].isin(current_stadium)]

In [39]:
len(df)

15044

In [40]:
data = df.groupby(['match_id','innings'])['striker','non_striker'].apply(lambda x: list(np.unique(x)))

In [41]:
data.head(5)

match_id  innings
335982    1                      [BB McCullum, RT Ponting, SC Ganguly]
          2          [CL White, JH Kallis, MV Boucher, R Dravid, V ...
335989    1                [MEK Hussey, ML Hayden, PA Patel, SK Raina]
          2          [L Ronchi, RV Uthappa, SM Pollock, ST Jayasuriya]
335992    1          [JH Kallis, LRPL Taylor, R Dravid, S Chanderpaul]
dtype: object

In [42]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
wicket_type      object
total_runs      float64
year              int32
dtype: object

In [43]:
#df = pd.get_dummies(data=df, columns=['batting_team', 'bowling_team', 'venue'])

In [44]:
df.columns

Index(['match_id', 'venue', 'innings', 'ball', 'batting_team', 'bowling_team',
       'striker', 'non_striker', 'bowler', 'wicket_type', 'total_runs',
       'year'],
      dtype='object')

In [45]:
df.drop('wicket_type', axis=1, inplace=True)

In [46]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
total_runs      float64
year              int32
dtype: object

In [47]:
df['bowler'].unique()

array(['Z Khan', 'CH Morris', 'PJ Cummins', 'S Nadeem', 'B Stanlake',
       'YS Chahal', 'Iqbal Abdulla', 'TS Mills', 'TA Boult', 'UT Yadav',
       'CR Woakes', 'SP Narine', 'PP Chawla', 'Sandeep Sharma',
       'I Sharma', 'GJ Maxwell', 'VR Aaron', 'TG Southee',
       'Harbhajan Singh', 'MJ McClenaghan', 'S Badree', 'STR Binny',
       'S Aravind', 'B Kumar', 'A Nehra', 'BCJ Cutting', 'Rashid Khan',
       'Kuldeep Yadav', 'NM Coulter-Nile', 'K Rabada', 'CJ Anderson',
       'A Choudhary', 'SR Watson', 'T Natarajan', 'C de Grandhomme',
       'MG Johnson', 'KV Sharma', 'CJ Jordan', 'S Kaul', 'Bipul Sharma',
       'JJ Bumrah', 'SL Malinga', 'R Vinay Kumar', 'Washington Sundar',
       'DL Chahar', 'RA Jadeja', 'Imran Tahir', 'AD Russell', 'TK Curran',
       'K Khejroliya', 'AR Patel', 'R Ashwin', 'Mujeeb Ur Rahman',
       'AJ Tye', 'K Gowtham', 'DS Kulkarni', 'JD Unadkat', 'BA Stokes',
       'Shivam Mavi', 'AS Rajpoot', 'BB Sran', 'N Rana', 'Avesh Khan',
       'LE Plunkett', 'S

In [48]:
#df['total_runs'] = df['total_runs'].astype(np.int)

In [49]:
data3 = df.groupby(['match_id','innings'])['total_runs'].sum()

In [50]:
#data3.head(10)

In [51]:
#df.dtypes

In [52]:
#df[['match_id','innings', 'total_runs', 'ball']].head(40)

In [53]:
data3.head()

match_id  innings
335982    1          67.0
          2          28.0
335989    1          60.0
          2          54.0
335992    1          58.0
Name: total_runs, dtype: float64

In [54]:
len(data3)

409

In [55]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
total_runs      float64
year              int32
dtype: object

In [56]:
#data2 = data2.to_frame('bowlers')
#data3 = data3.to_frame('runs')

In [57]:
data4 = df.groupby(['match_id','innings'])

In [58]:
#df.drop(['striker','non_striker','bowler'],axis=1,inplace=True)

In [59]:
df.dtypes

match_id          int64
venue            object
innings           int64
ball            float64
batting_team     object
bowling_team     object
striker          object
non_striker      object
bowler           object
total_runs      float64
year              int32
dtype: object

In [60]:
df.drop('total_runs',axis=1,inplace=True)

In [61]:
#df.drop(['match_id','innings'],axis=1,inplace=True)

In [62]:
#data4 = df.groupby(['match_id','innings'])['year'].apply(lambda x:np.unique(x))

In [63]:
#data4.head()

In [64]:
data4 = df.groupby(['match_id','innings'])['batting_team'].apply(lambda x: np.unique(x)).str[0]

In [65]:
data4.head()

match_id  innings
335982    1                Kolkata Knight Riders
          2          Royal Challengers Bangalore
335989    1                  Chennai Super Kings
          2                       Mumbai Indians
335992    1          Royal Challengers Bangalore
Name: batting_team, dtype: object

In [66]:
data5 = df.groupby(['match_id','innings'])['bowling_team'].apply(lambda x: np.unique(x)).str[0]

In [67]:
df.drop(['batting_team','bowling_team'],axis=1,inplace=True)

In [68]:
df.dtypes

match_id         int64
venue           object
innings          int64
ball           float64
striker         object
non_striker     object
bowler          object
year             int32
dtype: object

In [69]:
data6 = df.groupby(['match_id','innings'])['venue'].apply(lambda x: np.unique(x)).str[0]

In [70]:
data7 = df.groupby(['match_id','innings'])['year'].apply(lambda x: np.unique(x)).str[0]

In [71]:
data3.head()

match_id  innings
335982    1          67.0
          2          28.0
335989    1          60.0
          2          54.0
335992    1          58.0
Name: total_runs, dtype: float64

In [72]:
concat_data = pd.concat([data1,data2,data3,data4,data5,data6,data7],axis=1)

In [73]:
concat_data.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue,year
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium,2007.0
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,2007.0
335983,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[B Lee, IK Pathan, JR Hopes, K Goel, PP Chawla...",,,,,
335983,2,"[IK Pathan, JR Hopes, K Goel, KC Sangakkara, S...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",,,,,
335984,1,"[D Salunkhe, DS Lehmann, M Kaif, M Rawat, RA J...","[B Geeves, DL Vettori, GD McGrath, MF Maharoof...",,,,,
335984,2,"[G Gambhir, S Dhawan, V Sehwag]","[D Salunkhe, MM Patel, SK Trivedi, SK Warne, S...",,,,,
335985,1,"[AM Nayar, DJ Thornely, Harbhajan Singh, L Ron...","[B Akhil, JH Kallis, P Kumar, R Vinay Kumar, S...",,,,,
335985,2,"[B Akhil, JH Kallis, LRPL Taylor, MV Boucher, ...","[A Nehra, AM Nayar, DS Kulkarni, Harbhajan Sin...",,,,,
335987,1,"[B Lee, DPMD Jayawardene, IK Pathan, JR Hopes,...","[D Salunkhe, MM Patel, Pankaj Singh, SK Trived...",,,,,
335987,2,"[DS Lehmann, Kamran Akmal, M Kaif, RA Jadeja, ...","[B Lee, IK Pathan, JR Hopes, PP Chawla, S Sree...",,,,,


In [74]:
concat_data.drop('year',axis=1,inplace=True)

In [75]:
concat_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium
335983,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[B Lee, IK Pathan, JR Hopes, K Goel, PP Chawla...",,,,
335983,2,"[IK Pathan, JR Hopes, K Goel, KC Sangakkara, S...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",,,,
335984,1,"[D Salunkhe, DS Lehmann, M Kaif, M Rawat, RA J...","[B Geeves, DL Vettori, GD McGrath, MF Maharoof...",,,,


In [76]:
concat_data.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium
335983,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[B Lee, IK Pathan, JR Hopes, K Goel, PP Chawla...",,,,
335983,2,"[IK Pathan, JR Hopes, K Goel, KC Sangakkara, S...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",,,,
335984,1,"[D Salunkhe, DS Lehmann, M Kaif, M Rawat, RA J...","[B Geeves, DL Vettori, GD McGrath, MF Maharoof...",,,,
335984,2,"[G Gambhir, S Dhawan, V Sehwag]","[D Salunkhe, MM Patel, SK Trivedi, SK Warne, S...",,,,
335985,1,"[AM Nayar, DJ Thornely, Harbhajan Singh, L Ron...","[B Akhil, JH Kallis, P Kumar, R Vinay Kumar, S...",,,,
335985,2,"[B Akhil, JH Kallis, LRPL Taylor, MV Boucher, ...","[A Nehra, AM Nayar, DS Kulkarni, Harbhajan Sin...",,,,
335987,1,"[B Lee, DPMD Jayawardene, IK Pathan, JR Hopes,...","[D Salunkhe, MM Patel, Pankaj Singh, SK Trived...",,,,
335987,2,"[DS Lehmann, Kamran Akmal, M Kaif, RA Jadeja, ...","[B Lee, IK Pathan, JR Hopes, PP Chawla, S Sree...",,,,


In [77]:
#concat_data['batsmen'].unique()

In [78]:
#concat_data = pd.get_dummies(data=df, columns=['batting_team', 'bowling_team', 'venue'])

In [79]:
from sklearn.preprocessing import MultiLabelBinarizer

In [80]:
concat_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium
335983,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[B Lee, IK Pathan, JR Hopes, K Goel, PP Chawla...",,,,
335983,2,"[IK Pathan, JR Hopes, K Goel, KC Sangakkara, S...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",,,,
335984,1,"[D Salunkhe, DS Lehmann, M Kaif, M Rawat, RA J...","[B Geeves, DL Vettori, GD McGrath, MF Maharoof...",,,,


In [81]:
#len(lb.classes_)

In [82]:
df.dtypes

match_id         int64
venue           object
innings          int64
ball           float64
striker         object
non_striker     object
bowler          object
year             int32
dtype: object

In [83]:
from sklearn.preprocessing import LabelEncoder

In [84]:
le = LabelEncoder()

In [85]:
le.fit(df['striker'])

LabelEncoder()

In [86]:
df.head()

Unnamed: 0,match_id,venue,innings,ball,striker,non_striker,bowler,year
960,1082595,M Chinnaswamy Stadium,1,0.1,CH Gayle,SR Watson,Z Khan,2017
961,1082595,M Chinnaswamy Stadium,1,0.2,SR Watson,CH Gayle,Z Khan,2017
962,1082595,M Chinnaswamy Stadium,1,0.3,SR Watson,CH Gayle,Z Khan,2017
963,1082595,M Chinnaswamy Stadium,1,0.4,SR Watson,CH Gayle,Z Khan,2017
964,1082595,M Chinnaswamy Stadium,1,0.5,SR Watson,CH Gayle,Z Khan,2017


In [87]:
#def trans(data):
#    for i in data:
#        for j in range(len(data[i])):
#            le.fit_transform(data[i][j])

In [88]:
#trans(concat_data['batsmen'])
len(concat_data)

1293

In [89]:
#concat_data['batsmen'][0][0]
len(concat_data['batsmen'])

1293

In [90]:
concat_data.dtypes

batsmen          object
bowlers          object
total_runs      float64
batting_team     object
bowling_team     object
venue            object
dtype: object

In [91]:
#for i in range(len(concat_data['batsmen'])):
#    concat_data['batsmen'][i] = ','.join(concat_data['batsmen'][i])

In [92]:
#for i in range(len(concat_data['bowlers'])):
#    concat_data['bowlers'][i] = ','.join(concat_data['bowlers'][i])

In [93]:
concat_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium
335983,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[B Lee, IK Pathan, JR Hopes, K Goel, PP Chawla...",,,,
335983,2,"[IK Pathan, JR Hopes, K Goel, KC Sangakkara, S...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",,,,
335984,1,"[D Salunkhe, DS Lehmann, M Kaif, M Rawat, RA J...","[B Geeves, DL Vettori, GD McGrath, MF Maharoof...",,,,


In [94]:
concat_data['batsmen'][0]

['BB McCullum', 'DJ Hussey', 'Mohammad Hafeez', 'RT Ponting', 'SC Ganguly']

In [95]:
#len(concat_data['batsmen'].unique())

In [96]:
concat_data = concat_data[concat_data['total_runs'].notna()]

In [97]:
concat_data.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium
335989,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[A Nehra, AM Nayar, DJ Bravo, DS Kulkarni, Har...",60.0,Chennai Super Kings,Mumbai Indians,MA Chidambaram Stadium
335989,2,"[A Nehra, AM Nayar, DJ Bravo, Harbhajan Singh,...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",54.0,Mumbai Indians,Chennai Super Kings,MA Chidambaram Stadium
335992,1,"[JH Kallis, LRPL Taylor, MV Boucher, P Kumar, ...","[MM Patel, SK Trivedi, SK Warne, SR Watson, So...",58.0,Royal Challengers Bangalore,Rajasthan Royals,M Chinnaswamy Stadium
335992,2,"[GC Smith, M Kaif, M Rawat, SR Watson, YK Pathan]","[A Kumble, JH Kallis, P Kumar, R Vinay Kumar, ...",46.0,Rajasthan Royals,Royal Challengers Bangalore,M Chinnaswamy Stadium
335993,1,"[AB Agarkar, AB Dinda, BB McCullum, DJ Hussey,...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",62.0,Kolkata Knight Riders,Chennai Super Kings,MA Chidambaram Stadium
335993,2,"[ML Hayden, MS Dhoni, PA Patel]","[AB Agarkar, AB Dinda, DJ Hussey, I Sharma, LR...",52.0,Chennai Super Kings,Kolkata Knight Riders,MA Chidambaram Stadium
335996,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[B Akhil, DW Steyn, JH Kallis, P Kumar, Z Khan]",38.0,Chennai Super Kings,Royal Challengers Bangalore,M Chinnaswamy Stadium
335996,2,"[B Akhil, B Chipli, DW Steyn, JH Kallis, LRPL ...","[JA Morkel, JDP Oram, Joginder Sharma, MS Gony...",47.0,Royal Challengers Bangalore,Chennai Super Kings,M Chinnaswamy Stadium


In [98]:
clen=4
for i in concat_data['batsmen']:
    if len(i)==11:
        clen=len(i)
print(clen)

11


In [99]:
concat_data['batsmen'][0][1]

'DJ Hussey'

In [100]:
#

In [101]:
concat_data = concat_data.join(pd.DataFrame(concat_data.batsmen.values.tolist(), concat_data.index).add_prefix('batsmen'))

In [102]:
concat_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,batsmen,bowlers,total_runs,batting_team,bowling_team,venue,batsmen0,batsmen1,batsmen2,batsmen3,batsmen4,batsmen5,batsmen6,batsmen7,batsmen8,batsmen9,batsmen10
match_id,innings,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
335982,1,"[BB McCullum, DJ Hussey, Mohammad Hafeez, RT P...","[AA Noffke, CL White, JH Kallis, P Kumar, SB J...",67.0,Kolkata Knight Riders,Royal Challengers Bangalore,M Chinnaswamy Stadium,BB McCullum,DJ Hussey,Mohammad Hafeez,RT Ponting,SC Ganguly,,,,,,
335982,2,"[AA Noffke, B Akhil, CL White, JH Kallis, MV B...","[AB Agarkar, AB Dinda, I Sharma, LR Shukla, SC...",28.0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,AA Noffke,B Akhil,CL White,JH Kallis,MV Boucher,P Kumar,R Dravid,SB Joshi,V Kohli,W Jaffer,Z Khan
335989,1,"[JDP Oram, MEK Hussey, ML Hayden, MS Dhoni, PA...","[A Nehra, AM Nayar, DJ Bravo, DS Kulkarni, Har...",60.0,Chennai Super Kings,Mumbai Indians,MA Chidambaram Stadium,JDP Oram,MEK Hussey,ML Hayden,MS Dhoni,PA Patel,S Badrinath,SK Raina,,,,
335989,2,"[A Nehra, AM Nayar, DJ Bravo, Harbhajan Singh,...","[JDP Oram, Joginder Sharma, M Muralitharan, MS...",54.0,Mumbai Indians,Chennai Super Kings,MA Chidambaram Stadium,A Nehra,AM Nayar,DJ Bravo,Harbhajan Singh,L Ronchi,MA Khote,RV Uthappa,SM Pollock,ST Jayasuriya,,
335992,1,"[JH Kallis, LRPL Taylor, MV Boucher, P Kumar, ...","[MM Patel, SK Trivedi, SK Warne, SR Watson, So...",58.0,Royal Challengers Bangalore,Rajasthan Royals,M Chinnaswamy Stadium,JH Kallis,LRPL Taylor,MV Boucher,P Kumar,R Dravid,R Vinay Kumar,S Chanderpaul,SB Joshi,V Kohli,Z Khan,


In [103]:
bat = pd.concat([df['striker'], df['non_striker']]).unique()

In [104]:
from sklearn.preprocessing import LabelEncoder

In [105]:
encoder = LabelEncoder()

In [106]:
encoder.fit(bat)

LabelEncoder()

In [107]:
concat_data['batsmen0'] = encoder.transform(concat_data['batsmen0'])
concat_data['batsmen0'] = encoder.transform(concat_data['batsmen0'])
concat_data['batsmen0'] = encoder.transform(concat_data['batsmen0'])
concat_data['batsmen0'] = encoder.transform(concat_data['batsmen0'])

ValueError: y contains previously unseen labels: 'AA Noffke'

In [None]:
#concat_data.head()

In [None]:
concat_data.head(20)

In [None]:
#lb.classes_

In [None]:
lb.transform(concat_data['batsmen'])

In [None]:
concat_data.head()

In [None]:
len(concat_data)

In [None]:
len(concat_data)

In [None]:
concat_data.head(15)

In [None]:
#for i in range(len(concat_data['batsmen'])):
#    concat_data['batsmen'][i] = ','.join(concat_data['batsmen'][i])

In [None]:
#concat_data['batsmen']

In [None]:
#

In [None]:
#

In [None]:
#

In [None]:
#

In [None]:
#


In [None]:
#

In [None]:
concat_data.head()

In [None]:
concat_data.drop(['batsmen','bowlers'], axis=1,inplace=True)

In [None]:
concat_data = pd.get_dummies(data=concat_data, columns=['batting_team', 'bowling_team', 'venue'])

In [None]:
concat_data.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

In [None]:
#concat_data.drop('match_id',axis=1,inplace=True)
#concat_data.columns = concat_data.columns.droplevel()
concat_data.rows

In [None]:
X = concat_data.drop('total_runs',axis=1)

In [None]:
y = concat_data['total_runs']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [None]:
lasso=Lasso()
parameters={'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40]}
lasso_regressor=GridSearchCV(lasso,parameters,scoring='r2',cv=5)

In [None]:
lasso_regressor.fit(X_train,y_train)
print(lasso_regressor.best_params_)
print(lasso_regressor.best_score_)
prediction = lasso_regressor.predict(X_test)

In [None]:
from sklearn.metrics import SCORERS

In [None]:
SCORERS.keys()

In [None]:
from sklearn import metrics
print(sns.distplot(y_test-prediction))
print('MAE:', metrics.mean_absolute_error(y_test, prediction))
print('MSE:', metrics.mean_squared_error(y_test, prediction))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, prediction)))