# INITIAL DATA CLEANING 
The data relating to the 2020 and 2021 seasons of the tournament was fetched mostly from cricsheet.org website and a few portions taken from Wikipedia. The data was in the form of comma-separated values(csv). Two sets of csv files were present. The first set contained a summary of each match with information on playing eleven, toss, winning team and winning margin. The second set contained the ball-by-ball data of every match that included the striker, non-striker, bowler, runs scored and wickets taken. 

In [None]:
import numpy as np
import pandas as pd
import glob

In [3]:
# Get CSV files ending with info from folder
path = r'C:\Users\DELL\Cricdata\rhf'
csv_files = glob.glob(path + "/*info.csv")

In [4]:
# Read each CSV file into a list of dataframes

df_list = []

for file in csv_files:
    df = pd.read_csv(file,nrows=20,sep='\t',header=0)
    df_list.append(df) 

In [5]:
df_list[1]

Unnamed: 0,"version,2.2.0"
0,"info,balls_per_over,6"
1,"info,team,South East Stars"
2,"info,team,Western Storm"
3,"info,gender,female"
4,"info,season,2020"
5,"info,date,2020/08/29"
6,"info,event,Rachael Heyhoe Flint Trophy"
7,"info,venue,""Kent County Cricket Ground, Becken..."
8,"info,city,Beckenham"
9,"info,toss_winner,Western Storm"


In [6]:
for i in range(len(df_list)):
    df_list[i]=df_list[i]['version,2.2.0'].str.split(",",expand=True) #split the columns comma wise

In [7]:
df_list[1]

Unnamed: 0,0,1,2,3
0,info,balls_per_over,6,
1,info,team,South East Stars,
2,info,team,Western Storm,
3,info,gender,female,
4,info,season,2020,
5,info,date,2020/08/29,
6,info,event,Rachael Heyhoe Flint Trophy,
7,info,venue,"""Kent County Cricket Ground","Beckenham"""
8,info,city,Beckenham,
9,info,toss_winner,Western Storm,


In [8]:
for i in range(len(df_list)):
    df_list[i].drop([0,3],axis=1,inplace=True) #drop the first and last column

In [9]:
df_list[1]

Unnamed: 0,1,2
0,balls_per_over,6
1,team,South East Stars
2,team,Western Storm
3,gender,female
4,season,2020
5,date,2020/08/29
6,event,Rachael Heyhoe Flint Trophy
7,venue,"""Kent County Cricket Ground"
8,city,Beckenham
9,toss_winner,Western Storm


In [10]:
#combine all the dataframes columnwise into one
match_summary = pd.concat(df_list,axis=1)

In [11]:
match_summary

Unnamed: 0,1,2,1.1,2.1,1.2,2.2,1.3,2.3,1.4,2.4,...,1.5,2.5,1.6,2.6,1.7,2.7,1.8,2.8,1.9,2.9
0,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6,...,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6
1,team,Sunrisers,team,South East Stars,team,Southern Vipers,team,South East Stars,team,Thunder,...,team,Thunder,team,Western Storm,team,Northern Diamonds,team,Central Sparks,team,Northern Diamonds
2,team,Southern Vipers,team,Western Storm,team,Western Storm,team,Sunrisers,team,Central Sparks,...,team,South East Stars,team,Sunrisers,team,Southern Vipers,team,Northern Diamonds,team,Southern Vipers
3,gender,female,gender,female,gender,female,gender,female,gender,female,...,gender,female,gender,female,gender,female,gender,female,gender,female
4,season,2020,season,2020,season,2020,season,2020,season,2020,...,season,2021,season,2021,season,2021,season,2021,season,2021
5,date,2020/08/29,date,2020/08/29,date,2020/08/31,date,2020/08/31,date,2020/08/31,...,date,2021/09/18,date,2021/09/18,date,2021/09/18,date,2021/09/22,date,2021/09/25
6,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,...,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy
7,venue,"""County Ground",venue,"""Kent County Cricket Ground",venue,"""County Ground",venue,"""Kent County Cricket Ground",venue,"""Edgbaston",...,venue,"""Kent County Cricket Ground",venue,"""County Ground",venue,"""The Rose Bowl",venue,"""North Marine Road Ground",venue,"""County Ground"
8,city,Chelmsford,city,Beckenham,city,Bristol,city,Beckenham,city,Birmingham,...,city,Beckenham,city,Bristol,city,Southampton,city,Scarborough,city,Northampton
9,toss_winner,Southern Vipers,toss_winner,Western Storm,toss_winner,Western Storm,toss_winner,South East Stars,toss_winner,Thunder,...,toss_winner,Thunder,toss_winner,Western Storm,toss_winner,Northern Diamonds,toss_winner,Central Sparks,toss_winner,Northern Diamonds


In [12]:
#reset the column headers
match_summary.columns = range(match_summary.columns.size)

In [13]:
match_summary

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,100,101,102,103,104,105,106,107,108,109
0,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6,...,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6,balls_per_over,6
1,team,Sunrisers,team,South East Stars,team,Southern Vipers,team,South East Stars,team,Thunder,...,team,Thunder,team,Western Storm,team,Northern Diamonds,team,Central Sparks,team,Northern Diamonds
2,team,Southern Vipers,team,Western Storm,team,Western Storm,team,Sunrisers,team,Central Sparks,...,team,South East Stars,team,Sunrisers,team,Southern Vipers,team,Northern Diamonds,team,Southern Vipers
3,gender,female,gender,female,gender,female,gender,female,gender,female,...,gender,female,gender,female,gender,female,gender,female,gender,female
4,season,2020,season,2020,season,2020,season,2020,season,2020,...,season,2021,season,2021,season,2021,season,2021,season,2021
5,date,2020/08/29,date,2020/08/29,date,2020/08/31,date,2020/08/31,date,2020/08/31,...,date,2021/09/18,date,2021/09/18,date,2021/09/18,date,2021/09/22,date,2021/09/25
6,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,...,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy,event,Rachael Heyhoe Flint Trophy
7,venue,"""County Ground",venue,"""Kent County Cricket Ground",venue,"""County Ground",venue,"""Kent County Cricket Ground",venue,"""Edgbaston",...,venue,"""Kent County Cricket Ground",venue,"""County Ground",venue,"""The Rose Bowl",venue,"""North Marine Road Ground",venue,"""County Ground"
8,city,Chelmsford,city,Beckenham,city,Bristol,city,Beckenham,city,Birmingham,...,city,Beckenham,city,Bristol,city,Southampton,city,Scarborough,city,Northampton
9,toss_winner,Southern Vipers,toss_winner,Western Storm,toss_winner,Western Storm,toss_winner,South East Stars,toss_winner,Thunder,...,toss_winner,Thunder,toss_winner,Western Storm,toss_winner,Northern Diamonds,toss_winner,Central Sparks,toss_winner,Northern Diamonds


In [14]:
#transpose the dataframe
match_summary=match_summary.T

In [15]:
match_summary

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,balls_per_over,team,team,gender,season,date,event,venue,city,toss_winner,toss_decision,umpire,umpire,match_referee,winner,winner_wickets,player,player,player,player
1,6,Sunrisers,Southern Vipers,female,2020,2020/08/29,Rachael Heyhoe Flint Trophy,"""County Ground",Chelmsford,Southern Vipers,field,AY Harris,Chris Watts,SJ Davis,Southern Vipers,7,Sunrisers,Sunrisers,Sunrisers,Sunrisers
2,balls_per_over,team,team,gender,season,date,event,venue,city,toss_winner,toss_decision,umpire,umpire,match_referee,winner,winner_wickets,player,player,player,player
3,6,South East Stars,Western Storm,female,2020,2020/08/29,Rachael Heyhoe Flint Trophy,"""Kent County Cricket Ground",Beckenham,Western Storm,field,NJ Llong,SM Bartlett,PM Such,Western Storm,6,South East Stars,South East Stars,South East Stars,South East Stars
4,balls_per_over,team,team,gender,season,date,event,venue,city,toss_winner,toss_decision,umpire,umpire,match_referee,winner,winner_runs,player,player,player,player
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,6,Northern Diamonds,Southern Vipers,female,2021,2021/09/18,Rachael Heyhoe Flint Trophy,"""The Rose Bowl",Southampton,Northern Diamonds,bat,IN Ramage,S McLelland,AJ Swann,Southern Vipers,5,Northern Diamonds,Northern Diamonds,Northern Diamonds,Northern Diamonds
106,balls_per_over,team,team,gender,season,date,event,venue,city,toss_winner,toss_decision,umpire,umpire,match_referee,winner,winner_wickets,player,player,player,player
107,6,Central Sparks,Northern Diamonds,female,2021,2021/09/22,Rachael Heyhoe Flint Trophy,"""North Marine Road Ground",Scarborough,Central Sparks,bat,Rob White,SM Bartlett,AW Smith,Northern Diamonds,6,Central Sparks,Central Sparks,Central Sparks,Central Sparks
108,balls_per_over,team,team,gender,season,date,event,venue,city,toss_winner,toss_decision,player_of_match,umpire,umpire,reserve_umpire,match_referee,winner,winner_wickets,player,player


In [16]:
#check out values of certain columns
for i in (0,3,6,7,11,12,13,16,17,18,19):
    print(match_summary[i].value_counts())
    print("\n")

balls_per_over    53
6                 53
Name: 0, dtype: int64


female    53
gender    53
Name: 3, dtype: int64


event                          53
Rachael Heyhoe Flint Trophy    53
Name: 6, dtype: int64


venue                                   53
"County Ground                          18
"Kent County Cricket Ground              5
"Headingley                              4
 Worcester"                              4
"The Rose Bowl                           4
"Riverside Ground                        3
"Edgbaston                               3
"Aigburth                                2
"Grace Road                              2
"Kennington Oval                         2
"Boughton Hall Cricket Club Ground       2
"Haslegrave Ground                       2
"North Marine Road Ground                1
"FP Fenner's Ground                      1
"Merchant Taylors' School Ground         1
Kibworth Cricket Club Ground             1
"The Cooper Associates County Ground     1
"Rookwood         

In [17]:
#remove initial columns that do not contain any uselful value
match_summary.drop([0,3,6,7,11,12,19],axis=1,inplace=True)
#reset the column headers
match_summary.columns = range(match_summary.columns.size)
match_summary

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_wickets,player,player,player
1,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,SJ Davis,Southern Vipers,7,Sunrisers,Sunrisers,Sunrisers
2,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_wickets,player,player,player
3,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,PM Such,Western Storm,6,South East Stars,South East Stars,South East Stars
4,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_runs,player,player,player
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,Northern Diamonds,Southern Vipers,2021,2021/09/18,Southampton,Northern Diamonds,bat,AJ Swann,Southern Vipers,5,Northern Diamonds,Northern Diamonds,Northern Diamonds
106,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_wickets,player,player,player
107,Central Sparks,Northern Diamonds,2021,2021/09/22,Scarborough,Central Sparks,bat,AW Smith,Northern Diamonds,6,Central Sparks,Central Sparks,Central Sparks
108,team,team,season,date,city,toss_winner,toss_decision,umpire,reserve_umpire,match_referee,winner,winner_wickets,player


In [18]:
#rename some columns
match_summary.rename(columns = {0:'Team1',1:'Team2',2:'Season',3:'Date',4:'Venue',5:'Toss_Winner',
                               6:'Toss_Decision',8:'Winner',9:'Winning_Margin'}, inplace = True)

In [19]:
match_summary

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,7,Winner,Winning_Margin,10,11,12
0,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_wickets,player,player,player
1,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,SJ Davis,Southern Vipers,7,Sunrisers,Sunrisers,Sunrisers
2,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_wickets,player,player,player
3,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,PM Such,Western Storm,6,South East Stars,South East Stars,South East Stars
4,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_runs,player,player,player
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,Northern Diamonds,Southern Vipers,2021,2021/09/18,Southampton,Northern Diamonds,bat,AJ Swann,Southern Vipers,5,Northern Diamonds,Northern Diamonds,Northern Diamonds
106,team,team,season,date,city,toss_winner,toss_decision,match_referee,winner,winner_wickets,player,player,player
107,Central Sparks,Northern Diamonds,2021,2021/09/22,Scarborough,Central Sparks,bat,AW Smith,Northern Diamonds,6,Central Sparks,Central Sparks,Central Sparks
108,team,team,season,date,city,toss_winner,toss_decision,umpire,reserve_umpire,match_referee,winner,winner_wickets,player


In [20]:
#check out values of column 'Winner'
match_summary['Winner'].value_counts()

winner               44
Northern Diamonds     9
Central Sparks        8
Southern Vipers       8
Western Storm         6
Lightning             5
match_referee         4
Thunder               4
South East Stars      4
winner_runs           2
reserve_umpire        2
winner_wickets        1
7                     1
AJ Swann              1
49                    1
111                   1
DW Headley            1
PM Such               1
SM Bartlett           1
S Cummings            1
S McLelland           1
Name: Winner, dtype: int64

In [21]:
#get index of rows where column 'Winner' has unusual values
check_rows=[]
check_rows.append(match_summary.index[match_summary['Winner']=='match_referee'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='reserve_umpire'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='SM Bartlett'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='111'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='DW Headley'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='49'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='winner_wickets'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='7'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='PM Such'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='S McLelland'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='S Cummings'].tolist())
check_rows.append(match_summary.index[match_summary['Winner']=='AJ Swann'].tolist())
#flatten the list for easyness
check_rows = list(np.concatenate(check_rows).flat)
#sort the list
check_rows.sort()
check_rows

[15, 16, 20, 26, 32, 33, 46, 47, 79, 80, 81, 82, 96, 97, 108, 109]

In [22]:
#investigating those particular rows
match_summary.iloc[check_rows]

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,7,Winner,Winning_Margin,10,11,12
15,team,team,season,date,city,toss_winner,toss_decision,umpire,match_referee,winner,winner_runs,player,player
16,Western Storm,Sunrisers,2020,2020/09/05,Chelmsford,Sunrisers,field,NJ Llong,PM Such,Western Storm,86,Western Storm,Western Storm
20,Southern Vipers,South East Stars,2020,2020/09/05,Brighton,Southern Vipers,bat,Southern Vipers,111,Southern Vipers,Southern Vipers,Southern Vipers,Southern Vipers
26,Southern Vipers,Sunrisers,2020,2020/09/11,Southampton,Southern Vipers,bat,Southern Vipers,49,Southern Vipers,Southern Vipers,Southern Vipers,Southern Vipers
32,team,team,season,date,city,toss_winner,toss_decision,umpire,match_referee,winner,winner_wickets,player,player
33,Sunrisers,South East Stars,2020,2020/09/13,Chelmsford,Sunrisers,bat,F Richards,AJ Swann,South East Stars,6,Sunrisers,Sunrisers
46,team,team,season,date,city,toss_winner,toss_decision,umpire,reserve_umpire,tv_umpire,match_referee,winner,winner_runs
47,Southern Vipers,Northern Diamonds,2020,2020/09/27,Birmingham,Northern Diamonds,field,Rob White,SM Bartlett,James Middlebrook,SJ Davis,Southern Vipers,38
79,team,team,season,date,city,toss_winner,toss_decision,umpire,match_referee,winner,winner_wickets,player,player
80,Western Storm,Southern Vipers,2021,2021/06/12,Taunton,Western Storm,bat,S McLelland,S Cummings,Southern Vipers,8,Western Storm,Western Storm


In [23]:
#correcting values for concerned rows
match_summary['Winner'].iloc[15] = 'winner'
match_summary['Winner'].iloc[16]= 'Western Storm'
match_summary['Winning_Margin'].iloc[15] = 'winner_runs'
match_summary['Winning_Margin'].iloc[16] = 86

match_summary['Winner'].iloc[32] = 'winner'
match_summary['Winner'].iloc[33] = 'South East Stars'
match_summary['Winning_Margin'].iloc[32] = 'winner_wickets'
match_summary['Winning_Margin'].iloc[33] = 6

match_summary['Winner'].iloc[46] = 'winner'
match_summary['Winner'].iloc[47] = 'Southern Vipers'
match_summary['Winning_Margin'].iloc[46] = 'winner_runs'
match_summary['Winning_Margin'].iloc[47] = 38

match_summary['Winner'].iloc[79] = 'winner'
match_summary['Winner'].iloc[80] = 'Southern Vipers'
match_summary['Winning_Margin'].iloc[79] = 'winner_wickets'
match_summary['Winning_Margin'].iloc[80] = 8

match_summary['Winner'].iloc[81] = 'winner'
match_summary['Winner'].iloc[82]= 'Northern Diamonds'
match_summary['Winning_Margin'].iloc[81] = 'winner_wickets'
match_summary['Winning_Margin'].iloc[82]= 7

match_summary['Winner'].iloc[96] = 'winner'
match_summary['Winner'].iloc[97] = 'Southern Vipers'
match_summary['Winning_Margin'].iloc[96] = 'winner_runs'
match_summary['Winning_Margin'].iloc[97] = 1

match_summary['Winner'].iloc[108] = 'winner'
match_summary['Winner'].iloc[109] = 'Southern Vipers'
match_summary['Winning_Margin'].iloc[108] = 'winner_wickets'
match_summary['Winning_Margin'].iloc[109] = 3

In [24]:
#checking those particular rows
match_summary.iloc[check_rows]

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,7,Winner,Winning_Margin,10,11,12
15,team,team,season,date,city,toss_winner,toss_decision,umpire,winner,winner_runs,winner_runs,player,player
16,Western Storm,Sunrisers,2020,2020/09/05,Chelmsford,Sunrisers,field,NJ Llong,Western Storm,86,86,Western Storm,Western Storm
20,Southern Vipers,South East Stars,2020,2020/09/05,Brighton,Southern Vipers,bat,Southern Vipers,111,Southern Vipers,Southern Vipers,Southern Vipers,Southern Vipers
26,Southern Vipers,Sunrisers,2020,2020/09/11,Southampton,Southern Vipers,bat,Southern Vipers,49,Southern Vipers,Southern Vipers,Southern Vipers,Southern Vipers
32,team,team,season,date,city,toss_winner,toss_decision,umpire,winner,winner_wickets,winner_wickets,player,player
33,Sunrisers,South East Stars,2020,2020/09/13,Chelmsford,Sunrisers,bat,F Richards,South East Stars,6,6,Sunrisers,Sunrisers
46,team,team,season,date,city,toss_winner,toss_decision,umpire,winner,winner_runs,match_referee,winner,winner_runs
47,Southern Vipers,Northern Diamonds,2020,2020/09/27,Birmingham,Northern Diamonds,field,Rob White,Southern Vipers,38,SJ Davis,Southern Vipers,38
79,team,team,season,date,city,toss_winner,toss_decision,umpire,winner,winner_wickets,winner_wickets,player,player
80,Western Storm,Southern Vipers,2021,2021/06/12,Taunton,Western Storm,bat,S McLelland,Southern Vipers,8,8,Western Storm,Western Storm


In [25]:
#investigating rows 19 20 25 26
match_summary.iloc[[19,20,25,26]]

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,7,Winner,Winning_Margin,10,11,12
19,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs,player,player,player,player
20,Southern Vipers,South East Stars,2020,2020/09/05,Brighton,Southern Vipers,bat,Southern Vipers,111,Southern Vipers,Southern Vipers,Southern Vipers,Southern Vipers
25,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs,player,player,player,player
26,Southern Vipers,Sunrisers,2020,2020/09/11,Southampton,Southern Vipers,bat,Southern Vipers,49,Southern Vipers,Southern Vipers,Southern Vipers,Southern Vipers


In [26]:
#correcting values for concerned rows
match_summary['Winner'].iloc[19] = 'winner'
match_summary['Winner'].iloc[20] = 'Southern Vipers'
match_summary['Winning_Margin'].iloc[19] = 'winner_runs'
match_summary['Winning_Margin'].iloc[20] = 111

match_summary['Winner'].iloc[25] = 'winner'
match_summary['Winner'].iloc[26]= 'Southern Vipers'
match_summary['Winning_Margin'].iloc[25] = 'winner_runs'
match_summary['Winning_Margin'].iloc[26] = 49

In [27]:
#checking rows 19 20 25 26
match_summary.iloc[[19,20,25,26]]

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,7,Winner,Winning_Margin,10,11,12
19,team,team,season,date,city,toss_winner,toss_decision,winner,winner,winner_runs,player,player,player
20,Southern Vipers,South East Stars,2020,2020/09/05,Brighton,Southern Vipers,bat,Southern Vipers,Southern Vipers,111,Southern Vipers,Southern Vipers,Southern Vipers
25,team,team,season,date,city,toss_winner,toss_decision,winner,winner,winner_runs,player,player,player
26,Southern Vipers,Sunrisers,2020,2020/09/11,Southampton,Southern Vipers,bat,Southern Vipers,Southern Vipers,49,Southern Vipers,Southern Vipers,Southern Vipers


In [28]:
#check out values of column 'Winner'
match_summary['Winner'].value_counts()

winner               53
Southern Vipers      14
Northern Diamonds    10
Central Sparks        8
Western Storm         7
South East Stars      5
Lightning             5
Thunder               4
Name: Winner, dtype: int64

In [29]:
#check out values of column 'Winning_Margin'
match_summary['Winning_Margin'].value_counts()

winner_runs       27
winner_wickets    26
6                  8
2                  3
7                  3
4                  3
5                  2
55                 2
3                  2
36                 2
9                  2
38                 1
8                  1
1                  1
47                 1
28                 1
111                1
8                  1
49                 1
62                 1
134                1
77                 1
120                1
72                 1
135                1
3                  1
105                1
86                 1
22                 1
24                 1
73                 1
1                  1
41                 1
7                  1
6                  1
32                 1
115                1
Name: Winning_Margin, dtype: int64

In [30]:
#remove other columns that do not contain any uselful value
match_summary.drop([7,10,11,12],axis=1,inplace=True)

In [31]:
match_summary

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,Winner,Winning_Margin
0,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets
1,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,Southern Vipers,7
2,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets
3,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,Western Storm,6
4,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs
...,...,...,...,...,...,...,...,...,...
105,Northern Diamonds,Southern Vipers,2021,2021/09/18,Southampton,Northern Diamonds,bat,Southern Vipers,5
106,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets
107,Central Sparks,Northern Diamonds,2021,2021/09/22,Scarborough,Central Sparks,bat,Northern Diamonds,6
108,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets


In [32]:
#Add new column ByWickets
match_summary['ByWickets']=match_summary['Winning_Margin'].apply(lambda x: 'Y' if x == 'winner_wickets' else 'N')

In [33]:
#Add new column ByRuns
match_summary['ByRuns']=match_summary['Winning_Margin'].apply(lambda x: 'Y' if x == 'winner_runs' else 'N')

In [34]:
match_summary

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,Winner,Winning_Margin,ByWickets,ByRuns
0,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
1,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,Southern Vipers,7,N,N
2,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
3,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,Western Storm,6,N,N
4,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs,N,Y
...,...,...,...,...,...,...,...,...,...,...,...
105,Northern Diamonds,Southern Vipers,2021,2021/09/18,Southampton,Northern Diamonds,bat,Southern Vipers,5,N,N
106,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
107,Central Sparks,Northern Diamonds,2021,2021/09/22,Scarborough,Central Sparks,bat,Northern Diamonds,6,N,N
108,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N


In [35]:
#locate next row positions to put ByWickets value
next_rows=[]
next_rows.append(match_summary.index[match_summary['ByWickets']=='Y'].tolist())
next_rows=np.add(next_rows, 1).tolist()
next_rows

[[1,
  3,
  9,
  13,
  18,
  24,
  33,
  37,
  39,
  41,
  49,
  51,
  55,
  61,
  65,
  67,
  73,
  75,
  77,
  80,
  82,
  86,
  88,
  105,
  107,
  109]]

In [36]:
#flatten the list
next_rows=list(np.concatenate(next_rows).flat)

In [37]:
#change the values
match_summary['ByWickets'].iloc[next_rows] = 'Y'

In [38]:
match_summary

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,Winner,Winning_Margin,ByWickets,ByRuns
0,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
1,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,Southern Vipers,7,Y,N
2,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
3,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,Western Storm,6,Y,N
4,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs,N,Y
...,...,...,...,...,...,...,...,...,...,...,...
105,Northern Diamonds,Southern Vipers,2021,2021/09/18,Southampton,Northern Diamonds,bat,Southern Vipers,5,Y,N
106,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
107,Central Sparks,Northern Diamonds,2021,2021/09/22,Scarborough,Central Sparks,bat,Northern Diamonds,6,Y,N
108,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N


In [39]:
#Likewise locate and modify next row positions to put ByRuns value
next_rows=[]
next_rows.append(match_summary.index[match_summary['ByRuns']=='Y'].tolist())
next_rows=np.add(next_rows, 1).tolist()
next_rows=list(np.concatenate(next_rows).flat)
match_summary['ByRuns'].iloc[next_rows] = 'Y'

In [40]:
match_summary.head(10)

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,Winner,Winning_Margin,ByWickets,ByRuns
0,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
1,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,Southern Vipers,7,Y,N
2,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
3,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,Western Storm,6,Y,N
4,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs,N,Y
5,Southern Vipers,Western Storm,2020,2020/08/31,Bristol,Western Storm,field,Southern Vipers,22,N,Y
6,team,team,season,date,city,toss_winner,toss_decision,winner,winner_runs,N,Y
7,South East Stars,Sunrisers,2020,2020/08/31,Beckenham,South East Stars,bat,South East Stars,77,N,Y
8,team,team,season,date,city,toss_winner,toss_decision,winner,winner_wickets,Y,N
9,Thunder,Central Sparks,2020,2020/08/31,Birmingham,Thunder,bat,Central Sparks,4,Y,N


In [41]:
#remove the irrelevant rows
drop_rows = []
drop_rows.append(match_summary.index[match_summary['Team1']=='team'].tolist())
drop_rows=list(np.concatenate(drop_rows).flat)
match_summary.drop(drop_rows,axis=0,inplace=True)

In [42]:
match_summary.reset_index(drop=True,inplace=True)

In [43]:
match_summary

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,Winner,Winning_Margin,ByWickets,ByRuns
0,Sunrisers,Southern Vipers,2020.0,2020/08/29,Chelmsford,Southern Vipers,field,Southern Vipers,7.0,Y,N
1,South East Stars,Western Storm,2020.0,2020/08/29,Beckenham,Western Storm,field,Western Storm,6.0,Y,N
2,Southern Vipers,Western Storm,2020.0,2020/08/31,Bristol,Western Storm,field,Southern Vipers,22.0,N,Y
3,South East Stars,Sunrisers,2020.0,2020/08/31,Beckenham,South East Stars,bat,South East Stars,77.0,N,Y
4,Thunder,Central Sparks,2020.0,2020/08/31,Birmingham,Thunder,bat,Central Sparks,4.0,Y,N
5,Northern Diamonds,Lightning,2020.0,2020/08/31,Chester-le-Street,Northern Diamonds,bat,Northern Diamonds,9.0,N,Y
6,Thunder,Central Sparks,2020.0,2020/09/05,Worcester,Thunder,bat,Central Sparks,8.0,Y,N
7,,,,,,,,,,N,N
8,Western Storm,Sunrisers,2020.0,2020/09/05,Chelmsford,Sunrisers,field,Western Storm,86.0,N,Y
9,Lightning,Northern Diamonds,2020.0,2020/09/05,Leicester,Northern Diamonds,field,Northern Diamonds,2.0,Y,N


In [44]:
#remove the None values
match_summary = match_summary.replace(to_replace='None', value=np.nan).dropna()
match_summary.reset_index(drop=True,inplace=True)

In [45]:
match_summary

Unnamed: 0,Team1,Team2,Season,Date,Venue,Toss_Winner,Toss_Decision,Winner,Winning_Margin,ByWickets,ByRuns
0,Sunrisers,Southern Vipers,2020,2020/08/29,Chelmsford,Southern Vipers,field,Southern Vipers,7,Y,N
1,South East Stars,Western Storm,2020,2020/08/29,Beckenham,Western Storm,field,Western Storm,6,Y,N
2,Southern Vipers,Western Storm,2020,2020/08/31,Bristol,Western Storm,field,Southern Vipers,22,N,Y
3,South East Stars,Sunrisers,2020,2020/08/31,Beckenham,South East Stars,bat,South East Stars,77,N,Y
4,Thunder,Central Sparks,2020,2020/08/31,Birmingham,Thunder,bat,Central Sparks,4,Y,N
5,Northern Diamonds,Lightning,2020,2020/08/31,Chester-le-Street,Northern Diamonds,bat,Northern Diamonds,9,N,Y
6,Thunder,Central Sparks,2020,2020/09/05,Worcester,Thunder,bat,Central Sparks,8,Y,N
7,Western Storm,Sunrisers,2020,2020/09/05,Chelmsford,Sunrisers,field,Western Storm,86,N,Y
8,Lightning,Northern Diamonds,2020,2020/09/05,Leicester,Northern Diamonds,field,Northern Diamonds,2,Y,N
9,Southern Vipers,South East Stars,2020,2020/09/05,Brighton,Southern Vipers,bat,Southern Vipers,111,N,Y


In [46]:
#finally export the dataframe to csv file
match_summary.to_csv("C:\\Users\\DELL\\Cricdata\\match_summary.csv",index=False)

In [47]:
allmatch_df = pd.read_csv('C:\\Users\\DELL\\Cricdata\\rhf\\all_matches.csv')

In [48]:
allmatch_df.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,...,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed
0,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.1,Sunrisers,Southern Vipers,KL Midwood,AD Carr,...,0,,,,,,,,,
1,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.2,Sunrisers,Southern Vipers,KL Midwood,AD Carr,...,0,,,,,,,,,
2,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.3,Sunrisers,Southern Vipers,KL Midwood,AD Carr,...,0,,,,,,,,,
3,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.4,Sunrisers,Southern Vipers,KL Midwood,AD Carr,...,1,1.0,,,,,,,,
4,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.5,Sunrisers,Southern Vipers,KL Midwood,AD Carr,...,1,1.0,,,,,,,,


In [49]:
allmatch_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30228 entries, 0 to 30227
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   match_id                30228 non-null  int64  
 1   season                  30228 non-null  int64  
 2   start_date              30228 non-null  object 
 3   venue                   30228 non-null  object 
 4   innings                 30228 non-null  int64  
 5   ball                    30228 non-null  float64
 6   batting_team            30228 non-null  object 
 7   bowling_team            30228 non-null  object 
 8   striker                 30228 non-null  object 
 9   non_striker             30228 non-null  object 
 10  bowler                  30228 non-null  object 
 11  runs_off_bat            30228 non-null  int64  
 12  extras                  30228 non-null  int64  
 13  wides                   1341 non-null   float64
 14  noballs                 114 non-null  

In [50]:
#drop columns that contain no or only a single value
allmatch_df.drop(['penalty','other_wicket_type','other_player_dismissed'],axis=1,inplace=True)

In [51]:
allmatch_df.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,wicket_type,player_dismissed
0,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.1,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,
1,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.2,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,
2,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.3,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,
3,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.4,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,1.0,,,,,
4,1229326,2020,2020-08-29,"County Ground, Chelmsford",1,0.5,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,1.0,,,,,


In [52]:
#check unique values of venue
allmatch_df['venue'].value_counts()

County Ground, Bristol                          3642
County Ground, Chelmsford                       2899
Kent County Cricket Ground, Beckenham           2711
The Rose Bowl, Southampton                      2428
County Ground, New Road, Worcester              2315
Headingley, Leeds                               2300
Edgbaston, Birmingham                           1772
Riverside Ground, Chester-le-Street             1682
Grace Road, Leicester                           1261
Aigburth, Liverpool                             1206
Haslegrave Ground, Loughborough                 1200
Kennington Oval, London                         1188
Boughton Hall Cricket Club Ground, Chester      1161
County Ground, Hove                             1029
County Ground, Northampton                       621
Kibworth Cricket Club Ground                     592
The Cooper Associates County Ground, Taunton     535
North Marine Road Ground, Scarborough            524
Merchant Taylors' School Ground, Northwood    

In [53]:
# find the match_id where venue is Kibworth Cricket Club Ground
allmatch_df[allmatch_df['venue']=='Kibworth Cricket Club Ground']['match_id'].unique()

array([1252268], dtype=int64)

In [54]:
#modify the value of venue
allmatch_df['venue'][allmatch_df['match_id']==1252268] = 'Kibworth Cricket Club Ground,Kibworth'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allmatch_df['venue'][allmatch_df['match_id']==1252268] = 'Kibworth Cricket Club Ground,Kibworth'


In [55]:
# find the match_id where venue is County Ground, New Road, Worcester
allmatch_df[allmatch_df['venue']=='County Ground, New Road, Worcester']['match_id'].unique()

array([1229334, 1229341, 1252278, 1252285], dtype=int64)

In [56]:
#modify the value of venue
allmatch_df['venue'][allmatch_df['match_id'] == 1229334] = 'County Ground, Worcester'
allmatch_df['venue'][allmatch_df['match_id'] == 1229341] = 'County Ground, Worcester'
allmatch_df['venue'][allmatch_df['match_id'] == 1252278] = 'County Ground, Worcester'
allmatch_df['venue'][allmatch_df['match_id'] == 1252285] = 'County Ground, Worcester'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allmatch_df['venue'][allmatch_df['match_id'] == 1229334] = 'County Ground, Worcester'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allmatch_df['venue'][allmatch_df['match_id'] == 1229341] = 'County Ground, Worcester'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allmatch_df['venue'][allmatch_df['match_id'] == 1252278] = 'County Ground, Worcester'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://panda

In [57]:
#check unique values of venue
allmatch_df['venue'].value_counts()

County Ground, Bristol                          3642
County Ground, Chelmsford                       2899
Kent County Cricket Ground, Beckenham           2711
The Rose Bowl, Southampton                      2428
County Ground, Worcester                        2315
Headingley, Leeds                               2300
Edgbaston, Birmingham                           1772
Riverside Ground, Chester-le-Street             1682
Grace Road, Leicester                           1261
Aigburth, Liverpool                             1206
Haslegrave Ground, Loughborough                 1200
Kennington Oval, London                         1188
Boughton Hall Cricket Club Ground, Chester      1161
County Ground, Hove                             1029
County Ground, Northampton                       621
Kibworth Cricket Club Ground,Kibworth            592
The Cooper Associates County Ground, Taunton     535
North Marine Road Ground, Scarborough            524
Merchant Taylors' School Ground, Northwood    

In [58]:
#split the venue column comma wise
allmatch_df['venue'].str.split(",",expand=True)

Unnamed: 0,0,1
0,County Ground,Chelmsford
1,County Ground,Chelmsford
2,County Ground,Chelmsford
3,County Ground,Chelmsford
4,County Ground,Chelmsford
...,...,...
30223,County Ground,Northampton
30224,County Ground,Northampton
30225,County Ground,Northampton
30226,County Ground,Northampton


In [59]:
#keep only the city name
allmatch_df['venue']=allmatch_df['venue'].str.split(",",expand=True)[1]
allmatch_df['venue']=allmatch_df['venue'].str.strip() #remove extra space

In [60]:
allmatch_df.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,wicket_type,player_dismissed
0,1229326,2020,2020-08-29,Chelmsford,1,0.1,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,
1,1229326,2020,2020-08-29,Chelmsford,1,0.2,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,
2,1229326,2020,2020-08-29,Chelmsford,1,0.3,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,
3,1229326,2020,2020-08-29,Chelmsford,1,0.4,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,1.0,,,,,
4,1229326,2020,2020-08-29,Chelmsford,1,0.5,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,1.0,,,,,


In [61]:
allmatch_df['venue'].value_counts()

Bristol              3642
Chelmsford           2899
Beckenham            2711
Southampton          2428
Worcester            2315
Leeds                2300
Birmingham           1772
Chester-le-Street    1682
Leicester            1261
Liverpool            1206
Loughborough         1200
London               1188
Chester              1161
Hove                 1029
Northampton           621
Kibworth              592
Taunton               535
Scarborough           524
Northwood             507
Sale                  415
Cambridge             240
Name: venue, dtype: int64

In [62]:
#add a new column 'over'
import math
allmatch_df['over']=allmatch_df['ball'].apply(np.ceil)

In [63]:
allmatch_df['over']=allmatch_df['over'].astype(int)

In [64]:
allmatch_df.head(10)

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,wicket_type,player_dismissed,over
0,1229326,2020,2020-08-29,Chelmsford,1,0.1,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,,1
1,1229326,2020,2020-08-29,Chelmsford,1,0.2,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,,1
2,1229326,2020,2020-08-29,Chelmsford,1,0.3,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,,1
3,1229326,2020,2020-08-29,Chelmsford,1,0.4,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,1.0,,,,,,1
4,1229326,2020,2020-08-29,Chelmsford,1,0.5,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,1.0,,,,,,1
5,1229326,2020,2020-08-29,Chelmsford,1,0.6,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,1,,1.0,,,,,1
6,1229326,2020,2020-08-29,Chelmsford,1,0.7,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,,1
7,1229326,2020,2020-08-29,Chelmsford,1,0.8,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,,1
8,1229326,2020,2020-08-29,Chelmsford,1,0.9,Sunrisers,Southern Vipers,KL Midwood,AD Carr,LK Bell,0,0,,,,,,,1
9,1229326,2020,2020-08-29,Chelmsford,1,1.1,Sunrisers,Southern Vipers,AD Carr,KL Midwood,TG Norris,0,0,,,,,,,2


In [65]:
allmatch_df.columns

Index(['match_id', 'season', 'start_date', 'venue', 'innings', 'ball',
       'batting_team', 'bowling_team', 'striker', 'non_striker', 'bowler',
       'runs_off_bat', 'extras', 'wides', 'noballs', 'byes', 'legbyes',
       'wicket_type', 'player_dismissed', 'over'],
      dtype='object')

In [66]:
#rearrange the columns
allmatch_df=allmatch_df[['match_id', 'season', 'start_date', 'venue', 'innings','over','ball',
                          'batting_team','bowling_team','striker', 'non_striker','runs_off_bat','extras',
                          'bowler','wicket_type','player_dismissed','wides', 'noballs', 'byes', 'legbyes']]

In [67]:
allmatch_df.head(10)

Unnamed: 0,match_id,season,start_date,venue,innings,over,ball,batting_team,bowling_team,striker,non_striker,runs_off_bat,extras,bowler,wicket_type,player_dismissed,wides,noballs,byes,legbyes
0,1229326,2020,2020-08-29,Chelmsford,1,1,0.1,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,0,LK Bell,,,,,,
1,1229326,2020,2020-08-29,Chelmsford,1,1,0.2,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,0,LK Bell,,,,,,
2,1229326,2020,2020-08-29,Chelmsford,1,1,0.3,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,0,LK Bell,,,,,,
3,1229326,2020,2020-08-29,Chelmsford,1,1,0.4,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,1,LK Bell,,,1.0,,,
4,1229326,2020,2020-08-29,Chelmsford,1,1,0.5,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,1,LK Bell,,,1.0,,,
5,1229326,2020,2020-08-29,Chelmsford,1,1,0.6,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,1,LK Bell,,,,1.0,,
6,1229326,2020,2020-08-29,Chelmsford,1,1,0.7,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,0,LK Bell,,,,,,
7,1229326,2020,2020-08-29,Chelmsford,1,1,0.8,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,0,LK Bell,,,,,,
8,1229326,2020,2020-08-29,Chelmsford,1,1,0.9,Sunrisers,Southern Vipers,KL Midwood,AD Carr,0,0,LK Bell,,,,,,
9,1229326,2020,2020-08-29,Chelmsford,1,2,1.1,Sunrisers,Southern Vipers,AD Carr,KL Midwood,0,0,TG Norris,,,,,,


In [68]:
#export the dataframe to csv file
allmatch_df.to_csv("C:\\Users\\DELL\\Cricdata\\allmatches_tidy.csv",index=False)

Finally two dataframes were taken into consideration for analysis, one containg the summary information of every match (match_summary) and the other contained ball by ball information (allmatch_df).