In [114]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [115]:
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 500)

## Match results

In [116]:
match = pd.read_csv('data/processed data/matchresults.csv')
match.columns = ['team1', 'team2', 'winner', 'margin', 'ground', 'matchdate', 'match_id']
match.head()

Unnamed: 0,team1,team2,winner,margin,ground,matchdate,match_id
0,Namibia,Sri Lanka,Namibia,55 runs,Geelong,"Oct 16, 2022",T20I # 1823
1,Netherlands,U.A.E.,Netherlands,3 wickets,Geelong,"Oct 16, 2022",T20I # 1825
2,Scotland,West Indies,Scotland,42 runs,Hobart,"Oct 17, 2022",T20I # 1826
3,Ireland,Zimbabwe,Zimbabwe,31 runs,Hobart,"Oct 17, 2022",T20I # 1828
4,Namibia,Netherlands,Netherlands,5 wickets,Geelong,"Oct 18, 2022",T20I # 1830


In [117]:
match.to_csv('data/processed data/matchresults.csv', index=False)

In [118]:
match_id_dict = {}

for index, row in match.iterrows():
    key1 = row['team1'] + ' vs ' + row['team2']
    key2 = row['team2'] + ' vs ' + row['team1']
    match_id_dict[key1] = row['match_id']
    match_id_dict[key2] = row['match_id']

match_id_dict

{'Namibia vs Sri Lanka': 'T20I # 1823',
 'Sri Lanka vs Namibia': 'T20I # 1823',
 'Netherlands vs U.A.E.': 'T20I # 1825',
 'U.A.E. vs Netherlands': 'T20I # 1825',
 'Scotland vs West Indies': 'T20I # 1826',
 'West Indies vs Scotland': 'T20I # 1826',
 'Ireland vs Zimbabwe': 'T20I # 1828',
 'Zimbabwe vs Ireland': 'T20I # 1828',
 'Namibia vs Netherlands': 'T20I # 1830',
 'Netherlands vs Namibia': 'T20I # 1830',
 'Sri Lanka vs U.A.E.': 'T20I # 1832',
 'U.A.E. vs Sri Lanka': 'T20I # 1832',
 'Ireland vs Scotland': 'T20I # 1833',
 'Scotland vs Ireland': 'T20I # 1833',
 'West Indies vs Zimbabwe': 'T20I # 1834',
 'Zimbabwe vs West Indies': 'T20I # 1834',
 'Netherlands vs Sri Lanka': 'T20I # 1835',
 'Sri Lanka vs Netherlands': 'T20I # 1835',
 'Namibia vs U.A.E.': 'T20I # 1836',
 'U.A.E. vs Namibia': 'T20I # 1836',
 'Ireland vs West Indies': 'T20I # 1837',
 'West Indies vs Ireland': 'T20I # 1837',
 'Scotland vs Zimbabwe': 'T20I # 1838',
 'Zimbabwe vs Scotland': 'T20I # 1838',
 'Australia vs New Zea

## Player info

In [119]:
player = pd.read_csv('data/processed data/playerinfo.csv')
player.head()

Unnamed: 0,name,team,batting_style,bowling_style,player_role
0,Jan Frylinck,Namibia,Left hand Bat,Left arm Fast medium,Allrounder
1,Michael van Lingen,Namibia,Left hand Bat,Left arm Medium,Bowling Allrounder
2,Divan la Cock,Namibia,Right hand Bat,Legbreak,Opening Batter
3,Jan Nicol Loftie-Eaton,Namibia,Left hand Bat,"Right arm Medium, Legbreak",Batter
4,Stephan Baard,Namibia,Right hand Bat,Right arm Medium fast,Batter


In [146]:
player[player['team']=='India']

Unnamed: 0,name,team,batting_style,bowling_style,player_role
164,Virat Kohli,India,Right hand Bat,Right arm Medium,Top order Batter
176,Bhuvneshwar Kumar,India,Right hand Bat,Right arm Medium,Bowler
177,Arshdeep Singh,India,Left hand Bat,Left arm Medium fast,Bowler
180,Ravichandran Ashwin,India,Right hand Bat,Right arm Offbreak,Bowling Allrounder
181,Axar Patel,India,Akshar Patel,Left hand Bat,Slow Left arm Orthodox
182,KL Rahul,India,Right hand Bat,Wicketkeeper,Opening Batter
183,Rohit Sharma,India,Right hand Bat,Right arm Offbreak,Top order Batter
184,Suryakumar Yadav,India,Right hand Bat,"Right arm Medium, Right arm Offbreak",Batter
185,Dinesh Karthik,India,Dinesh,Right hand Bat,Right arm Offbreak
236,Deepak Hooda,India,Right hand Bat,Right arm Offbreak,Allrounder


In [None]:
player.to_csv('data/processed data/playerinfo.csv', index=False)

## Batting summary

In [120]:
battingsummary = pd.read_csv('data/battingsummary.csv')
battingsummary.head(20)

Unnamed: 0,BATTING,Unnamed: 1,R,B,M,4s,6s,SR,Unnamed: 8,Unnamed: 9,match,team
0,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.00,,,Namibia vs Sri Lanka,Namibia
1,,,,,,,,,,,Namibia vs Sri Lanka,Namibia
2,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.00,,,Namibia vs Sri Lanka,Namibia
3,,,,,,,,,,,Namibia vs Sri Lanka,Namibia
4,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,18,1,2,166.66,,,Namibia vs Sri Lanka,Namibia
5,,,,,,,,,,,Namibia vs Sri Lanka,Namibia
6,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,,,Namibia vs Sri Lanka,Namibia
7,,,,,,,,,,,Namibia vs Sri Lanka,Namibia
8,Gerhard Erasmus (c),c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,,,Namibia vs Sri Lanka,Namibia
9,,,,,,,,,,,Namibia vs Sri Lanka,Namibia


In [121]:
battingsummary.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1568 entries, 0 to 1567
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   BATTING     1014 non-null   object 
 1   Unnamed: 1  1013 non-null   object 
 2   R           1014 non-null   object 
 3   B           846 non-null    object 
 4   M           846 non-null    object 
 5   4s          846 non-null    object 
 6   6s          846 non-null    object 
 7   SR          846 non-null    object 
 8   Unnamed: 8  147 non-null    object 
 9   Unnamed: 9  0 non-null      float64
 10  match       1568 non-null   object 
 11  team        1568 non-null   object 
dtypes: float64(1), object(11)
memory usage: 147.1+ KB


In [122]:
battingsummary = battingsummary.drop(['Unnamed: 8', "Unnamed: 9"], axis=1)
battingsummary.head()

Unnamed: 0,BATTING,Unnamed: 1,R,B,M,4s,6s,SR,match,team
0,Michael van Lingen,c Pramod Madushan b Chameera,3.0,6.0,7.0,0.0,0.0,50.0,Namibia vs Sri Lanka,Namibia
1,,,,,,,,,Namibia vs Sri Lanka,Namibia
2,Divan la Cock,c Shanaka b Pramod Madushan,9.0,9.0,15.0,1.0,0.0,100.0,Namibia vs Sri Lanka,Namibia
3,,,,,,,,,Namibia vs Sri Lanka,Namibia
4,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20.0,12.0,18.0,1.0,2.0,166.66,Namibia vs Sri Lanka,Namibia


In [123]:
battingsummary.isna().sum()

BATTING       554
Unnamed: 1    555
R             554
B             722
M             722
4s            722
6s            722
SR            722
match           0
team            0
dtype: int64

In [124]:
df = battingsummary[battingsummary[battingsummary.columns[0]].notnull()]

In [125]:
df.isna().sum()

BATTING         0
Unnamed: 1      1
R               0
B             168
M             168
4s            168
6s            168
SR            168
match           0
team            0
dtype: int64

In [126]:
to_drop = df[df['B'].isnull()].index
len(to_drop)

168

In [127]:
df.drop(to_drop, axis=0, inplace=True)
df.isna().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(to_drop, axis=0, inplace=True)


BATTING       0
Unnamed: 1    0
R             0
B             0
M             0
4s            0
6s            0
SR            0
match         0
team          0
dtype: int64

In [128]:
df.drop(df[df['BATTING'].str.contains('Did not bat') | df['BATTING'].str.contains('Fall of wickets')].index, inplace=True)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(df[df['BATTING'].str.contains('Did not bat') | df['BATTING'].str.contains('Fall of wickets')].index, inplace=True)


Unnamed: 0,BATTING,Unnamed: 1,R,B,M,4s,6s,SR,match,team
0,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.0,Namibia vs Sri Lanka,Namibia
2,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.0,Namibia vs Sri Lanka,Namibia
4,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,18,1,2,166.66,Namibia vs Sri Lanka,Namibia
6,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,Namibia vs Sri Lanka,Namibia
8,Gerhard Erasmus (c),c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,Namibia vs Sri Lanka,Namibia


In [129]:
df.reset_index(drop=True,inplace=True)
df['match'] = df['match'].apply(lambda x: x.replace('United Arab Emirates','U.A.E.'  ))
df.head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['match'] = df['match'].apply(lambda x: x.replace('United Arab Emirates','U.A.E.'  ))


Unnamed: 0,BATTING,Unnamed: 1,R,B,M,4s,6s,SR,match,team
0,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.00,Namibia vs Sri Lanka,Namibia
1,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.00,Namibia vs Sri Lanka,Namibia
2,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,18,1,2,166.66,Namibia vs Sri Lanka,Namibia
3,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,Namibia vs Sri Lanka,Namibia
4,Gerhard Erasmus (c),c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,Namibia vs Sri Lanka,Namibia
5,Jan Frylinck,run out (Gunathilaka/†Mendis),44,28,44,4,0,157.14,Namibia vs Sri Lanka,Namibia
6,David Wiese,c †Mendis b Theekshana,0,1,4,0,0,0.00,Namibia vs Sri Lanka,Namibia
7,JJ Smit,not out,31,16,29,2,2,193.75,Namibia vs Sri Lanka,Namibia
8,Pathum Nissanka,c Smit b Shikongo,9,10,16,1,0,90.00,Namibia vs Sri Lanka,Sri Lanka
9,Kusal Mendis †,c †Green b Wiese,6,6,8,0,0,100.00,Namibia vs Sri Lanka,Sri Lanka


In [130]:
df.columns = ['player', 'dismissal', 'runs', 'balls', 'maiden', '4s', '6s', 'sr', 'match', 'team']
df.head(12)

Unnamed: 0,player,dismissal,runs,balls,maiden,4s,6s,sr,match,team
0,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.0,Namibia vs Sri Lanka,Namibia
1,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.0,Namibia vs Sri Lanka,Namibia
2,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,18,1,2,166.66,Namibia vs Sri Lanka,Namibia
3,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,Namibia vs Sri Lanka,Namibia
4,Gerhard Erasmus (c),c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,Namibia vs Sri Lanka,Namibia
5,Jan Frylinck,run out (Gunathilaka/†Mendis),44,28,44,4,0,157.14,Namibia vs Sri Lanka,Namibia
6,David Wiese,c †Mendis b Theekshana,0,1,4,0,0,0.0,Namibia vs Sri Lanka,Namibia
7,JJ Smit,not out,31,16,29,2,2,193.75,Namibia vs Sri Lanka,Namibia
8,Pathum Nissanka,c Smit b Shikongo,9,10,16,1,0,90.0,Namibia vs Sri Lanka,Sri Lanka
9,Kusal Mendis †,c †Green b Wiese,6,6,8,0,0,100.0,Namibia vs Sri Lanka,Sri Lanka


In [131]:
df['out/notout'] = df['dismissal'].apply(lambda x: 'out' if len(x)>7 else 'notout')
df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['out/notout'] = df['dismissal'].apply(lambda x: 'out' if len(x)>7 else 'notout')


Unnamed: 0,player,dismissal,runs,balls,maiden,4s,6s,sr,match,team,out/notout
0,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.0,Namibia vs Sri Lanka,Namibia,out
1,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.0,Namibia vs Sri Lanka,Namibia,out
2,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,18,1,2,166.66,Namibia vs Sri Lanka,Namibia,out
3,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,Namibia vs Sri Lanka,Namibia,out
4,Gerhard Erasmus (c),c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,Namibia vs Sri Lanka,Namibia,out
5,Jan Frylinck,run out (Gunathilaka/†Mendis),44,28,44,4,0,157.14,Namibia vs Sri Lanka,Namibia,out
6,David Wiese,c †Mendis b Theekshana,0,1,4,0,0,0.0,Namibia vs Sri Lanka,Namibia,out
7,JJ Smit,not out,31,16,29,2,2,193.75,Namibia vs Sri Lanka,Namibia,notout
8,Pathum Nissanka,c Smit b Shikongo,9,10,16,1,0,90.0,Namibia vs Sri Lanka,Sri Lanka,out
9,Kusal Mendis †,c †Green b Wiese,6,6,8,0,0,100.0,Namibia vs Sri Lanka,Sri Lanka,out


In [132]:
df.drop('dismissal', axis=1, inplace=True)
df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('dismissal', axis=1, inplace=True)


Unnamed: 0,player,runs,balls,maiden,4s,6s,sr,match,team,out/notout
0,Michael van Lingen,3,6,7,0,0,50.0,Namibia vs Sri Lanka,Namibia,out
1,Divan la Cock,9,9,15,1,0,100.0,Namibia vs Sri Lanka,Namibia,out
2,Jan Nicol Loftie-Eaton,20,12,18,1,2,166.66,Namibia vs Sri Lanka,Namibia,out
3,Stephan Baard,26,24,49,2,0,108.33,Namibia vs Sri Lanka,Namibia,out
4,Gerhard Erasmus (c),20,24,30,0,0,83.33,Namibia vs Sri Lanka,Namibia,out
5,Jan Frylinck,44,28,44,4,0,157.14,Namibia vs Sri Lanka,Namibia,out
6,David Wiese,0,1,4,0,0,0.0,Namibia vs Sri Lanka,Namibia,out
7,JJ Smit,31,16,29,2,2,193.75,Namibia vs Sri Lanka,Namibia,notout
8,Pathum Nissanka,9,10,16,1,0,90.0,Namibia vs Sri Lanka,Sri Lanka,out
9,Kusal Mendis †,6,6,8,0,0,100.0,Namibia vs Sri Lanka,Sri Lanka,out


In [133]:
df['match_id'] = df['match'].map(match_id_dict)
df.sample(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['match_id'] = df['match'].map(match_id_dict)


Unnamed: 0,player,runs,balls,maiden,4s,6s,sr,match,team,out/notout,match_id
267,Najmul Hossain Shanto,25,20,31,4,0,125.00,Bangladesh vs Netherlands,Bangladesh,out,T20I # 1843
348,Suryakumar Yadav,51,25,38,7,1,204.00,India vs Netherlands,India,notout,T20I # 1848
518,Roelof van der Merwe,0,3,6,0,0,0.00,Zimbabwe vs Netherlands,Netherlands,notout,T20I # 1859
8,Pathum Nissanka,9,10,16,1,0,90.00,Namibia vs Sri Lanka,Sri Lanka,out,T20I # 1823
16,Pramod Madushan,0,0,4,0,0,-,Namibia vs Sri Lanka,Sri Lanka,out,T20I # 1823
430,Fakhar Zaman,20,16,27,3,0,125.00,Netherlands vs Pakistan,Pakistan,out,T20I # 1852
63,Luke Jongwe,20,10,14,3,0,200.00,Zimbabwe vs Ireland,Zimbabwe,notout,T20I # 1828
26,Aayan Afzal Khan,5,7,8,0,0,71.42,U.A.E. vs Netherlands,United Arab Emirates,out,T20I # 1825
31,Bas de Leede,14,18,37,1,0,77.77,U.A.E. vs Netherlands,Netherlands,out,T20I # 1825
665,Glenn Phillips,6,8,9,1,0,75.00,New Zealand vs Pakistan,New Zealand,out,T20I # 1877


In [134]:
df.to_csv('data/processed data/battingsummary.csv', index=False)

## Bowling summary

In [135]:
bowlingsummary = pd.read_csv('data/bowlingsummary.csv')
bowlingsummary.head(10)

Unnamed: 0,BOWLING,O,M,R,W,ECON,0s,4s,6s,WD,NB,match,team
0,Maheesh Theekshana,4,0,23,1,5.75,7,0,0,2,0,Namibia vs Sri Lanka,Sri Lanka
1,"14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6","14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6",Namibia vs Sri Lanka,Sri Lanka
2,Dushmantha Chameera,4,0,39,1,9.75,6,3,1,2,0,Namibia vs Sri Lanka,Sri Lanka
3,"1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos","1.2 to M van Lingen, Massive top-edge and Chameera strikes second ball on return! Short of a good length on middle and leg, van Lingen looks for a pull, but the ball is not short enough. He gets a top-edge with the ball going to deep third, who takes a comfortable catch.. 6/1See all photos",Namibia vs Sri Lanka,Sri Lanka
4,Pramod Madushan,4,0,37,2,9.25,6,3,1,0,0,Namibia vs Sri Lanka,Sri Lanka
5,"2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...","2.6 to D la Cock, Leading edge and LA Cock departs! On a length, not a lot of shape but the batter is through the shot early. Closes the face of the bat just a touch as the ball catches the leading edge to balloon straight up. Shanaka stationed at mid-on takes a nice, easy catch and Sri Lanka are making inroads early.. 16/213.6 to SJ Baard, Great catch at deep backward square leg! It's a full toss, almost a waist high no-ball and Baard looks to whip this high towards deep backward square leg...",Namibia vs Sri Lanka,Sri Lanka
6,Chamika Karunaratne,4,0,36,1,9.00,7,3,1,1,0,Namibia vs Sri Lanka,Sri Lanka
7,"4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos","4.5 to JN Loftie-Eaton, WHAT A CATCH! Take a bow, Kusal Mendis! Slower off-cutter outside off, he looks for a drive on the up but is deceived by the slowness of it. The ball catches his outside edge and it seems will go past the keeper. But Mendis dives full-length to his left, almost behind him and just about catches it.. 35/3See all photos",Namibia vs Sri Lanka,Sri Lanka
8,Wanindu Hasaranga de Silva,4,0,27,1,6.75,8,1,1,0,0,Namibia vs Sri Lanka,Sri Lanka
9,"11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4","11.5 to MG Erasmus, But this time he's chanced his arm one too many times! This was a gimme from Hasaranga, the short googly, but Erasmus is almost too eager to go at this and swings hard. Only manages a top edge straight to deep midwicket running in. A good partnership comes to an end.. 76/4",Namibia vs Sri Lanka,Sri Lanka


In [136]:
bowlingsummary['BOWLING'][1]

"14.2 to D Wiese, Big appeal for caught behind, and the finger goes up! Batter seems to have a discussion with the non-striker and eventually goes for the review. And ultra edge shows a healthy spike as the ball passes the bat, review lost, and that's the big wicket of David Wiese too.. 93/6"

In [137]:
mask = bowlingsummary[bowlingsummary.columns[0]].str.match(re.compile(r'^\d+\D+\d+\D*'))

In [138]:
df = bowlingsummary[~mask]
df.head()

Unnamed: 0,BOWLING,O,M,R,W,ECON,0s,4s,6s,WD,NB,match,team
0,Maheesh Theekshana,4,0,23,1,5.75,7,0,0,2,0,Namibia vs Sri Lanka,Sri Lanka
2,Dushmantha Chameera,4,0,39,1,9.75,6,3,1,2,0,Namibia vs Sri Lanka,Sri Lanka
4,Pramod Madushan,4,0,37,2,9.25,6,3,1,0,0,Namibia vs Sri Lanka,Sri Lanka
6,Chamika Karunaratne,4,0,36,1,9.0,7,3,1,1,0,Namibia vs Sri Lanka,Sri Lanka
8,Wanindu Hasaranga de Silva,4,0,27,1,6.75,8,1,1,0,0,Namibia vs Sri Lanka,Sri Lanka


In [139]:
df.reset_index(drop=True, inplace=True)

In [140]:
df['match'] = df['match'].apply(lambda x: x.replace('United Arab Emirates','U.A.E.'))
df.sample(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['match'] = df['match'].apply(lambda x: x.replace('United Arab Emirates','U.A.E.'))


Unnamed: 0,BOWLING,O,M,R,W,ECON,0s,4s,6s,WD,NB,match,team
372,Hasan Mahmud,4.0,0,47,3,11.75,7,6,2,0,1,India vs Bangladesh,Bangladesh
368,Sikandar Raza,1.0,0,6,0,6.0,0,0,0,0,0,Zimbabwe vs Netherlands,Zimbabwe
350,Ish Sodhi,4.0,0,23,1,5.75,10,2,0,0,0,England vs New Zealand,New Zealand
184,Hardik Pandya,4.0,0,30,3,7.5,10,4,0,0,0,Pakistan vs India,India
410,Pat Cummins,4.0,0,22,0,5.5,12,2,0,0,0,Australia vs Afghanistan,Australia
156,Lockie Ferguson,3.0,0,20,1,6.66,9,2,1,0,0,New Zealand vs Australia,New Zealand
158,Ben Stokes,4.0,0,19,2,4.75,14,2,0,1,0,Afghanistan vs England,England
460,Arshdeep Singh,2.0,0,9,1,4.5,9,1,0,2,0,India vs Zimbabwe,India
116,Jan Frylinck,3.0,0,30,0,10.0,3,2,1,1,0,U.A.E. vs Namibia,Namibia
211,Richard Ngarava,1.0,0,17,0,17.0,2,4,0,0,1,Zimbabwe vs South Africa,Zimbabwe


In [141]:
df.columns = ['player', 'overs', 'maiden', 'runs', 'wickets', 'econ', '0s', '4s', '6s', 'wides', 'noballs', 'match', 'team']
df.head(12)

Unnamed: 0,player,overs,maiden,runs,wickets,econ,0s,4s,6s,wides,noballs,match,team
0,Maheesh Theekshana,4,0,23,1,5.75,7,0,0,2,0,Namibia vs Sri Lanka,Sri Lanka
1,Dushmantha Chameera,4,0,39,1,9.75,6,3,1,2,0,Namibia vs Sri Lanka,Sri Lanka
2,Pramod Madushan,4,0,37,2,9.25,6,3,1,0,0,Namibia vs Sri Lanka,Sri Lanka
3,Chamika Karunaratne,4,0,36,1,9.0,7,3,1,1,0,Namibia vs Sri Lanka,Sri Lanka
4,Wanindu Hasaranga de Silva,4,0,27,1,6.75,8,1,1,0,0,Namibia vs Sri Lanka,Sri Lanka
5,Gerhard Erasmus,1,0,8,0,8.0,1,1,0,0,0,Namibia vs Sri Lanka,Namibia
6,David Wiese,4,0,16,2,4.0,13,1,0,0,0,Namibia vs Sri Lanka,Namibia
7,Bernard Scholtz,4,0,18,2,4.5,10,1,0,0,0,Namibia vs Sri Lanka,Namibia
8,Ben Shikongo,3,1,22,2,7.33,6,3,0,0,0,Namibia vs Sri Lanka,Namibia
9,JJ Smit,3,0,16,1,5.33,7,0,0,1,0,Namibia vs Sri Lanka,Namibia


In [142]:
df['match_id'] = df['match'].map(match_id_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['match_id'] = df['match'].map(match_id_dict)


In [145]:
df.sample(20)

Unnamed: 0,player,overs,maiden,runs,wickets,econ,0s,4s,6s,wides,noballs,match,team,match_id
406,Gulbadin Naib,3.0,0,31,0,10.33,6,3,2,0,0,Australia vs Afghanistan,Afghanistan,T20I # 1864
448,Nasum Ahmed,4.0,0,14,1,3.5,13,0,0,2,0,Bangladesh vs Pakistan,Bangladesh,T20I # 1872
209,Anrich Nortje,2.0,0,10,1,5.0,5,0,0,0,0,Zimbabwe vs South Africa,South Africa,T20I # 1844
432,Keshav Maharaj,4.0,0,27,2,6.75,12,1,2,0,0,Netherlands vs South Africa,South Africa,T20I # 1871
178,Gareth Delany,4.0,0,28,1,7.0,8,3,0,2,0,Ireland vs Sri Lanka,Ireland,T20I # 1841
232,Liam Livingstone,3.0,0,17,3,5.66,5,1,0,0,0,Ireland vs England,England,T20I # 1846
118,Basil Hameed,3.0,0,17,2,5.66,10,2,0,0,0,U.A.E. vs Namibia,United Arab Emirates,T20I # 1836
222,Lahiru Kumara,3.3,0,22,0,6.28,15,0,1,3,0,Sri Lanka vs Australia,Sri Lanka,T20I # 1845
16,Roelof van der Merwe,3.0,0,19,1,6.33,7,0,1,0,0,U.A.E. vs Netherlands,Netherlands,T20I # 1825
309,Roelof van der Merwe,1.0,0,7,0,7.0,1,0,0,0,0,Netherlands vs Pakistan,Netherlands,T20I # 1852


In [143]:
df.to_csv('data/processed data/bowlingsummary.csv', index=False)