# Data Analysis with Python

### References
#### Data Source: https://stats.espncricinfo.com/ci/content/records/93276.html

#### Importing Libraries

In [1]:
import pandas as pd
import numpy as np

#### Read csv file


In [2]:
# Load the wicket-takers table from the local CSV file.
# The 'unicode_escape' codec is kept exactly as in the original read.
df = pd.read_csv(
    "wickets.csv",
    encoding="unicode_escape",
)



### Display the first 10 rows of the data frame

In [3]:
# Preview the first ten records of the loaded table.
display(df.head(n=10))

Unnamed: 0,Player,Span,Mat,Inns,Balls,Runs,Wkts,BBI,BBM,Ave,Econ,SR,5,10
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22
1,SK Warne (AUS),1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10
2,JM Anderson (ENG),2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3
3,A Kumble (INDIA),1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8
4,GD McGrath (AUS),1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3
5,SCJ Broad (ENG),2007-2021,149,274,29863,14590,524,8/15,11/121,27.84,2.93,56.9,18,3
6,CA Walsh (WI),1984-2001,132,242,30019,12688,519,7/37,13/55,24.44,2.53,57.8,22,3
7,DW Steyn (SA),2004-2019,93,171,18608,10077,439,7/51,11/60,22.95,3.24,42.3,26,5
8,N Kapil Dev (INDIA),1978-1994,131,227,27740,12867,434,9/83,11/146,29.64,2.78,63.9,23,2
9,HMRKB Herath (SL),1999-2018,93,170,25993,12157,433,9/127,14/184,28.07,2.8,60.0,34,9


#### Player = Name of the test cricket player (Country Name)
#### Span = Career span of playing test cricket for the player
#### Mat = Total matches played by the player
#### Inns = Total Innings played by the player
#### Balls = Total No of balls bowled by the player
#### Runs = Total runs conceded by the player
#### Wkts = Total wickets taken by the player
#### BBI = Best Innings Bowling (Wickets taken/Runs Conceded)
#### BBM = Best Match Bowling (Wickets taken/Runs Conceded)
#### Ave = Bowling Average
#### Econ = Economy rate (average runs per over)
#### SR = Bowling Strike Rate
#### 5 = No of total 5 Wickets taken in an Innings
#### 10 = No of total 10 Wickets taken in a Match

### Number of rows and columns in dataframe

In [4]:
# Report the frame's dimensions: row count first, then column count.
print('number of rows = ', len(df.index))
print('number of columns = ', len(df.columns))

number of rows =  79
number of columns =  14


### Data types

In [5]:
# DataFrame.info() writes its summary (column names, non-null counts, dtypes)
# straight to stdout and returns None, so wrapping it in print() only added
# a stray "None" line after the report. Call it directly instead.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  79 non-null     object 
 1   Span    79 non-null     object 
 2   Mat     79 non-null     object 
 3   Inns    79 non-null     int64  
 4   Balls   79 non-null     int64  
 5   Runs    79 non-null     int64  
 6   Wkts    79 non-null     int64  
 7   BBI     79 non-null     object 
 8   BBM     79 non-null     object 
 9   Ave     79 non-null     float64
 10  Econ    79 non-null     float64
 11  SR      79 non-null     float64
 12  5       79 non-null     int64  
 13  10      79 non-null     int64  
dtypes: float64(3), int64(6), object(5)
memory usage: 8.8+ KB
None


### Data Statistics

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
# The quartiles listed here are describe()'s defaults, spelled out for the reader.
display(df.describe(percentiles=[0.25, 0.5, 0.75]))

Unnamed: 0,Inns,Balls,Runs,Wkts,Ave,Econ,SR,5,10
count,79.0,79.0,79.0,79.0,79.0,79.0,79.0,79.0,79.0
mean,144.911392,18638.35443,8599.35443,317.21519,27.469747,2.806835,59.193671,16.35443,2.797468
std,51.180222,7199.256972,3085.168807,121.924911,3.655658,0.351577,9.350132,9.642372,3.235935
min,67.0,8785.0,4846.0,200.0,20.94,1.98,41.2,3.0,0.0
25%,110.0,13583.0,6456.5,229.0,24.5,2.6,53.3,9.5,1.0
50%,129.0,16498.0,7742.0,266.0,28.0,2.82,57.4,14.0,2.0
75%,169.0,21742.5,9756.0,374.5,29.87,3.08,63.95,20.5,3.5
max,304.0,44039.0,18355.0,800.0,34.79,3.46,91.9,67.0,22.0


### No missing values present in dataset

### Rename the column names appropriately

In [7]:
# List the current column labels; DataFrame.keys() returns the same Index as .columns.
print(df.keys())

Index(['Player', 'Span', 'Mat', 'Inns', 'Balls', 'Runs', 'Wkts', 'BBI', 'BBM',
       'Ave', 'Econ', 'SR', '5', '10'],
      dtype='object')


In [8]:
# Replace the scorecard abbreviations with descriptive column labels.
# Columns that are not keys of the mapping (Player, Span, Balls, Runs) keep their names.
readable_labels = {
    'Mat': 'Match',
    'Inns': 'Innings',
    'Wkts': 'Wickets',
    'BBI': 'Best_Innings_Bowling',
    'BBM': 'Best_Match_Bowling',
    'Ave': 'Average',
    'Econ': 'Economy',
    'SR': 'Strike_Rate',
    '5': 'Five_Wickets',
    '10': 'Ten_Wickets',
}
df = df.rename(columns=readable_labels)

# Show the first rows to confirm the new labels.
display(df.head())

Unnamed: 0,Player,Span,Match,Innings,Balls,Runs,Wickets,Best_Innings_Bowling,Best_Match_Bowling,Average,Economy,Strike_Rate,Five_Wickets,Ten_Wickets
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,18180,800,9/51,16/220,22.72,2.47,55.0,67,22
1,SK Warne (AUS),1992-2007,145,273,40705,17995,708,8/71,12/128,25.41,2.65,57.4,37,10
2,JM Anderson (ENG),2003-2021,164*,304,35079,16575,623,7/42,11/71,26.6,2.83,56.3,30,3
3,A Kumble (INDIA),1990-2008,132,236,40850,18355,619,10/74,14/149,29.65,2.69,65.9,35,8
4,GD McGrath (AUS),1993-2007,124,243,29248,12186,563,8/24,10/27,21.64,2.49,51.9,29,3


### Remove a column(Runs) from the dataframe

In [9]:
# Show the table without the 'Runs' column; drop() returns a new frame here.
# NOTE(review): the result is not assigned, so `df` itself still contains 'Runs' —
# confirm whether the column was meant to be removed permanently.
df.drop(columns=['Runs'])

Unnamed: 0,Player,Span,Match,Innings,Balls,Wickets,Best_Innings_Bowling,Best_Match_Bowling,Average,Economy,Strike_Rate,Five_Wickets,Ten_Wickets
0,M Muralitharan (ICC/SL),1992-2010,133,230,44039,800,9/51,16/220,22.72,2.47,55.0,67,22
1,SK Warne (AUS),1992-2007,145,273,40705,708,8/71,12/128,25.41,2.65,57.4,37,10
2,JM Anderson (ENG),2003-2021,164*,304,35079,623,7/42,11/71,26.60,2.83,56.3,30,3
3,A Kumble (INDIA),1990-2008,132,236,40850,619,10/74,14/149,29.65,2.69,65.9,35,8
4,GD McGrath (AUS),1993-2007,124,243,29248,563,8/24,10/27,21.64,2.49,51.9,29,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,SCG MacGill (AUS),1998-2008,44,85,11237,208,8/108,12/107,29.02,3.22,54.0,12,2
75,Saqlain Mushtaq (PAK),1995-2004,49,86,14070,208,8/164,10/155,29.83,2.64,67.6,13,3
76,AME Roberts (WI),1974-1983,47,90,11135,202,7/54,12/121,25.61,2.78,55.1,11,2
77,JA Snow (ENG),1965-1976,49,93,12021,202,7/40,10/142,26.66,2.68,59.5,8,1
