# 1. Loading the Data and Knocking into Shape

Import relevant modules and import initial datasets

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

Load data

In [2]:
#Load the sold and unsold auction sheets from the same Excel workbook
auction_workbook = r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\India Premier League - Sold and Unsold -  Auction Prices Dec 2022 for 2023 season.xlsx'
xls = pd.ExcelFile(auction_workbook)
SoldIPL = pd.read_excel(xls, sheet_name='Sold Dec 2022')
UnSoldIPL = pd.read_excel(xls, sheet_name='Unsold Dec 2022')


In [3]:
#Look at dataset of players sold in IPL auction in Dec 2022
SoldIPL.info()
SoldIPL.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204 entries, 0 to 203
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   PLAYER         204 non-null    object
 1   Country        204 non-null    object
 2   Team           204 non-null    object
 3   Type           204 non-null    object
 4   Auction_price  204 non-null    object
dtypes: object(5)
memory usage: 8.1+ KB


Unnamed: 0,PLAYER,Country,Team,Type,Auction_price
0,Deepak Chahar,India,Chennai Super Kings,Bowler,"₹14,00,00,000"
1,Ambati Rayudu,India,Chennai Super Kings,Wicket Keeper,"₹6,75,00,000"
2,Dwayne Bravo,West Indies,Chennai Super Kings,All-Rounder,"₹4,40,00,000"
3,Shivam Dube,India,Chennai Super Kings,All-Rounder,"₹4,00,00,000"
4,Chris Jordan,England,Chennai Super Kings,All-Rounder,"₹3,60,00,000"


Auction prices are in Indian rupees, which use a different numbering system:
- One crore = 10 million rupees ≈ £99,720 (so multiply rupees by 0.009972 to convert to pounds)
- One lakh = 100,000 rupees

In [4]:
#Strip the rupee symbol and the comma separators, then convert to integer rupees.
#Casting to str first means numeric cells (or a second run of this cell, when the
#column is already int) no longer raise AttributeError on .replace.
SoldIPL['Auction_price'] = (
    SoldIPL['Auction_price']
    .astype(str)
    .str.replace('₹', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)





In [5]:
#Looks good
SoldIPL.head()



Unnamed: 0,PLAYER,Country,Team,Type,Auction_price
0,Deepak Chahar,India,Chennai Super Kings,Bowler,140000000
1,Ambati Rayudu,India,Chennai Super Kings,Wicket Keeper,67500000
2,Dwayne Bravo,West Indies,Chennai Super Kings,All-Rounder,44000000
3,Shivam Dube,India,Chennai Super Kings,All-Rounder,40000000
4,Chris Jordan,England,Chennai Super Kings,All-Rounder,36000000


In [6]:
SoldIPL.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204 entries, 0 to 203
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   PLAYER         204 non-null    object
 1   Country        204 non-null    object
 2   Team           204 non-null    object
 3   Type           204 non-null    object
 4   Auction_price  204 non-null    int32 
dtypes: int32(1), object(4)
memory usage: 7.3+ KB


In [7]:
#Add the auction price in pounds sterling (rupees x 0.009972), then give both
#price columns the shared 'Auction-Base' names
sold_price_names = {
    'Auction_price': 'Auction-Base_price',
    'Auc_price_pounds': 'Auc-Base_price_pounds',
}
SoldIPL['Auc_price_pounds'] = 0.009972 * SoldIPL['Auction_price']
SoldIPL.rename(columns=sold_price_names, inplace=True)
SoldIPL.head()

Unnamed: 0,PLAYER,Country,Team,Type,Auction-Base_price,Auc-Base_price_pounds
0,Deepak Chahar,India,Chennai Super Kings,Bowler,140000000,1396080.0
1,Ambati Rayudu,India,Chennai Super Kings,Wicket Keeper,67500000,673110.0
2,Dwayne Bravo,West Indies,Chennai Super Kings,All-Rounder,44000000,438768.0
3,Shivam Dube,India,Chennai Super Kings,All-Rounder,40000000,398880.0
4,Chris Jordan,England,Chennai Super Kings,All-Rounder,36000000,358992.0


In [8]:
#And those that weren't sold
UnSoldIPL.info()
UnSoldIPL.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396 entries, 0 to 395
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   PLAYER      396 non-null    object 
 1   Country     396 non-null    object 
 2   Team        396 non-null    object 
 3   Type        396 non-null    object 
 4   Base Price  396 non-null    object 
 5   Unnamed: 5  0 non-null      float64
 6   Unnamed: 6  1 non-null      object 
dtypes: float64(1), object(6)
memory usage: 21.8+ KB


Unnamed: 0,PLAYER,Country,Team,Type,Base Price,Unnamed: 5,Unnamed: 6
0,Suresh Raina,India,Unsold,Batsman,"₹2,00,00,000",,
1,Steve Smith,Australia,Unsold,Batsman,"₹2,00,00,000",,
2,Shakib Al Hasan,Bangladesh,Unsold,All-Rounder,"₹2,00,00,000",,
3,Adil Rashid,England,Unsold,Bowler,"₹2,00,00,000",,
4,Imran Tahir,South Africa,Unsold,Bowler,"₹2,00,00,000",,


In [9]:
#Strip the rupee symbol and the comma separators, convert to integer rupees,
#then add the base price in pounds sterling (rupees x 0.009972).
#Casting to str first means numeric cells (or a second run of this cell, when the
#column is already int) no longer raise AttributeError on .replace.
UnSoldIPL['Base Price'] = (
    UnSoldIPL['Base Price']
    .astype(str)
    .str.replace('₹', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)
UnSoldIPL['Base_Price_Pounds'] = UnSoldIPL['Base Price']*0.009972
UnSoldIPL.head()

Unnamed: 0,PLAYER,Country,Team,Type,Base Price,Unnamed: 5,Unnamed: 6,Base_Price_Pounds
0,Suresh Raina,India,Unsold,Batsman,20000000,,,199440.0
1,Steve Smith,Australia,Unsold,Batsman,20000000,,,199440.0
2,Shakib Al Hasan,Bangladesh,Unsold,All-Rounder,20000000,,,199440.0
3,Adil Rashid,England,Unsold,Bowler,20000000,,,199440.0
4,Imran Tahir,South Africa,Unsold,Bowler,20000000,,,199440.0


In [10]:
#Give the price columns the same names used in the sold-players frame,
#and discard the two unnamed columns
unsold_price_names = {
    'Base Price': 'Auction-Base_price',
    'Base_Price_Pounds': 'Auc-Base_price_pounds',
}
UnSoldIPL.rename(columns=unsold_price_names, inplace=True)
UnSoldIPL.drop(columns=['Unnamed: 5', 'Unnamed: 6'], inplace=True)
UnSoldIPL.head()

Unnamed: 0,PLAYER,Country,Team,Type,Auction-Base_price,Auc-Base_price_pounds
0,Suresh Raina,India,Unsold,Batsman,20000000,199440.0
1,Steve Smith,Australia,Unsold,Batsman,20000000,199440.0
2,Shakib Al Hasan,Bangladesh,Unsold,All-Rounder,20000000,199440.0
3,Adil Rashid,England,Unsold,Bowler,20000000,199440.0
4,Imran Tahir,South Africa,Unsold,Bowler,20000000,199440.0


In [11]:
#Stack the sold and unsold IPL players into one frame with a fresh index
SoldUnSold = pd.concat((SoldIPL, UnSoldIPL), axis=0, ignore_index=True)
SoldUnSold.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   PLAYER                 600 non-null    object 
 1   Country                600 non-null    object 
 2   Team                   600 non-null    object 
 3   Type                   600 non-null    object 
 4   Auction-Base_price     600 non-null    int32  
 5   Auc-Base_price_pounds  600 non-null    float64
dtypes: float64(1), int32(1), object(4)
memory usage: 25.9+ KB


In [12]:
#Find key stats and characteristics of SoldUnSold dataset
SoldUnSold.describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
PLAYER,600.0,596.0,Shubham Singh,2.0,,,,,,,
Country,600.0,18.0,India,375.0,,,,,,,
Team,600.0,11.0,Unsold,396.0,,,,,,,
Type,600.0,4.0,All-Rounder,232.0,,,,,,,
Auction-Base_price,600.0,,,,11779166.666667,22460593.931859,2000000.0,2000000.0,2000000.0,7500000.0,152500000.0
Auc-Base_price_pounds,600.0,,,,117461.85,223977.042688,19944.0,19944.0,19944.0,74790.0,1520730.0


In [13]:
#Tabulate, per column, the number of missing values and the percentage they represent
sold_unsold_nulls = SoldUnSold.isnull()
missing = pd.concat([sold_unsold_nulls.sum(), sold_unsold_nulls.mean() * 100], axis=1)
missing.columns = ['count', '%']
missing.sort_values(by='count')

Unnamed: 0,count,%
PLAYER,0,0.0
Country,0,0.0
Team,0,0.0
Type,0,0.0
Auction-Base_price,0,0.0
Auc-Base_price_pounds,0,0.0


In [14]:
#Bring in the other datasets: all-time IPL batting and bowling stats, the 2022-season
#most-valuable-player table, and the 2021-22 international central-contract salaries
#NOTE(review): these are absolute paths on one machine, so the notebook will not
#run elsewhere without editing them
IPLBatStat = pd.read_csv(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\IPL - BATTING ALL TIME .csv')
IPLBowlStat = pd.read_excel(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\ipl BOWLING ALL TIME.xlsx')
IPLMVPStat = pd.read_excel(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\ipl - most valuable player - 2022 season.xlsx')
IntCrickSals = pd.read_excel(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\International Mens Cricket Central Contract Salaries - 2021 22.xlsx')


Looking through the different IPL datasets below, it seems worth renaming the POS columns to indicate whether they refer to batting, bowling or MVP: POS is a ranking, and so is another measure of a player's worth or value.

In [15]:
#All time batting stats for IPL - data types look good, but lots of blank rows
IPLBatStat.info()
IPLBatStat.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 505 entries, 0 to 504
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   POS     126 non-null    float64
 1   Player  126 non-null    object 
 2   Mat     126 non-null    float64
 3   Inns    126 non-null    float64
 4   NO      126 non-null    float64
 5   Runs    126 non-null    float64
 6   HS      126 non-null    object 
 7   Avg     126 non-null    float64
 8   BF      126 non-null    float64
 9   SR      126 non-null    float64
 10  100     126 non-null    float64
 11  50      126 non-null    float64
 12  4s      126 non-null    float64
 13  6s      126 non-null    float64
dtypes: float64(12), object(2)
memory usage: 55.4+ KB


Unnamed: 0,POS,Player,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s
0,1.0,Virat Kohli,223.0,215.0,32.0,6624.0,113,36.2,5129.0,129.15,5.0,44.0,578.0,218.0
1,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,
5,2.0,Shikhar Dhawan,206.0,205.0,27.0,6244.0,106*,35.08,4942.0,126.35,2.0,47.0,701.0,136.0
6,,,,,,,,,,,,,,
7,,,,,,,,,,,,,,
8,,,,,,,,,,,,,,
9,3.0,David Warner,162.0,162.0,22.0,5881.0,126,42.01,4180.0,140.69,4.0,54.0,561.0,211.0


In [16]:
#The blank rows show up as a large share of missing values in every column
bat_nulls = IPLBatStat.isnull()
missing = pd.concat([bat_nulls.sum(), bat_nulls.mean() * 100], axis=1)
missing.columns = ['count', '%']
missing.sort_values(by='count')

Unnamed: 0,count,%
POS,379,75.049505
Player,379,75.049505
Mat,379,75.049505
Inns,379,75.049505
NO,379,75.049505
Runs,379,75.049505
HS,379,75.049505
Avg,379,75.049505
BF,379,75.049505
SR,379,75.049505


In [17]:
#Remove every row that contains at least one missing value (in place)
IPLBatStat.dropna(axis=0, how='any', inplace=True)

In [18]:
#Looks better
IPLBatStat.head(10)


Unnamed: 0,POS,Player,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s
0,1.0,Virat Kohli,223.0,215.0,32.0,6624.0,113,36.2,5129.0,129.15,5.0,44.0,578.0,218.0
5,2.0,Shikhar Dhawan,206.0,205.0,27.0,6244.0,106*,35.08,4942.0,126.35,2.0,47.0,701.0,136.0
9,3.0,David Warner,162.0,162.0,22.0,5881.0,126,42.01,4180.0,140.69,4.0,54.0,561.0,211.0
13,4.0,Rohit Sharma,227.0,222.0,28.0,5879.0,109*,30.3,4526.0,129.89,1.0,40.0,519.0,240.0
17,5.0,Suresh Raina,205.0,200.0,30.0,5528.0,100*,32.52,4042.0,136.76,1.0,39.0,506.0,203.0
21,6.0,AB de Villiers,184.0,170.0,40.0,5162.0,133*,39.7,3403.0,151.68,3.0,40.0,413.0,251.0
25,7.0,MS Dhoni,234.0,206.0,79.0,4978.0,84*,39.2,3682.0,135.2,0.0,24.0,346.0,229.0
29,8.0,Chris Gayle,142.0,141.0,16.0,4965.0,175*,39.72,3333.0,148.96,6.0,31.0,405.0,357.0
33,9.0,Robin Uthappa,205.0,197.0,17.0,4952.0,88,27.51,3799.0,130.35,0.0,27.0,481.0,182.0
37,10.0,Dinesh Karthik,229.0,208.0,45.0,4376.0,97*,26.85,3299.0,132.65,0.0,19.0,406.0,125.0


In [19]:
#And right number of players, etc
IPLBatStat.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 126 entries, 0 to 501
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   POS     126 non-null    float64
 1   Player  126 non-null    object 
 2   Mat     126 non-null    float64
 3   Inns    126 non-null    float64
 4   NO      126 non-null    float64
 5   Runs    126 non-null    float64
 6   HS      126 non-null    object 
 7   Avg     126 non-null    float64
 8   BF      126 non-null    float64
 9   SR      126 non-null    float64
 10  100     126 non-null    float64
 11  50      126 non-null    float64
 12  4s      126 non-null    float64
 13  6s      126 non-null    float64
dtypes: float64(12), object(2)
memory usage: 14.8+ KB


In [20]:
#Reset the index so it runs 0..n-1 again after the blank rows were dropped
IPLBatStat = IPLBatStat.reset_index(drop=True)
IPLBatStat.head(n=10)

Unnamed: 0,POS,Player,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s
0,1.0,Virat Kohli,223.0,215.0,32.0,6624.0,113,36.2,5129.0,129.15,5.0,44.0,578.0,218.0
1,2.0,Shikhar Dhawan,206.0,205.0,27.0,6244.0,106*,35.08,4942.0,126.35,2.0,47.0,701.0,136.0
2,3.0,David Warner,162.0,162.0,22.0,5881.0,126,42.01,4180.0,140.69,4.0,54.0,561.0,211.0
3,4.0,Rohit Sharma,227.0,222.0,28.0,5879.0,109*,30.3,4526.0,129.89,1.0,40.0,519.0,240.0
4,5.0,Suresh Raina,205.0,200.0,30.0,5528.0,100*,32.52,4042.0,136.76,1.0,39.0,506.0,203.0
5,6.0,AB de Villiers,184.0,170.0,40.0,5162.0,133*,39.7,3403.0,151.68,3.0,40.0,413.0,251.0
6,7.0,MS Dhoni,234.0,206.0,79.0,4978.0,84*,39.2,3682.0,135.2,0.0,24.0,346.0,229.0
7,8.0,Chris Gayle,142.0,141.0,16.0,4965.0,175*,39.72,3333.0,148.96,6.0,31.0,405.0,357.0
8,9.0,Robin Uthappa,205.0,197.0,17.0,4952.0,88,27.51,3799.0,130.35,0.0,27.0,481.0,182.0
9,10.0,Dinesh Karthik,229.0,208.0,45.0,4376.0,97*,26.85,3299.0,132.65,0.0,19.0,406.0,125.0


In [21]:
#Re-check the missing-value table: every column should now show zero
bat_nulls_after = IPLBatStat.isnull()
missing = pd.concat([bat_nulls_after.sum(), bat_nulls_after.mean() * 100], axis=1)
missing.columns = ['count', '%']
missing.sort_values(by='count')

Unnamed: 0,count,%
POS,0,0.0
Player,0,0.0
Mat,0,0.0
Inns,0,0.0
NO,0,0.0
Runs,0,0.0
HS,0,0.0
Avg,0,0.0
BF,0,0.0
SR,0,0.0


In [22]:
#POS is the batting ranking, so label it as such
IPLBatStat.rename(columns={'POS': 'IPL_Bat_Rank'}, inplace=True)


In [23]:
IPLBatStat.head()

Unnamed: 0,IPL_Bat_Rank,Player,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s
0,1.0,Virat Kohli,223.0,215.0,32.0,6624.0,113,36.2,5129.0,129.15,5.0,44.0,578.0,218.0
1,2.0,Shikhar Dhawan,206.0,205.0,27.0,6244.0,106*,35.08,4942.0,126.35,2.0,47.0,701.0,136.0
2,3.0,David Warner,162.0,162.0,22.0,5881.0,126,42.01,4180.0,140.69,4.0,54.0,561.0,211.0
3,4.0,Rohit Sharma,227.0,222.0,28.0,5879.0,109*,30.3,4526.0,129.89,1.0,40.0,519.0,240.0
4,5.0,Suresh Raina,205.0,200.0,30.0,5528.0,100*,32.52,4042.0,136.76,1.0,39.0,506.0,203.0


It's also worth noting that HS (highest score) in this batting table is an object type, presumably because some scores have an asterisk next to them, indicating that the highest score was "not out". A not-out score is usually seen as having a bit more worth/value, because the batsman did not lose his wicket. We will therefore need to create another column to indicate whether or not the high score was not out, and then apply the same step to the other batting tables.

In [24]:
#Flag each high score that carries an asterisk (i.e. was a not-out score)
HS_was_NO = ['*' in score for score in IPLBatStat['HS']]
IPLBatStat['HS_was_NO'] = HS_was_NO
IPLBatStat.head()

Unnamed: 0,IPL_Bat_Rank,Player,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s,HS_was_NO
0,1.0,Virat Kohli,223.0,215.0,32.0,6624.0,113,36.2,5129.0,129.15,5.0,44.0,578.0,218.0,False
1,2.0,Shikhar Dhawan,206.0,205.0,27.0,6244.0,106*,35.08,4942.0,126.35,2.0,47.0,701.0,136.0,True
2,3.0,David Warner,162.0,162.0,22.0,5881.0,126,42.01,4180.0,140.69,4.0,54.0,561.0,211.0,False
3,4.0,Rohit Sharma,227.0,222.0,28.0,5879.0,109*,30.3,4526.0,129.89,1.0,40.0,519.0,240.0,True
4,5.0,Suresh Raina,205.0,200.0,30.0,5528.0,100*,32.52,4042.0,136.76,1.0,39.0,506.0,203.0,True


In [25]:
#Strip the not-out asterisk from the high score and store it as float64
hs_without_asterisk = IPLBatStat['HS'].str.replace('*', '', regex=False)
IPLBatStat['HS'] = hs_without_asterisk.astype(float)
IPLBatStat.info()
IPLBatStat.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   IPL_Bat_Rank  126 non-null    float64
 1   Player        126 non-null    object 
 2   Mat           126 non-null    float64
 3   Inns          126 non-null    float64
 4   NO            126 non-null    float64
 5   Runs          126 non-null    float64
 6   HS            126 non-null    float64
 7   Avg           126 non-null    float64
 8   BF            126 non-null    float64
 9   SR            126 non-null    float64
 10  100           126 non-null    float64
 11  50            126 non-null    float64
 12  4s            126 non-null    float64
 13  6s            126 non-null    float64
 14  HS_was_NO     126 non-null    bool   
dtypes: bool(1), float64(13), object(1)
memory usage: 14.0+ KB


Unnamed: 0,IPL_Bat_Rank,Player,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s,HS_was_NO
0,1.0,Virat Kohli,223.0,215.0,32.0,6624.0,113.0,36.2,5129.0,129.15,5.0,44.0,578.0,218.0,False
1,2.0,Shikhar Dhawan,206.0,205.0,27.0,6244.0,106.0,35.08,4942.0,126.35,2.0,47.0,701.0,136.0,True
2,3.0,David Warner,162.0,162.0,22.0,5881.0,126.0,42.01,4180.0,140.69,4.0,54.0,561.0,211.0,False
3,4.0,Rohit Sharma,227.0,222.0,28.0,5879.0,109.0,30.3,4526.0,129.89,1.0,40.0,519.0,240.0,True
4,5.0,Suresh Raina,205.0,200.0,30.0,5528.0,100.0,32.52,4042.0,136.76,1.0,39.0,506.0,203.0,True


In [26]:
#Make HS-NO changes into a function
def HS_NO_col(df, colname):
    '''Flag not-out high scores and make the high-score column numeric.

    Adds a boolean column ``HS_was_NO`` to ``df`` that is True where the
    value in ``colname`` contains an asterisk (a "not out" score), then
    removes the asterisk from ``colname`` and converts that column to float.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe being worked on; it is modified in place.
    colname : str
        Name of the column in ``df`` that holds the high scores.

    Returns
    -------
    None
    '''
    # Look the column up by the name passed in; cast to str so that cells
    # read in as plain numbers are handled the same way as text cells.
    scores = df[colname].astype(str)
    # Literal (non-regex) match: '*' on its own is not a valid regex pattern.
    df['HS_was_NO'] = scores.str.contains('*', regex=False)
    df[colname] = scores.str.replace('*', '', regex=False).astype(float)

In [27]:
#All time bowling data from IPL - same issues as with batting data
IPLBowlStat.info()
IPLBowlStat.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 668 entries, 0 to 667
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   POS     168 non-null    float64
 1   Player  168 non-null    object 
 2   Mat     168 non-null    float64
 3   Inns    168 non-null    float64
 4   Ov      168 non-null    float64
 5   Runs    168 non-null    float64
 6   Wkts    168 non-null    float64
 7   BBI     168 non-null    object 
 8   Avg     168 non-null    float64
 9   Econ    168 non-null    float64
 10  SR      168 non-null    float64
dtypes: float64(9), object(2)
memory usage: 57.5+ KB


Unnamed: 0,POS,Player,Mat,Inns,Ov,Runs,Wkts,BBI,Avg,Econ,SR
0,1.0,Dwayne Bravo,161.0,158.0,516.0,4359.0,183.0,2022-04-01 00:00:00,23.82,8.38,17.05
1,,,,,,,,,,,
2,,,,,,,,,,,
3,,,,,,,,,,,
4,,,,,,,,,,,


In [28]:
#Drop the rows with missing values, then renumber the index from 0
#NOTE(review): BBI shows up as timestamps in the preview below, so the values
#appear to have been parsed as dates - confirm and re-load as text if so
IPLBowlStat = IPLBowlStat.dropna().reset_index(drop=True)
IPLBowlStat.head(n=10)

Unnamed: 0,POS,Player,Mat,Inns,Ov,Runs,Wkts,BBI,Avg,Econ,SR
0,1.0,Dwayne Bravo,161.0,158.0,516.0,4359.0,183.0,2022-04-01 00:00:00,23.82,8.38,17.05
1,2.0,Lasith Malinga,122.0,122.0,471.0,3366.0,170.0,2013-05-01 00:00:00,19.8,7.14,16.63
2,3.0,Amit Mishra,154.0,154.0,540.0,3980.0,166.0,2017-05-01 00:00:00,23.98,7.36,19.55
3,4.0,Yuzvendra Chahal,131.0,130.0,475.0,3624.0,166.0,1940-05-01 00:00:00,21.83,7.61,17.2
4,5.0,Piyush Chawla,165.0,164.0,545.0,4301.0,157.0,2017-04-01 00:00:00,27.39,7.88,20.85
5,6.0,Ravichandran Ashwin,184.0,181.0,649.0,4535.0,157.0,1934-04-01 00:00:00,28.89,6.98,24.84
6,7.0,Bhuvneshwar Kumar,146.0,146.0,542.0,3971.0,154.0,2019-05-01 00:00:00,25.79,7.3,21.18
7,8.0,Sunil Narine,148.0,147.0,576.0,3820.0,152.0,2019-05-01 00:00:00,25.13,6.63,22.75
8,9.0,Harbhajan Singh,163.0,160.0,569.0,4030.0,150.0,2018-05-01 00:00:00,26.87,7.08,22.77
9,10.0,Jasprit Bumrah,120.0,120.0,455.0,3378.0,145.0,2023-10-05 00:00:00,23.3,7.39,18.91


In [29]:
#POS is the bowling ranking, so label it as such
IPLBowlStat.rename(columns={'POS': 'IPL_Bowl_Rank'}, inplace=True)
IPLBowlStat.head()

Unnamed: 0,IPL_Bowl_Rank,Player,Mat,Inns,Ov,Runs,Wkts,BBI,Avg,Econ,SR
0,1.0,Dwayne Bravo,161.0,158.0,516.0,4359.0,183.0,2022-04-01 00:00:00,23.82,8.38,17.05
1,2.0,Lasith Malinga,122.0,122.0,471.0,3366.0,170.0,2013-05-01 00:00:00,19.8,7.14,16.63
2,3.0,Amit Mishra,154.0,154.0,540.0,3980.0,166.0,2017-05-01 00:00:00,23.98,7.36,19.55
3,4.0,Yuzvendra Chahal,131.0,130.0,475.0,3624.0,166.0,1940-05-01 00:00:00,21.83,7.61,17.2
4,5.0,Piyush Chawla,165.0,164.0,545.0,4301.0,157.0,2017-04-01 00:00:00,27.39,7.88,20.85


In [30]:
#Most valued player ratings, with same issues
IPLMVPStat.info()
IPLMVPStat.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 686 entries, 0 to 685
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   POS        172 non-null    float64
 1   Player     172 non-null    object 
 2   Pts        172 non-null    float64
 3   Mat        172 non-null    float64
 4   Wkts       172 non-null    float64
 5   Dots       172 non-null    float64
 6   4s         172 non-null    float64
 7   6s         172 non-null    float64
 8   Catches    172 non-null    float64
 9   Run outs   172 non-null    float64
 10  Stumpings  172 non-null    float64
dtypes: float64(10), object(1)
memory usage: 59.1+ KB


Unnamed: 0,POS,Player,Pts,Mat,Wkts,Dots,4s,6s,Catches,Run outs,Stumpings
0,1.0,Jos Buttler,387.5,17.0,0.0,0.0,83.0,45.0,9.0,0.0,0.0
1,,,,,,,,,,,
2,,,,,,,,,,,
3,,,,,,,,,,,
4,,,,,,,,,,,


In [31]:
#Drop the rows with missing values, then renumber the index from 0
IPLMVPStat = IPLMVPStat.dropna().reset_index(drop=True)
IPLMVPStat.head(n=10)

Unnamed: 0,POS,Player,Pts,Mat,Wkts,Dots,4s,6s,Catches,Run outs,Stumpings
0,1.0,Jos Buttler,387.5,17.0,0.0,0.0,83.0,45.0,9.0,0.0,0.0
1,2.0,Hardik Pandya,284.5,15.0,8.0,76.0,49.0,12.0,4.0,6.0,0.0
2,3.0,Andre Russell,281.0,14.0,17.0,58.0,18.0,32.0,2.0,1.5,0.0
3,4.0,Prasidh Krishna,273.0,17.0,19.0,200.0,0.0,0.0,2.0,1.5,0.0
4,5.0,Liam Livingstone,265.5,14.0,6.0,38.0,29.0,34.0,6.0,0.0,0.0
5,6.0,Wanindu Hasaranga,258.0,16.0,26.0,143.0,4.0,1.0,3.0,3.0,0.0
6,7.0,Mohammed Shami,252.5,16.0,20.0,172.0,0.0,0.0,3.0,3.0,0.0
7,8.0,Rashid Khan,250.0,16.0,19.0,127.0,3.0,9.0,7.0,0.0,0.0
8,9.0,Trent Boult,245.5,16.0,16.0,176.0,2.0,1.0,2.0,0.0,0.0
9,10.0,Sanju Samson,244.5,17.0,0.0,0.0,43.0,26.0,14.0,6.0,2.0


In [32]:
#POS is the MVP ranking, so label it as such
IPLMVPStat.rename(columns={'POS': 'IPL_MVP_Rank'}, inplace=True)
IPLMVPStat.head()

Unnamed: 0,IPL_MVP_Rank,Player,Pts,Mat,Wkts,Dots,4s,6s,Catches,Run outs,Stumpings
0,1.0,Jos Buttler,387.5,17.0,0.0,0.0,83.0,45.0,9.0,0.0,0.0
1,2.0,Hardik Pandya,284.5,15.0,8.0,76.0,49.0,12.0,4.0,6.0,0.0
2,3.0,Andre Russell,281.0,14.0,17.0,58.0,18.0,32.0,2.0,1.5,0.0
3,4.0,Prasidh Krishna,273.0,17.0,19.0,200.0,0.0,0.0,2.0,1.5,0.0
4,5.0,Liam Livingstone,265.5,14.0,6.0,38.0,29.0,34.0,6.0,0.0,0.0


In [33]:
#Label the points column as MVP points in the same way
IPLMVPStat.rename(columns={'Pts': 'IPL_MVP_Pts'}, inplace=True)
IPLMVPStat.head()

Unnamed: 0,IPL_MVP_Rank,Player,IPL_MVP_Pts,Mat,Wkts,Dots,4s,6s,Catches,Run outs,Stumpings
0,1.0,Jos Buttler,387.5,17.0,0.0,0.0,83.0,45.0,9.0,0.0,0.0
1,2.0,Hardik Pandya,284.5,15.0,8.0,76.0,49.0,12.0,4.0,6.0,0.0
2,3.0,Andre Russell,281.0,14.0,17.0,58.0,18.0,32.0,2.0,1.5,0.0
3,4.0,Prasidh Krishna,273.0,17.0,19.0,200.0,0.0,0.0,2.0,1.5,0.0
4,5.0,Liam Livingstone,265.5,14.0,6.0,38.0,29.0,34.0,6.0,0.0,0.0


In [34]:
IntCrickSals.info()
IntCrickSals.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 15 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Players                                176 non-null    object 
 1   Country                                176 non-null    object 
 2   Retainer Fee / contract / base salary  165 non-null    object 
 3   Test Fee                               171 non-null    object 
 4   ODI Fee                                167 non-null    object 
 5   T20 Fee                                164 non-null    object 
 6   Unnamed: 6                             0 non-null      float64
 7   Unnamed: 7                             0 non-null      float64
 8   Unnamed: 8                             0 non-null      float64
 9   Unnamed: 9                             0 non-null      float64
 10  Unnamed: 10                            0 non-null      float64
 11  Unname

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000",,,,,,,,,One Crore = 10m Rupees
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",,,,,,,,,"One lakh = 100,000 Rupees"
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",,,,,,,,,
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",,,,,,,,,
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",,,,,,,,,,,


In [35]:
#Drop the nine columns 'Unnamed: 6' to 'Unnamed: 14'
#(the preview above shows 'Unnamed: 14' holding currency notes rather than player data)
unnamed_cols = [f'Unnamed: {n}' for n in range(6, 15)]
IntCrickSals.drop(columns=unnamed_cols, inplace=True)
IntCrickSals.head()


Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000"
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000"
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000"
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000"
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",,


In [36]:
IntCrickSals.head(25)

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000"
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000"
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000"
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000"
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",,
5,Faheem Ashraf,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500"
6,Fakhar Zaman,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500"
7,Fawad Alam,Pakistan,"937,500 PKR per month","PKR 40,000",,
8,Shadab Khan,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500"
9,Yasir Shah,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000",


In [37]:
IntCrickSals.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 6 columns):
 #   Column                                 Non-Null Count  Dtype 
---  ------                                 --------------  ----- 
 0   Players                                176 non-null    object
 1   Country                                176 non-null    object
 2   Retainer Fee / contract / base salary  165 non-null    object
 3   Test Fee                               171 non-null    object
 4   ODI Fee                                167 non-null    object
 5   T20 Fee                                164 non-null    object
dtypes: object(6)
memory usage: 8.4+ KB


In [38]:
#List the distinct countries in the dataset - there turn out to be 9
IntCrickSals['Country'].unique()

array(['Pakistan', 'India', 'England', 'Australia', 'South Africa',
       'New Zealand', 'West Indies', 'Sri Lanka', 'Bangladesh'],
      dtype=object)

In [39]:
#View India players only. Values were double-checked, Jasprit Bumrah's in particular:
#the India base salary figures are annual, so the 'per month' on his row is a mistake
is_india = IntCrickSals["Country"].eq("India")
India_players = IntCrickSals.loc[is_india]
print(India_players)

                Players Country Retainer Fee / contract / base salary  \
20          Virat Kohli   India                             7 Crores    
21     Rohit Sharma (C)   India                             7 Crores    
22       Jasprit Bumrah   India                    7 Crores per month   
23  Ravichandran Ashwin   India                             5 Crores    
24      Ravindra Jadeja   India                             5 Crores    
25         Rishabh Pant   India                              5 Crores   
26        KL Rahul (VC)   India                              5 Crores   
27       Mohammad Shami   India                              5 Crores   
28    Cheteshwar Pujara   India                              3 Crores   
29       Ajinkya Rahane   India                              3 Crores   
30           Axer Patel   India                              3 Crores   
31       Shradul Thakur   India                              3 Crores   
32         Sheryas Iyer   India                    

In [40]:
#View England players only
is_england = IntCrickSals["Country"].eq("England")
England_players = IntCrickSals.loc[is_england]
print(England_players)

              Players  Country          Retainer Fee / contract / base salary  \
47        Eoin Morgan  England  £925,000 (plus undisclosed captain allowance)   
48     James Anderson  England                                         925000   
49       Stuart Broad  England                                         925000   
50           Joe Root  England  £925,000 (Undisclosed test captain allowance)   
51         Ben Stokes  England                                         925000   
52  Jonathan Bairstow  England                                         925000   
53        Jos Buttler  England                                         925000   
54       Chris Woakes  England                                         925000   
55          Moeen Ali  England                                         925000   
56         Sam Curran  England                                         925000   
57       Jofra Archer  England                                         925000   
58          Jason Roy  Engla

In [41]:
print(IntCrickSals[IntCrickSals["Country"] == "England"])

              Players  Country          Retainer Fee / contract / base salary  \
47        Eoin Morgan  England  £925,000 (plus undisclosed captain allowance)   
48     James Anderson  England                                         925000   
49       Stuart Broad  England                                         925000   
50           Joe Root  England  £925,000 (Undisclosed test captain allowance)   
51         Ben Stokes  England                                         925000   
52  Jonathan Bairstow  England                                         925000   
53        Jos Buttler  England                                         925000   
54       Chris Woakes  England                                         925000   
55          Moeen Ali  England                                         925000   
56         Sam Curran  England                                         925000   
57       Jofra Archer  England                                         925000   
58          Jason Roy  Engla

In [42]:
#View Bangladesh players only
Bangla_players = IntCrickSals.loc[IntCrickSals["Country"].eq("Bangladesh")]
Bangla_players.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
155,Tamim Iqbal,Bangladesh,"$57,000","$7,100","$3,600","$2,400"
156,Soumya Sarkar,Bangladesh,,"$7,100","$3,600","$2,400"
157,Shakib Al Hasan,Bangladesh,"$57,000","$7,100","$3,600","$2,400"
158,Shadman Islam,Bangladesh,,"$7,100","$3,600","$2,400"
159,Sabbir Rahman,Bangladesh,,"$7,100","$3,600","$2,400"


In [43]:
#View Australia players only
Aust_players = IntCrickSals.loc[IntCrickSals["Country"].eq("Australia")]
Aust_players.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
67,Aaron Finch,Australia,$1.5 million,"$15,000","$18,000","$20,000"
68,Tim Paine,Australia,$1 million,"$15,000","$18,000","$20,000"
69,Steve Smith,Australia,$1 million,"$15,000","$18,000","$20,000"
70,Pat Cummins,Australia,"$850,000","$15,000","$18,000","$20,000"
71,Glenn Maxwell,Australia,"$850,000","$15,000","$18,000","$20,000"


In [44]:
#View West Indies players only
WI_players = IntCrickSals.loc[IntCrickSals["Country"].eq("West Indies")]
WI_players.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
119,Jason Holder,West Indies,$80000,"$6,500","$4,500","$3,000"
120,Kraigg Brathwaite,West Indies,$60000,"$6,500","$4,500","$3,000"
121,Jermaine Blackwood,West Indies,$60000,"$6,500","$4,500","$3,000"
122,Nkrumah Bonner,West Indies,$60000,"$6,500","$4,500","$3,000"
123,Rahkeem Cornwall,West Indies,$60000,"$6,500","$4,500","$3,000"


In [45]:
#View South Africa players only
SouthA_players = IntCrickSals.loc[IntCrickSals["Country"].eq("South Africa")]
SouthA_players.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
84,Temba Bavuma,South Africa,"$100,000","$4,500","$1,200",$800
85,Quinton de Kock,South Africa,"$350,000","$4,500","$1,200",$800
86,Dean Elgar,South Africa,"$100,000","$4,500","$1,200",$800
87,Beuran Hendricks,South Africa,"$100,000","$4,500","$1,200",$800
88,Reeza Hendricks,South Africa,"$100,000","$4,500","$1,200",$800


In [46]:
#View New Zealand players only
NZ_players = IntCrickSals.loc[IntCrickSals["Country"].eq("New Zealand")]
NZ_players.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
100,Kane Williamson (C),New Zealand,"$440,000","$8,495","$3,682","$2,407"
101,Martin Guptill,New Zealand,"$350,000",,"$3,682","$2,407"
102,Colin Munro,New Zealand,"$180,000",,,"$2,407"
103,Henry Nicholls,New Zealand,"$85,585","$8,495","$3,682",
104,Ross Taylor,New Zealand,"$400,000","$8,495",,"$2,407"


In [47]:
#View Sri Lanka players only
SriL_players = IntCrickSals.loc[IntCrickSals["Country"].eq("Sri Lanka")]
SriL_players.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
137,Dimuth Karunaratne,Sri Lanka,"$100,000","$7,500","$5,500","$3,500"
138,Dinesh Chandimal,Sri Lanka,"$80,000","$7,500","$5,500","$3,500"
139,Dasun Shanaka,Sri Lanka,"$65,000","$7,500","$5,500","$3,500"
140,Suranga Lakmal,Sri Lanka,"$60,000","$7,500","$5,500","$3,500"
141,Lasith Embuldeniya,Sri Lanka,"$60,000","$7,500","$5,500","$3,500"


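Looked at the different salary levels: the default currency is US dollars, but England salaries are in pounds, India salaries are in Indian rupees and Pakistan salaries are in Pakistani rupees (Pakistan's are also monthly, whereas all the others are annual). 1.25 million PKR = £4,932.
1 Indian crore = £99,590; 1 PKR = £0.0035. Note: these rates do not agree with each other (1.25 million PKR × 0.0035 = £4,375) or with the £99,720 per crore used for the auction prices earlier, so they need reconciling before the salaries are converted.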

In [48]:
print(IntCrickSals[IntCrickSals["Country"] == "New Zealand"])

                  Players      Country Retainer Fee / contract / base salary  \
100   Kane Williamson (C)  New Zealand                              $440,000   
101        Martin Guptill  New Zealand                              $350,000   
102           Colin Munro  New Zealand                              $180,000   
103        Henry Nicholls  New Zealand                               $85,585   
104           Ross Taylor  New Zealand                              $400,000   
105        Daryl Mitchell  New Zealand                               $85,585   
106            Tom Latham  New Zealand                              $205,266   
107          Tom Blundell  New Zealand                               $85,585   
108           Tim Seifert  New Zealand                               $50,000   
109  Bradley John Watling  New Zealand                               $50,000   
110   Colin de Grandhomme  New Zealand                              $350,000   
111         James Neesham  New Zealand  

In [49]:
#To replace 'Crores' for India players - tried this approach below throughout whole dataframe column and ended up corrupting some other data - not sure why
# Swap the word 'Crores' for seven zeros in the retainer text of rows 20-46 only.
# .loc slices by label, so both end labels (20 and 46) are included.
# The leading digit and the zeros stay separated by a space (e.g. '7 0000000'),
# as the printout below shows.
# NOTE(review): a likely cause of the corruption mentioned above is that .str methods
# return NaN for cells that are not strings - TODO confirm.
IntCrickSals.loc[20:46, 'Retainer Fee / contract / base salary'] = IntCrickSals.loc[20:46, 'Retainer Fee / contract / base salary'].str.replace('Crores','0000000')
print(IntCrickSals[IntCrickSals["Country"] == "India"])

                Players Country Retainer Fee / contract / base salary  \
20          Virat Kohli   India                            7 0000000    
21     Rohit Sharma (C)   India                            7 0000000    
22       Jasprit Bumrah   India                   7 0000000 per month   
23  Ravichandran Ashwin   India                            5 0000000    
24      Ravindra Jadeja   India                            5 0000000    
25         Rishabh Pant   India                             5 0000000   
26        KL Rahul (VC)   India                             5 0000000   
27       Mohammad Shami   India                             5 0000000   
28    Cheteshwar Pujara   India                             3 0000000   
29       Ajinkya Rahane   India                             3 0000000   
30           Axer Patel   India                             3 0000000   
31       Shradul Thakur   India                             3 0000000   
32         Sheryas Iyer   India                    

In [50]:
#So to replace 'lakh'
# Same trick for the per-match fees: the word for lakh becomes five zeros
# (1 lakh = 100,000 rupees).
# The match is case-sensitive: 'lakh' is used for Test/ODI and 'Lakh' for T20 -
# presumably mirroring the spelling in the raw data; TODO confirm.
# Test Fee stops at row 44; rows 45 and 46 are overwritten by hand in the next cell.
IntCrickSals.loc[20:44, 'Test Fee'] = IntCrickSals.loc[20:44, 'Test Fee'].str.replace('lakh','00000')
IntCrickSals.loc[20:46, 'ODI Fee'] = IntCrickSals.loc[20:46, 'ODI Fee'].str.replace('lakh','00000')
IntCrickSals.loc[20:46, 'T20 Fee'] = IntCrickSals.loc[20:46, 'T20 Fee'].str.replace('Lakh', '00000')
print(IntCrickSals[IntCrickSals["Country"] == "India"])


                Players Country Retainer Fee / contract / base salary  \
20          Virat Kohli   India                            7 0000000    
21     Rohit Sharma (C)   India                            7 0000000    
22       Jasprit Bumrah   India                   7 0000000 per month   
23  Ravichandran Ashwin   India                            5 0000000    
24      Ravindra Jadeja   India                            5 0000000    
25         Rishabh Pant   India                             5 0000000   
26        KL Rahul (VC)   India                             5 0000000   
27       Mohammad Shami   India                             5 0000000   
28    Cheteshwar Pujara   India                             3 0000000   
29       Ajinkya Rahane   India                             3 0000000   
30           Axer Patel   India                             3 0000000   
31       Shradul Thakur   India                             3 0000000   
32         Sheryas Iyer   India                    

Looked up Deepak Chahar and Mayank Agarwal - there is a mistake in their data; both are on 15 lakh per Test - https://www.timesofsports.com/cricket/indian-players-salary/

In [51]:
# Manual correction for rows 45 and 46: Test fee of 15 lakh = 1,500,000 rupees
IntCrickSals.loc[[45, 46], 'Test Fee'] = 1500000
print(IntCrickSals.loc[IntCrickSals["Country"].eq("India")])

                Players Country Retainer Fee / contract / base salary  \
20          Virat Kohli   India                            7 0000000    
21     Rohit Sharma (C)   India                            7 0000000    
22       Jasprit Bumrah   India                   7 0000000 per month   
23  Ravichandran Ashwin   India                            5 0000000    
24      Ravindra Jadeja   India                            5 0000000    
25         Rishabh Pant   India                             5 0000000   
26        KL Rahul (VC)   India                             5 0000000   
27       Mohammad Shami   India                             5 0000000   
28    Cheteshwar Pujara   India                             3 0000000   
29       Ajinkya Rahane   India                             3 0000000   
30           Axer Patel   India                             3 0000000   
31       Shradul Thakur   India                             3 0000000   
32         Sheryas Iyer   India                    

In [52]:
# Manual override of the ODI and T20 fees for row 21 (Rohit Sharma in the printout below).
# NOTE(review): the source of these two figures is not recorded in the notebook - TODO cite.
IntCrickSals.loc[21, 'ODI Fee'] = 720000
IntCrickSals.loc[21, 'T20 Fee'] = 360000
print(IntCrickSals[IntCrickSals["Country"] == "India"])

                Players Country Retainer Fee / contract / base salary  \
20          Virat Kohli   India                            7 0000000    
21     Rohit Sharma (C)   India                            7 0000000    
22       Jasprit Bumrah   India                   7 0000000 per month   
23  Ravichandran Ashwin   India                            5 0000000    
24      Ravindra Jadeja   India                            5 0000000    
25         Rishabh Pant   India                             5 0000000   
26        KL Rahul (VC)   India                             5 0000000   
27       Mohammad Shami   India                             5 0000000   
28    Cheteshwar Pujara   India                             3 0000000   
29       Ajinkya Rahane   India                             3 0000000   
30           Axer Patel   India                             3 0000000   
31       Shradul Thakur   India                             3 0000000   
32         Sheryas Iyer   India                    

In [53]:
# Check dtypes and non-null counts to see which salary/fee columns still have gaps
IntCrickSals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 6 columns):
 #   Column                                 Non-Null Count  Dtype 
---  ------                                 --------------  ----- 
 0   Players                                176 non-null    object
 1   Country                                176 non-null    object
 2   Retainer Fee / contract / base salary  165 non-null    object
 3   Test Fee                               171 non-null    object
 4   ODI Fee                                167 non-null    object
 5   T20 Fee                                164 non-null    object
dtypes: object(6)
memory usage: 8.4+ KB


In [54]:
# Flag the rows with no retainer / base salary recorded, then list them
null_series = IntCrickSals['Retainer Fee / contract / base salary'].isna()
IntCrickSals.loc[null_series]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
115,Lachlan Ferguson,New Zealand,,"$8,495","$3,682","$2,407"
156,Soumya Sarkar,Bangladesh,,"$7,100","$3,600","$2,400"
158,Shadman Islam,Bangladesh,,"$7,100","$3,600","$2,400"
159,Sabbir Rahman,Bangladesh,,"$7,100","$3,600","$2,400"
161,Nazmul Hossain Shanto,Bangladesh,,"$7,100","$3,600","$2,400"
165,Mohmaad Saifudin,Bangladesh,,"$7,100","$3,600","$2,400"
166,Mohammad Mithun Ali,Bangladesh,,"$7,100","$3,600","$2,400"
171,Kamrul Islam Rabbi,Bangladesh,,"$7,100","$3,600","$2,400"
173,Farhad Reza,Bangladesh,,"$7,100","$3,600","$2,400"
174,Anamul Haque,Bangladesh,,"$7,100","$3,600","$2,400"


In [55]:
# Flag the rows with no Test match fee recorded, then list them
null_series2 = IntCrickSals['Test Fee'].isna()
IntCrickSals.loc[null_series2]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
101,Martin Guptill,New Zealand,"$350,000",,"$3,682","$2,407"
102,Colin Munro,New Zealand,"$180,000",,,"$2,407"
105,Daryl Mitchell,New Zealand,"$85,585",,"$3,682","$2,407"
108,Tim Seifert,New Zealand,"$50,000",,,"$2,407"
111,James Neesham,New Zealand,"$350,000",,"$3,682",$2407


In [56]:
# Flag the rows with no ODI match fee recorded, then list them
null_series3 = IntCrickSals['ODI Fee'].isna()
IntCrickSals.loc[null_series3]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",,
7,Fawad Alam,Pakistan,"937,500 PKR per month","PKR 40,000",,
10,Sarfaraz Ahmed,Pakistan,"562,500 PKR per month","PKR 37,500",,"PKR 10,000"
18,"Imran Butt,",Pakistan,"300,500 PKR per month","PKR 30,000",,
102,Colin Munro,New Zealand,"$180,000",,,"$2,407"
104,Ross Taylor,New Zealand,"$400,000","$8,495",,"$2,407"
108,Tim Seifert,New Zealand,"$50,000",,,"$2,407"
109,Bradley John Watling,New Zealand,"$50,000","$8,495",,
118,Neil Wagner,New Zealand,"$85,585","$8,495",,


In [57]:
# Flag the rows with no T20 match fee recorded, then list them
null_series4 = IntCrickSals['T20 Fee'].isna()
IntCrickSals.loc[null_series4]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",,
7,Fawad Alam,Pakistan,"937,500 PKR per month","PKR 40,000",,
9,Yasir Shah,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000",
11,Imam-ul-Haq,Pakistan,"562,500 PKR per month","PKR 37,500","PKR 17,500",
15,Nauman Ali,Pakistan,"562,500 PKR per month","PKR 37,500","PKR 17,500",
16,Abid Ali,Pakistan,"562,500 PKR per month","PKR 37,500","PKR 17,500",
18,"Imran Butt,",Pakistan,"300,500 PKR per month","PKR 30,000",,
103,Henry Nicholls,New Zealand,"$85,585","$8,495","$3,682",
106,Tom Latham,New Zealand,"$205,266","$8,495","$3,682",
109,Bradley John Watling,New Zealand,"$50,000","$8,495",,


In [58]:
# Treat every missing retainer / match fee as 0.
# The fill value is the integer 0, so the object columns now mix strings with ints;
# later .str operations return NaN for those integer cells.
IntCrickSals = IntCrickSals.fillna(0)


In [59]:
# null_series4 was computed before the fill, so this re-displays the rows that had no
# T20 fee and confirms they now hold 0
IntCrickSals[null_series4]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",0,0
7,Fawad Alam,Pakistan,"937,500 PKR per month","PKR 40,000",0,0
9,Yasir Shah,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000",0
11,Imam-ul-Haq,Pakistan,"562,500 PKR per month","PKR 37,500","PKR 17,500",0
15,Nauman Ali,Pakistan,"562,500 PKR per month","PKR 37,500","PKR 17,500",0
16,Abid Ali,Pakistan,"562,500 PKR per month","PKR 37,500","PKR 17,500",0
18,"Imran Butt,",Pakistan,"300,500 PKR per month","PKR 30,000",0,0
103,Henry Nicholls,New Zealand,"$85,585","$8,495","$3,682",0
106,Tom Latham,New Zealand,"$205,266","$8,495","$3,682",0
109,Bradley John Watling,New Zealand,"$50,000","$8,495",0,0


In [60]:
# Start the pounds column as a straight copy of the raw retainer text;
# it is cleaned and converted to GBP in the cells that follow
IntCrickSals['Base_sal_pounds'] = IntCrickSals['Retainer Fee / contract / base salary']
IntCrickSals.head()

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Base_sal_pounds
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR (+ undisclosed Captain allowa...
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR per month
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR per month
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR per month
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",0,0,"937,500 PKRper month"


In [61]:
# Confirm the new column exists and that no nulls remain after the fill
IntCrickSals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 7 columns):
 #   Column                                 Non-Null Count  Dtype 
---  ------                                 --------------  ----- 
 0   Players                                176 non-null    object
 1   Country                                176 non-null    object
 2   Retainer Fee / contract / base salary  176 non-null    object
 3   Test Fee                               176 non-null    object
 4   ODI Fee                                176 non-null    object
 5   T20 Fee                                176 non-null    object
 6   Base_sal_pounds                        176 non-null    object
dtypes: object(7)
memory usage: 9.8+ KB


In [62]:
# Inspect the England rows: the retainer column mixes plain numbers with '£925,000 (...)' text
print(IntCrickSals[IntCrickSals["Country"] == "England"])


              Players  Country          Retainer Fee / contract / base salary  \
47        Eoin Morgan  England  £925,000 (plus undisclosed captain allowance)   
48     James Anderson  England                                         925000   
49       Stuart Broad  England                                         925000   
50           Joe Root  England  £925,000 (Undisclosed test captain allowance)   
51         Ben Stokes  England                                         925000   
52  Jonathan Bairstow  England                                         925000   
53        Jos Buttler  England                                         925000   
54       Chris Woakes  England                                         925000   
55          Moeen Ali  England                                         925000   
56         Sam Curran  England                                         925000   
57       Jofra Archer  England                                         925000   
58          Jason Roy  Engla

In [69]:
# Sanity check: list any rows whose Base_sal_pounds is still null
null_series5 = IntCrickSals['Base_sal_pounds'].isna()
print(IntCrickSals.loc[null_series5])

Empty DataFrame
Columns: [Players, Country, Retainer Fee / contract / base salary, Test Fee, ODI Fee, T20 Fee, Base_sal_pounds]
Index: []


In [64]:
# Rows 47-66 are given a flat 925000: the England retainers are already quoted in pounds,
# so the captain-allowance text on some rows is simply dropped.
# NOTE: the digit-stripping cell further down turns these integers into NaN, which is
# why the same assignment is repeated after it.
IntCrickSals.loc[47:66, 'Base_sal_pounds'] = 925000
print(IntCrickSals[IntCrickSals["Country"] == "England"])

              Players  Country          Retainer Fee / contract / base salary  \
47        Eoin Morgan  England  £925,000 (plus undisclosed captain allowance)   
48     James Anderson  England                                         925000   
49       Stuart Broad  England                                         925000   
50           Joe Root  England  £925,000 (Undisclosed test captain allowance)   
51         Ben Stokes  England                                         925000   
52  Jonathan Bairstow  England                                         925000   
53        Jos Buttler  England                                         925000   
54       Chris Woakes  England                                         925000   
55          Moeen Ali  England                                         925000   
56         Sam Curran  England                                         925000   
57       Jofra Archer  England                                         925000   
58          Jason Roy  Engla

In [65]:
# Inspect the Bangladesh rows: US-dollar retainers, with 0 where no retainer was recorded
print(IntCrickSals[IntCrickSals["Country"] == "Bangladesh"])

                   Players     Country Retainer Fee / contract / base salary  \
155            Tamim Iqbal  Bangladesh                               $57,000   
156          Soumya Sarkar  Bangladesh                                     0   
157        Shakib Al Hasan  Bangladesh                               $57,000   
158          Shadman Islam  Bangladesh                                     0   
159          Sabbir Rahman  Bangladesh                                     0   
160          Rubel Hossain  Bangladesh                               $43,000   
161  Nazmul Hossain Shanto  Bangladesh                                     0   
162        Mushfiqur Rahim  Bangladesh                               $57,000   
163       Mosaddek Hossain  Bangladesh                               $14,000   
164          Mominul Haque  Bangladesh                               $30,000   
165       Mohmaad Saifudin  Bangladesh                                     0   
166    Mohammad Mithun Ali  Bangladesh  

In [66]:
# Re-check for nulls in Base_sal_pounds (none expected at this stage)
null_series6 = IntCrickSals['Base_sal_pounds'].isna()
IntCrickSals.loc[null_series6]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Base_sal_pounds


In [67]:
# Defensive fill only: the previous check found no nulls in Base_sal_pounds,
# so this leaves the column unchanged
IntCrickSals['Base_sal_pounds'] = IntCrickSals['Base_sal_pounds'].fillna(0)
IntCrickSals.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 7 columns):
 #   Column                                 Non-Null Count  Dtype 
---  ------                                 --------------  ----- 
 0   Players                                176 non-null    object
 1   Country                                176 non-null    object
 2   Retainer Fee / contract / base salary  176 non-null    object
 3   Test Fee                               176 non-null    object
 4   ODI Fee                                176 non-null    object
 5   T20 Fee                                176 non-null    object
 6   Base_sal_pounds                        176 non-null    object
dtypes: object(7)
memory usage: 9.8+ KB


In [71]:
# Preview the raw text in Base_sal_pounds before the non-digit characters are stripped
IntCrickSals.head(30)


Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Base_sal_pounds
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR (+ undisclosed Captain allowa...
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR per month
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR per month
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",1.25 million PKR per month
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",0,0,"937,500 PKRper month"
5,Faheem Ashraf,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500","937,500 PKR per month"
6,Fakhar Zaman,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500","937,500 PKR per month"
7,Fawad Alam,Pakistan,"937,500 PKR per month","PKR 40,000",0,0,"937,500 PKR per month"
8,Shadab Khan,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500","937,500 PKR per month"
9,Yasir Shah,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000",0,"937,500 PKR per month"


In [72]:
# Strip every non-digit character so only the numeric part of the salary text remains.
# regex=True is passed explicitly: pandas < 2.0 warned that the default would change,
# and from pandas 2.0 the default is regex=False, which would treat r'\D+' as a literal
# string and leave the column untouched.
# Caveats, both handled by hand in later cells:
#   * the decimal point is stripped too, so '1.25 million PKR ...' becomes '125';
#   * cells that are not strings (the 0 fill values and the integer 925000 England rows)
#     come back from the .str accessor as NaN.
IntCrickSals['Base_sal_pounds'] = IntCrickSals['Base_sal_pounds'].str.replace(r'\D+', '', regex=True)
IntCrickSals.head(30)

  IntCrickSals['Base_sal_pounds'] = IntCrickSals['Base_sal_pounds'].str.replace(r'\D+', '')


Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Base_sal_pounds
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000",125
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",125
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",125
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",125
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",0,0,937500
5,Faheem Ashraf,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500",937500
6,Fakhar Zaman,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500",937500
7,Fawad Alam,Pakistan,"937,500 PKR per month","PKR 40,000",0,0,937500
8,Shadab Khan,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000","PKR 12,500",937500
9,Yasir Shah,Pakistan,"937,500 PKR per month","PKR 40,000","PKR 20,000",0,937500


In [73]:
# List the rows that the digit-stripping step turned into NaN
null_series7 = IntCrickSals['Base_sal_pounds'].isna()
IntCrickSals.loc[null_series7]

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Base_sal_pounds
47,Eoin Morgan,England,"£925,000 (plus undisclosed captain allowance)",14500,4500,4500,
48,James Anderson,England,925000,14500,4500,4500,
49,Stuart Broad,England,925000,14500,4500,4500,
50,Joe Root,England,"£925,000 (Undisclosed test captain allowance)",14500,4500,4500,
51,Ben Stokes,England,925000,14500,4500,4500,
52,Jonathan Bairstow,England,925000,14500,4500,4500,
53,Jos Buttler,England,925000,14500,4500,4500,
54,Chris Woakes,England,925000,14500,4500,4500,
55,Moeen Ali,England,925000,14500,4500,4500,
56,Sam Curran,England,925000,14500,4500,4500,


In [74]:
# Repair the NaNs left by the digit-stripping step: restore the England retainers
# (rows 47-66, already in pounds) and set every other NaN back to 0.
IntCrickSals.loc[47:66, 'Base_sal_pounds'] = 925000
IntCrickSals['Base_sal_pounds'] = IntCrickSals['Base_sal_pounds'].fillna(0)
# null_series7 was computed before the repair, so this prints the rows that had been NaN
print(IntCrickSals[null_series7])

                   Players      Country  \
47             Eoin Morgan      England   
48          James Anderson      England   
49            Stuart Broad      England   
50                Joe Root      England   
51              Ben Stokes      England   
52       Jonathan Bairstow      England   
53             Jos Buttler      England   
54            Chris Woakes      England   
55               Moeen Ali      England   
56              Sam Curran      England   
57            Jofra Archer      England   
58               Jason Roy      England   
59               Mark Wood      England   
60             Adil Rashid      England   
61              Rory Burns      England   
62             Zak Crawley      England   
63              Jack Leach      England   
64             Dawid Malan      England   
65              Ollie Pope      England   
66          Ollie Robinson      England   
115       Lachlan Ferguson  New Zealand   
156          Soumya Sarkar   Bangladesh   
158        

In [75]:
# The column still holds digit strings mixed with plain integers; cast it all to int
IntCrickSals['Base_sal_pounds'] = IntCrickSals['Base_sal_pounds'].astype(int)

In [76]:
# Confirm Base_sal_pounds is now an integer column
IntCrickSals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 7 columns):
 #   Column                                 Non-Null Count  Dtype 
---  ------                                 --------------  ----- 
 0   Players                                176 non-null    object
 1   Country                                176 non-null    object
 2   Retainer Fee / contract / base salary  176 non-null    object
 3   Test Fee                               176 non-null    object
 4   ODI Fee                                176 non-null    object
 5   T20 Fee                                176 non-null    object
 6   Base_sal_pounds                        176 non-null    int32 
dtypes: int32(1), object(6)
memory usage: 9.1+ KB


In [77]:
# Inspect the Australia rows: some retainers are written as '$1 million' / '$1.5 million',
# which the digit strip reduced to '1' / '15', so those rows need setting by hand
print(IntCrickSals[IntCrickSals["Country"] == "Australia"])

               Players    Country Retainer Fee / contract / base salary  \
67         Aaron Finch  Australia                          $1.5 million   
68           Tim Paine  Australia                            $1 million   
69         Steve Smith  Australia                            $1 million   
70         Pat Cummins  Australia                              $850,000   
71       Glenn Maxwell  Australia                              $850,000   
72        David Warner  Australia                            $1 million   
73      Mitchell Starc  Australia                              $750,000   
74          Adam Zampa  Australia                              $700,000   
75     James Pattinson  Australia                              $500,000   
76      Josh Hazlewood  Australia                              $500,000   
77         Nathan Lyon  Australia                              $500,000   
78         Ashton Agar  Australia                              $500,000   
79          Alex Carey  A

In [79]:
#NOTE - got stuck here trying to remove text from some of numbers cols in order to 
#convert everything into pounds - will leave for now to research and concentrate on joining 
#some of previous
#Eventually sorted out,but laborious
#
# Convert Base_sal_pounds from each country's own currency into annual pounds.
# Row ranges are hard-coded label slices (both ends inclusive) taken from the table order.
# WARNING: this cell is not idempotent - running it twice applies the rates twice.

# Work in float from the start so the fractional results below do not depend on pandas
# silently upcasting an integer column on assignment.
IntCrickSals['Base_sal_pounds'] = IntCrickSals['Base_sal_pounds'].astype(float)

# Pakistan, rows 0-3: '1.25 million PKR per month' was reduced to '125' by the digit
# strip, so the annual figure is set directly: 4932 (pounds per month) * 12 = 59184.
IntCrickSals.loc[0:3, 'Base_sal_pounds'] = 59184
# Pakistan, rows 4-19: monthly PKR -> annual pounds (12 months * 0.0035 GBP per PKR)
IntCrickSals.loc[4:19, 'Base_sal_pounds'] = IntCrickSals.loc[4:19, 'Base_sal_pounds']*12*0.0035
# India, rows 20-46: rupees -> pounds
IntCrickSals.loc[20:46, 'Base_sal_pounds'] = IntCrickSals.loc[20:46, 'Base_sal_pounds']*0.0099
# England, rows 47-66, is already in pounds and is left untouched.

# Australia rows written as '... million' lost their magnitude in the digit strip,
# so they are set by hand: $1.5 million -> 1210537, $1 million -> 807025.
IntCrickSals.loc[67, 'Base_sal_pounds'] = 1210537
# Row 72 (David Warner, '$1 million') was previously set to 59184, the Pakistan figure
# used above; it takes the same value as the other '$1 million' rows 68 and 69.
IntCrickSals.loc[[68, 69, 72], 'Base_sal_pounds'] = 807025

# Every remaining US-dollar retainer -> pounds at 0.81: the rest of the Australia block
# (70-71, 73-83) and the other dollar-denominated blocks (New Zealand is 100-118,
# Sri Lanka 137-154, Bangladesh 155-175; 84-99 and 119-136 are not shown in this
# notebook's printouts - TODO confirm countries).
USD_TO_GBP = 0.81
for first_row, last_row in [(70, 71), (73, 83), (84, 99), (100, 118), (119, 136), (137, 154), (155, 175)]:
    IntCrickSals.loc[first_row:last_row, 'Base_sal_pounds'] = IntCrickSals.loc[first_row:last_row, 'Base_sal_pounds']*USD_TO_GBP

IntCrickSals.head()
        

Unnamed: 0,Players,Country,Retainer Fee / contract / base salary,Test Fee,ODI Fee,T20 Fee,Base_sal_pounds
0,Babar Azam,Pakistan,1.25 million PKR (+ undisclosed Captain allowa...,"PKR 50,000","PKR 25,000","PKR 15,000",59184.0
1,Mohammad Rizwan,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",59184.0
2,Hasan Ali,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",59184.0
3,Shaheen Afridi,Pakistan,1.25 million PKR per month,"PKR 50,000","PKR 25,000","PKR 15,000",59184.0
4,Azhar Ali,Pakistan,"937,500 PKRper month","PKR 40,000",0,0,39375.0


In [80]:
# Align the key column name with the stats tables ('Player') ahead of the merges below
SoldUnSold.rename({'PLAYER': 'Player'}, axis='columns', inplace=True)
SoldUnSold.head()

Unnamed: 0,Player,Country,Team,Type,Auction-Base_price,Auc-Base_price_pounds
0,Deepak Chahar,India,Chennai Super Kings,Bowler,140000000,1396080.0
1,Ambati Rayudu,India,Chennai Super Kings,Wicket Keeper,67500000,673110.0
2,Dwayne Bravo,West Indies,Chennai Super Kings,All-Rounder,44000000,438768.0
3,Shivam Dube,India,Chennai Super Kings,All-Rounder,40000000,398880.0
4,Chris Jordan,England,Chennai Super Kings,All-Rounder,36000000,358992.0


In [81]:
# Left-join the IPL batting stats onto the auction list by player name; players with no
# batting record keep NaN in every batting column
IPLPriceNStat1 = pd.merge(SoldUnSold, IPLBatStat, how='left', on='Player')
IPLPriceNStat1.info()
IPLPriceNStat1.head()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 600 entries, 0 to 599
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Player                 600 non-null    object 
 1   Country                600 non-null    object 
 2   Team                   600 non-null    object 
 3   Type                   600 non-null    object 
 4   Auction-Base_price     600 non-null    int32  
 5   Auc-Base_price_pounds  600 non-null    float64
 6   IPL_Bat_Rank           46 non-null     float64
 7   Mat                    46 non-null     float64
 8   Inns                   46 non-null     float64
 9   NO                     46 non-null     float64
 10  Runs                   46 non-null     float64
 11  HS                     46 non-null     float64
 12  Avg                    46 non-null     float64
 13  BF                     46 non-null     float64
 14  SR                     46 non-null     float64
 15  100   

Unnamed: 0,Player,Country,Team,Type,Auction-Base_price,Auc-Base_price_pounds,IPL_Bat_Rank,Mat,Inns,NO,Runs,HS,Avg,BF,SR,100,50,4s,6s,HS_was_NO
0,Deepak Chahar,India,Chennai Super Kings,Bowler,140000000,1396080.0,,,,,,,,,,,,,,
1,Ambati Rayudu,India,Chennai Super Kings,Wicket Keeper,67500000,673110.0,12.0,188.0,175.0,31.0,4190.0,100.0,29.1,3296.0,127.12,1.0,22.0,349.0,164.0,True
2,Dwayne Bravo,West Indies,Chennai Super Kings,All-Rounder,44000000,438768.0,58.0,161.0,113.0,44.0,1560.0,70.0,22.61,1204.0,129.57,0.0,5.0,120.0,66.0,True
3,Shivam Dube,India,Chennai Super Kings,All-Rounder,40000000,398880.0,103.0,35.0,33.0,5.0,688.0,95.0,24.57,516.0,133.33,0.0,3.0,46.0,38.0,True
4,Chris Jordan,England,Chennai Super Kings,All-Rounder,36000000,358992.0,,,,,,,,,,,,,,


In [82]:
# Left-join the IPL bowling stats as well. Column names shared with the batting table
# get pandas' default suffixes: _x = batting, _y = bowling.
# NOTE(review): BBI shows up as timestamps in the preview below - presumably figures
# such as 4/13 were read as dates when the spreadsheet was loaded; TODO confirm.
IPLPriceNStat2 = pd.merge(IPLPriceNStat1, IPLBowlStat, how='left', on='Player')
IPLPriceNStat2.info()
IPLPriceNStat2.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 600 entries, 0 to 599
Data columns (total 30 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Player                 600 non-null    object 
 1   Country                600 non-null    object 
 2   Team                   600 non-null    object 
 3   Type                   600 non-null    object 
 4   Auction-Base_price     600 non-null    int32  
 5   Auc-Base_price_pounds  600 non-null    float64
 6   IPL_Bat_Rank           46 non-null     float64
 7   Mat_x                  46 non-null     float64
 8   Inns_x                 46 non-null     float64
 9   NO                     46 non-null     float64
 10  Runs_x                 46 non-null     float64
 11  HS                     46 non-null     float64
 12  Avg_x                  46 non-null     float64
 13  BF                     46 non-null     float64
 14  SR_x                   46 non-null     float64
 15  100   

Unnamed: 0,Player,Country,Team,Type,Auction-Base_price,Auc-Base_price_pounds,IPL_Bat_Rank,Mat_x,Inns_x,NO,...,IPL_Bowl_Rank,Mat_y,Inns_y,Ov,Runs_y,Wkts,BBI,Avg_y,Econ,SR_y
0,Deepak Chahar,India,Chennai Super Kings,Bowler,140000000,1396080.0,,,,,...,52.0,63.0,63.0,220.0,1722.0,59.0,2013-04-01 00:00:00,29.19,7.8,22.44
1,Ambati Rayudu,India,Chennai Super Kings,Wicket Keeper,67500000,673110.0,12.0,188.0,175.0,31.0,...,,,,,,,,,,
2,Dwayne Bravo,West Indies,Chennai Super Kings,All-Rounder,44000000,438768.0,58.0,161.0,113.0,44.0,...,1.0,161.0,158.0,516.0,4359.0,183.0,2022-04-01 00:00:00,23.82,8.38,17.05
3,Shivam Dube,India,Chennai Super Kings,All-Rounder,40000000,398880.0,103.0,35.0,33.0,5.0,...,,,,,,,,,,
4,Chris Jordan,England,Chennai Super Kings,All-Rounder,36000000,358992.0,,,,,...,114.0,28.0,28.0,88.0,833.0,27.0,2023-11-04 00:00:00,30.85,9.32,19.85


Could consider adding MVP ratings here, but had trouble joining them - this needs further exploration.
The join has only brought across a fraction of the players in the all-time stats, presumably because many of them are no longer playing.
Need to consider a way to search for and include players who may not be playing this year, but could be in future years.
Perhaps this could be supplied by the full international T20 data.
Let's bring that in now, so we have it to hand.


In [83]:
# Load the international men's T20 batting, bowling and fielding tables from CSV.
# NOTE(review): these are absolute, machine-specific paths, so the cell only runs on
# the original author's machine.
IntT20Bat = pd.read_csv(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\International Mens Cricket Stats\Batting\t20.csv')
IntT20Bowl = pd.read_csv(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\International Mens Cricket Stats\Bowling\Bowling_t20.csv')
IntT20Field = pd.read_csv(r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\raw_data\International Mens Cricket Stats\Fielding\Fielding_t20.csv')


In [84]:
# Inspect the batting table: most stat columns load as object dtype, the old index
# arrives as 'Unnamed: 0', and the trailing 'Unnamed: 15' column is entirely null
IntT20Bat.info()
IntT20Bat.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2006 entries, 0 to 2005
Data columns (total 17 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   2006 non-null   int64  
 1   Player       2006 non-null   object 
 2   Span         2006 non-null   object 
 3   Mat          2006 non-null   int64  
 4   Inns         2006 non-null   object 
 5   NO           2006 non-null   object 
 6   Runs         2006 non-null   object 
 7   HS           2006 non-null   object 
 8   Ave          2006 non-null   object 
 9   BF           2006 non-null   object 
 10  SR           2006 non-null   object 
 11  100          2006 non-null   object 
 12  50           2006 non-null   object 
 13  0            2006 non-null   object 
 14  4s           2006 non-null   object 
 15  6s           2006 non-null   object 
 16  Unnamed: 15  0 non-null      float64
dtypes: float64(1), int64(2), object(14)
memory usage: 266.5+ KB


Unnamed: 0.1,Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s,Unnamed: 15
0,0,V Kohli (INDIA),2010-2019,75,70,20,2633,94*,52.66,1907,138.07,0,24,2,247,71,
1,1,RG Sharma (INDIA),2007-2019,104,96,14,2633,118,32.1,1905,138.21,4,19,6,234,120,
2,2,MJ Guptill (NZ),2009-2019,83,80,7,2436,105,33.36,1810,134.58,2,15,2,215,113,
3,3,Shoaib Malik (ICC/PAK),2006-2019,111,104,30,2263,75,30.58,1824,124.06,0,7,1,186,61,
4,4,BB McCullum (NZ),2005-2015,71,70,10,2140,123,35.66,1571,136.21,2,13,3,199,91,


In [85]:
# Inspect the bowling table: same layout issues as batting (object-dtype stats,
# 'Unnamed: 0' index column, entirely null trailing 'Unnamed: 14' column)
IntT20Bowl.info()
IntT20Bowl.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2006 entries, 0 to 2005
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   2006 non-null   int64  
 1   Player       2006 non-null   object 
 2   Span         2006 non-null   object 
 3   Mat          2006 non-null   int64  
 4   Inns         2006 non-null   object 
 5   Overs        2006 non-null   object 
 6   Mdns         2006 non-null   object 
 7   Runs         2006 non-null   object 
 8   Wkts         2006 non-null   object 
 9   BBI          2006 non-null   object 
 10  Ave          2006 non-null   object 
 11  Econ         2006 non-null   object 
 12  SR           2006 non-null   object 
 13  4            2006 non-null   object 
 14  5            2006 non-null   object 
 15  Unnamed: 14  0 non-null      float64
dtypes: float64(1), int64(2), object(13)
memory usage: 250.9+ KB


Unnamed: 0.1,Unnamed: 0,Player,Span,Mat,Inns,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5,Unnamed: 14
0,0,SL Malinga (SL),2006-2019,79,79,284.5,1,2061,106,5/6,19.44,7.23,16.1,1,2,
1,1,Shahid Afridi (ICC/PAK),2006-2018,99,97,361.2,4,2396,98,4/11,24.44,6.63,22.1,3,0,
2,2,Shakib Al Hasan (BDESH),2006-2019,76,75,277.5,2,1894,92,5/20,20.58,6.81,18.1,3,1,
3,3,Saeed Ajmal (PAK),2009-2015,64,63,238.2,2,1516,85,4/19,17.83,6.36,16.8,4,0,
4,4,Umar Gul (PAK),2007-2016,60,60,200.3,2,1443,85,5/6,16.97,7.19,14.1,4,2,


In [86]:
# Column dtypes and non-null counts for the international T20 fielding table
IntT20Field.info()
# Preview the first five rows (5 is also the pandas default)
IntT20Field.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2006 entries, 0 to 2005
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   2006 non-null   int64  
 1   Player       2006 non-null   object 
 2   Span         2006 non-null   object 
 3   Mat          2006 non-null   int64  
 4   Inns         2006 non-null   object 
 5   Dis          2006 non-null   object 
 6   Ct           2006 non-null   object 
 7   St           2006 non-null   object 
 8   Ct Wk        2006 non-null   object 
 9   Ct Fi        2006 non-null   object 
 10  MD           2006 non-null   object 
 11  D/I          2006 non-null   object 
 12  Unnamed: 11  0 non-null      float64
dtypes: float64(1), int64(2), object(10)
memory usage: 203.9+ KB


Unnamed: 0.1,Unnamed: 0,Player,Span,Mat,Inns,Dis,Ct,St,Ct Wk,Ct Fi,MD,D/I,Unnamed: 11
0,0,MS Dhoni (INDIA),2006-2019,98,97,91,57,34,57,0,5 (5ct 0st),0.938,
1,1,AB de Villiers (SA),2006-2017,78,78,72,65,7,21,44,4 (4ct 0st),0.923,
2,2,D Ramdin (WI),2006-2019,71,71,63,43,20,43,0,4 (4ct 0st),0.887,
3,3,Kamran Akmal (PAK),2006-2017,58,58,60,28,32,28,0,4 (0ct 4st),1.034,
4,4,Mushfiqur Rahim (BDESH),2006-2019,84,82,59,31,28,30,1,3 (1ct 2st),0.7190000000000001,


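If we need to use this data, the Player column will need reworking first: names here take the form "V Kohli (INDIA)" (initials plus a country suffix), so they won't automatically match the full player names used in the IPL listings.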

In [87]:
# Persist the cleaned tables to the interim data folder.
# The target directory is defined once and created if missing, so this cell
# does not fail with a missing-directory error on a fresh checkout.
# File names and formats are kept exactly as before (the batting stats are
# written as CSV, the rest as Excel) so existing readers of these files still work.
# NOTE(review): the DataFrame index is written by default, so reading these
# files back will produce an extra "Unnamed: 0" column - consider index=False
# if downstream code does not rely on it.
interim_dir = r'C:\Users\Pearc\OneDrive\Documents\Data Science\Springboard\SpringboardCapstone2\data\interim_data'
os.makedirs(interim_dir, exist_ok=True)

IPLPriceNStat2.to_excel(os.path.join(interim_dir, 'IPLPriceNStatcleaneddata.xlsx'))
IPLBatStat.to_csv(os.path.join(interim_dir, 'IPLBatStatscleaneddata.csv'))
IPLBowlStat.to_excel(os.path.join(interim_dir, 'IPLBowlStatscleandata.xlsx'))
IPLMVPStat.to_excel(os.path.join(interim_dir, 'IPLMVPStatscleaneddata.xlsx'))
IntCrickSals.to_excel(os.path.join(interim_dir, 'InternatCrickSalspartcleaneddata.xlsx'))
