# Scrape Data from the ICC Cricket Website

#### Cricket is the most-watched sport after football and is hugely popular in India. Here, we scrape several rankings tables from the ICC website.

### Importing Necessary Libraries

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

# Q1. Scraping ICC Men's ODI Team and Create a Data Frame

In [2]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/team-rankings/odi"
# A timeout keeps the cell from hanging forever if the server never answers.
page1 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page1.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page1.content, "html.parser")

First we store the page address in `url` and send a GET request to the web server. We then use `BeautifulSoup` to parse the returned content.

In [3]:
# Find every <span> tag that carries a team name.
team = soup.find_all('span', class_ = "u-hide-phablet")

# Keep only the visible text of each tag, in page order.
team_name = [span.text for span in team]

Here, we scrape the team names by inspecting the elements on the webpage, and store them in a list.

In [4]:
# Collect matches, points and ratings for every team.
# The first entry of each list comes from the "banner" cells, which use their
# own CSS classes; the remaining table rows are appended after it.
matches = [cell.text for cell in soup.find_all('td', class_ = "rankings-block__banner--matches")]
points = [cell.text for cell in soup.find_all('td', class_ = "rankings-block__banner--points")]
# strip() removes the newlines AND the padding spaces around the banner rating;
# removing only "\n" left the value as "119" followed by a run of blanks.
ratings = [cell.text.strip() for cell in soup.find_all('td', class_ = "rankings-block__banner--rating u-text-right")]

# Matches and points of the other teams share one CSS class, so they arrive
# in a single flat list: even positions are matches, odd positions are points.
new_list = [cell.text for cell in soup.find_all('td', class_ = "table-body__cell u-center-text")]

# Pair the values up; zip() drops a trailing unpaired value, if any.
for played, earned in zip(new_list[0::2], new_list[1::2]):
    matches.append(played)
    points.append(earned)

# Ratings of the other teams.
ratings.extend(cell.text for cell in soup.find_all('td', class_ = "table-body__cell u-text-right rating"))

While scraping matches, points and ratings, we see that the first element holds the information for only one team, and the remaining teams are stored in differently classed elements. To handle this, we scrape both kinds of element for each field and append the values to the same list.

In [5]:
print(len(team_name), len(matches), len(points), len(ratings))

20 20 20 20


We see that all the fetched lists have the same length.

# Create a DataFrame

In [6]:
# Assemble the scraped lists into one table, one row per team.
# The order of the dict keys fixes the column order.
icc_ODI = pd.DataFrame({
    'Team Name': team_name,
    'Matches': matches,
    'Points': points,
    'Ratings': ratings,
})
icc_ODI

Unnamed: 0,Team Name,Matches,Points,Ratings
0,England,27,3226,119 ...
1,New Zealand,22,2508,114
2,India,34,3802,112
3,Pakistan,22,2354,107
4,Australia,29,3071,106
5,South Africa,24,2392,100
6,Bangladesh,30,2753,92
7,Sri Lanka,29,2658,92
8,West Indies,41,2902,71
9,Afghanistan,18,1238,69


In [7]:
df = icc_ODI
df.head()

Unnamed: 0,Team Name,Matches,Points,Ratings
0,England,27,3226,119 ...
1,New Zealand,22,2508,114
2,India,34,3802,112
3,Pakistan,22,2354,107
4,Australia,29,3071,106


In [8]:
df.tail()

Unnamed: 0,Team Name,Matches,Points,Ratings
15,UAE,22,697,32
16,Oman,30,919,31
17,United States,27,641,24
18,Nepal,22,331,15
19,Papua New Guinea,26,209,8


In [9]:
df.sample(5)

Unnamed: 0,Team Name,Matches,Points,Ratings
2,India,34,3802,112
19,Papua New Guinea,26,209,8
6,Bangladesh,30,2753,92
7,Sri Lanka,29,2658,92
1,New Zealand,22,2508,114


In [10]:
df.describe()

Unnamed: 0,Team Name,Matches,Points,Ratings
count,20,20,20,20
unique,20,12,20,18
top,United States,22,2392,92
freq,1,4,1,2


# Q2.Scrap ICC Men's ODI Batsmen and create a dataframe

In [11]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/odi/batting"
# A timeout keeps the cell from hanging forever if the server never answers.
page2 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page2.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page2.content, "html.parser")

In [12]:
# Gather player names, team codes and ratings from the ODI batting page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace("\n", "") for banner in soup.find_all("div", class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--rating")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all('td', class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all('a')
)
team_name.extend(tag.text for tag in soup.find_all('span', class_ = "table-body__logo-text"))
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))

In [13]:
print(len(players), len(team_name), len(rating))

100 100 100


# Creating Data Frame

In [14]:
# Combine the scraped lists into one table, one row per batsman.
Batsmen = pd.DataFrame({
    'Player': players,
    'Team': team_name,
    'Rating': rating,
})
Batsmen

Unnamed: 0,Player,Team,Rating
0,Babar Azam,PAK,890
1,Imam-ul-Haq,PAK,779
2,Rassie van der Dussen,SA,766
3,Quinton de Kock,SA,759
4,Jonny Bairstow,ENG,732
...,...,...,...
95,Mohammad Rizwan,PAK,438
96,Steven Taylor,USA,437
97,Mitchell Marsh,AUS,437
98,Matthew Cross,SCO,436


In [15]:
df = Batsmen
df.head(20)

Unnamed: 0,Player,Team,Rating
0,Babar Azam,PAK,890
1,Imam-ul-Haq,PAK,779
2,Rassie van der Dussen,SA,766
3,Quinton de Kock,SA,759
4,Jonny Bairstow,ENG,732
5,David Warner,AUS,725
6,Virat Kohli,IND,722
7,Rohit Sharma,IND,718
8,Ross Taylor,NZ,701
9,Steve Smith,AUS,697


In [16]:
df.tail(20)

Unnamed: 0,Player,Team,Rating
80,Niroshan Dickwella,SL,465
81,R.K. Paudel,NEP,462
82,Mitchell Santner,NZ,462
83,Afif Hossain,BAN,459
84,Dasun Shanaka,SL,455
85,Colin de Grandhomme,NZ,453
86,Shamarh Brooks,WI,452
87,Rashid Khan,AFG,447
88,Michael Leask,SCO,446
89,Charles Amini,PNG,444


In [17]:
df.sample(10)

Unnamed: 0,Player,Team,Rating
87,Rashid Khan,AFG,447
44,Najibullah Zadran,AFG,541
60,Scott Edwards,NED,504
8,Ross Taylor,NZ,701
50,Aqib Ilyas,OMA,524
73,Soumya Sarkar,BAN,476
43,Hashmatullah Shaidi,AFG,544
55,Aiden Markram,SA,515
76,Sarfaraz Ahmed,PAK,468
29,Tom Latham,NZ,613


In [18]:
df.describe()

Unnamed: 0,Player,Team,Rating
count,100,100,100
unique,100,21,84
top,Janneman Malan,NZ,442
freq,1,9,3


# Q3. Scrape ICC Men's ODI Bowlers and create a data frame

In [19]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/odi/bowling"
# A timeout keeps the cell from hanging forever if the server never answers.
page3 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page3.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page3.content, "html.parser")

In [20]:
# Gather names, team codes, ratings and career-best text from the ODI bowling page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all('div', class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace("\n","") for banner in soup.find_all("div", class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--rating")]
best = [banner.text.replace("\n","") for banner in soup.find_all("span", class_ = "rankings-block__career-best-text")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all("td", class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all('a')
)
# This cell reads the team from the whole <td>, with newlines removed.
team_name.extend(
    cell.text.replace("\n","")
    for cell in soup.find_all("td", class_ = "table-body__cell nationality-logo rankings-table__team")
)
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))
best.extend(
    cell.text.replace("\n","")
    for cell in soup.find_all("td", class_ = "table-body__cell u-text-right u-hide-phablet")
)

In [21]:
print(len(players), len(team_name), len(rating), len(best))

100 100 100 100


# Create Data Frame

In [22]:
# Combine the scraped lists into one table, one row per bowler.
Bowlers = pd.DataFrame({
    'Player': players,
    'Team': team_name,
    'Rating': rating,
    'Best': best,
})
Bowlers

Unnamed: 0,Player,Team,Rating,Best
0,Trent Boult,NZ,775,775 v Australi...
1,Josh Hazlewood,AUS,718,"733 v England,..."
2,Mujeeb Ur Rahman,AFG,676,"712 v Ireland,..."
3,Shaheen Afridi,PAK,661,688 v West Ind...
4,Mohammad Nabi,AFG,657,657 v Zimbabwe...
...,...,...,...,...
95,Khawar Ali,OMA,398,"401 v Nepal, 0..."
96,Sean Abbott,AUS,398,398 v New Zeal...
97,Norman Vanua,PNG,397,"397 v Namibia,..."
98,Jimmy Neesham,NZ,397,458 v Banglade...


In [23]:
df = Bowlers
df.head(20)

Unnamed: 0,Player,Team,Rating,Best
0,Trent Boult,NZ,775,775 v Australi...
1,Josh Hazlewood,AUS,718,"733 v England,..."
2,Mujeeb Ur Rahman,AFG,676,"712 v Ireland,..."
3,Shaheen Afridi,PAK,661,688 v West Ind...
4,Mohammad Nabi,AFG,657,657 v Zimbabwe...
5,Mehedi Hasan,BAN,655,725 v Sri Lank...
6,Matt Henry,NZ,654,691 v Banglade...
7,Mitchell Starc,AUS,653,783 v New Zeal...
8,Rashid Khan,AFG,651,806 v Pakistan...
9,Jasprit Bumrah,IND,642,841 v West Ind...


In [24]:
df.tail(20)

Unnamed: 0,Player,Team,Rating,Best
80,Nuwan Pradeep,SL,432,"506 v England,..."
81,Mohammad Mohammad Saifuddin,BAN,428,496 v West Ind...
82,Jeffrey Vandersay,SL,422,431 v Australi...
83,Assad Vala,PNG,422,424 v United S...
84,Tendai Chatara,ZIM,417,"562 v Ireland,..."
85,Saqib Mahmood,ENG,415,454 v Pakistan...
86,Wahab Riaz,PAK,415,"522 v England,..."
87,Faheem Ashraf,PAK,413,477 v South Af...
88,Richard Ngarava,ZIM,413,413 v Australi...
89,Zahoor Khan,UAE,408,417 v Papua Ne...


In [25]:
df.sample(10)

Unnamed: 0,Player,Team,Rating,Best
10,Mustafizur Rahman,BAN,640,695 v West Ind...
33,Bilal Khan,OMA,532,545 v United S...
65,Anrich Nortje,SA,450,"459 v England,..."
86,Wahab Riaz,PAK,415,"522 v England,..."
63,Imad Wasim,PAK,452,"570 v England,..."
56,Jason Holder,WI,464,486 v Sri Lank...
3,Shaheen Afridi,PAK,661,688 v West Ind...
58,Craig Young,IRE,462,470 v West Ind...
36,Tim Southee,NZ,524,"638 v England,..."
51,Hamza Tahir,SCO,472,"484 v Nepal, 1..."


In [26]:
df.iloc[5:17]

Unnamed: 0,Player,Team,Rating,Best
5,Mehedi Hasan,BAN,655,725 v Sri Lank...
6,Matt Henry,NZ,654,691 v Banglade...
7,Mitchell Starc,AUS,653,783 v New Zeal...
8,Rashid Khan,AFG,651,806 v Pakistan...
9,Jasprit Bumrah,IND,642,841 v West Ind...
10,Mustafizur Rahman,BAN,640,695 v West Ind...
11,Chris Woakes,ENG,640,711 v Sri Lank...
12,Andy McBrine,IRE,630,646 v West Ind...
13,Kagiso Rabada,SA,629,"724 v England,..."
14,Adam Zampa,AUS,622,650 v Pakistan...


In [27]:
df.loc[:3]

Unnamed: 0,Player,Team,Rating,Best
0,Trent Boult,NZ,775,775 v Australi...
1,Josh Hazlewood,AUS,718,"733 v England,..."
2,Mujeeb Ur Rahman,AFG,676,"712 v Ireland,..."
3,Shaheen Afridi,PAK,661,688 v West Ind...


# Q4. Scrap ICC Men's All Rounder in ODI and Create a Data Frame

In [28]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/odi/all-rounder"
# A timeout keeps the cell from hanging forever if the server never answers.
page4 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page4.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page4.content, "html.parser")

In [29]:
# Gather names, team codes, ratings and career-best text from the ODI all-rounder page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace("\n", "") for banner in soup.find_all("div", class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--rating")]
best = [banner.text.replace("\n", "") for banner in soup.find_all("span", class_ = "rankings-block__career-best-text")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all("td", class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all("a")
)
team_name.extend(tag.text for tag in soup.find_all("span", class_ = "table-body__logo-text"))
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))
best.extend(
    cell.text.replace("\n", "")
    for cell in soup.find_all("td", class_ = "table-body__cell u-text-right u-hide-phablet")
)

In [30]:
print(len(players), len(team_name), len(rating), len(best))

20 20 20 20


# Create a Data Frame

In [31]:
# Combine the scraped lists into one table, one row per all-rounder.
All_Rounder = pd.DataFrame({
    'Player Name': players,
    'Team Name': team_name,
    'Rating': rating,
    'Best Performance': best,
})
All_Rounder

Unnamed: 0,Player Name,Team Name,Rating,Best Performance
0,Shakib Al Hasan,BAN,372,453 v Zimbabwe...
1,Mohammad Nabi,AFG,325,349 v Zimbabwe...
2,Rashid Khan,AFG,290,359 v Pakistan...
3,Mitchell Santner,NZ,275,"317 v England,..."
4,Sikandar Raza,ZIM,261,"280 v India, 2..."
5,Zeeshan Maqsood,OMA,238,"238 v Nepal, 1..."
6,Mehedi Hasan,BAN,238,244 v West Ind...
7,Chris Woakes,ENG,235,312 v New Zeal...
8,Glenn Maxwell,AUS,227,"350 v England,..."
9,Imad Wasim,PAK,220,299 v Banglade...


In [32]:
df = All_Rounder
df.head(10)

Unnamed: 0,Player Name,Team Name,Rating,Best Performance
0,Shakib Al Hasan,BAN,372,453 v Zimbabwe...
1,Mohammad Nabi,AFG,325,349 v Zimbabwe...
2,Rashid Khan,AFG,290,359 v Pakistan...
3,Mitchell Santner,NZ,275,"317 v England,..."
4,Sikandar Raza,ZIM,261,"280 v India, 2..."
5,Zeeshan Maqsood,OMA,238,"238 v Nepal, 1..."
6,Mehedi Hasan,BAN,238,244 v West Ind...
7,Chris Woakes,ENG,235,312 v New Zeal...
8,Glenn Maxwell,AUS,227,"350 v England,..."
9,Imad Wasim,PAK,220,299 v Banglade...


In [33]:
df.tail(10)

Unnamed: 0,Player Name,Team Name,Rating,Best Performance
10,Andy McBrine,IRE,218,218 v New Zeal...
11,Shadab Khan,PAK,217,221 v Netherla...
12,Assad Vala,PNG,217,225 v United S...
13,Dhananjaya de Silva,SL,215,220 v Australi...
14,Colin de Grandhomme,NZ,214,"266 v India, 1..."
15,Wanindu Hasaranga,SL,211,218 v Australi...
16,Josh Hazlewood,AUS,211,214 v New Zeal...
17,Hardik Pandya,IND,202,262 v South Af...
18,Trent Boult,NZ,200,"213 v India, 0..."
19,Alzarri Joseph,WI,197,197 v New Zeal...


In [34]:
df.sample(5)

Unnamed: 0,Player Name,Team Name,Rating,Best Performance
10,Andy McBrine,IRE,218,218 v New Zeal...
13,Dhananjaya de Silva,SL,215,220 v Australi...
1,Mohammad Nabi,AFG,325,349 v Zimbabwe...
19,Alzarri Joseph,WI,197,197 v New Zeal...
18,Trent Boult,NZ,200,"213 v India, 0..."


# Q5. Scrap ICC ODI Women's Team and Create a Data Frame

In [35]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/womens/team-rankings/odi"
# A timeout keeps the cell from hanging forever if the server never answers.
page5 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page5.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page5.content, "html.parser")

In [36]:
# Find every <span> tag that carries a team name.
team = soup.find_all('span', class_ = "u-hide-phablet")

# Keep only the visible text of each tag, in page order.
womens_team = [span.text for span in team]

In [37]:
# Collect matches, points and ratings for every women's ODI team.
# The first entry of each list comes from the "banner" cells, which use their
# own CSS classes; the remaining table rows are appended after it.
match = [cell.text for cell in soup.find_all('td', class_ = "rankings-block__banner--matches")]
points = [cell.text for cell in soup.find_all('td', class_ = "rankings-block__banner--points")]
# strip() removes the newlines AND the padding spaces around the banner rating;
# removing only "\n" left the value as "170" followed by a run of blanks.
rating = [cell.text.strip() for cell in soup.find_all('td', class_ = "rankings-block__banner--rating u-text-right")]

# Matches and points of the other teams share one CSS class, so they arrive
# in a single flat list: even positions are matches, odd positions are points.
new_list = [cell.text for cell in soup.find_all('td', class_ = "table-body__cell u-center-text")]

# Pair the values up; zip() drops a trailing unpaired value, if any.
for played, earned in zip(new_list[0::2], new_list[1::2]):
    match.append(played)
    points.append(earned)

# Ratings of the other teams.
rating.extend(cell.text for cell in soup.find_all('td', class_ = "table-body__cell u-text-right rating"))

In [38]:
print(len(womens_team), len(match), len(points), len(rating))

11 11 11 11


# Create DataFrame

In [39]:
# Combine the scraped lists into one table, one row per team.
womens_icc = pd.DataFrame({
    'Team': womens_team,
    'Matches': match,
    'Points': points,
    'Ratings': rating,
})
womens_icc

Unnamed: 0,Team,Matches,Points,Ratings
0,Australia,18,3061,170 ...
1,South Africa,26,3098,119
2,England,25,2904,116
3,India,27,2820,104
4,New Zealand,24,2425,101
5,West Indies,24,2334,97
6,Bangladesh,12,932,78
7,Pakistan,21,1237,59
8,Ireland,11,516,47
9,Sri Lanka,8,353,44


# Q6. Scrap ICC Women's ODI players(Batting) and Create a Data Frame

In [40]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/womens/player-rankings/odi/batting"
# A timeout keeps the cell from hanging forever if the server never answers.
page6 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page6.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page6.content, "html.parser")

In [41]:
# Gather names, team codes, ratings and career-best text from the women's ODI batting page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace('\n', '') for banner in soup.find_all('div', class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all('div', class_ = 'rankings-block__banner--rating')]
best = [banner.text.replace('\n', '') for banner in soup.find_all('span', class_ = "rankings-block__career-best-text")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all("td", class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all("a")
)
team_name.extend(
    tag.text.replace("\n", "")
    for tag in soup.find_all("span", class_ = "table-body__logo-text")
)
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))
best.extend(
    cell.text.replace("\n", "")
    for cell in soup.find_all("td", class_ = "table-body__cell u-text-right u-hide-phablet")
)

In [42]:
print(len(players), len(team_name), len(rating), len(best))

100 100 100 100


# Create DataFrame

In [43]:
# Combine the scraped lists into one table, one row per player.
Top_women_players = pd.DataFrame({
    'Players Name': players,
    'Team': team_name,
    'Rating': rating,
    'Players Best': best,
})
Top_women_players

Unnamed: 0,Players Name,Team,Rating,Players Best
0,Alyssa Healy,AUS,785,"785 v England,..."
1,Beth Mooney,AUS,749,"748 v England,..."
2,Laura Wolvaardt,SA,732,741 v Australi...
3,Natalie Sciver,ENG,725,755 v South Af...
4,Harmanpreet Kaur,IND,716,"731 v England,..."
...,...,...,...,...
95,Hayley Jensen,NZ,212,"271 v England,..."
96,Frederique Overdijk,NED,210,"210 v Ireland,..."
97,Jemimah Rodriques,IND,210,411 v West Ind...
98,Anushka Sanjeewani,SL,209,222 v Australi...


In [44]:
df = Top_women_players
df.head(20)

Unnamed: 0,Players Name,Team,Rating,Players Best
0,Alyssa Healy,AUS,785,"785 v England,..."
1,Beth Mooney,AUS,749,"748 v England,..."
2,Laura Wolvaardt,SA,732,741 v Australi...
3,Natalie Sciver,ENG,725,755 v South Af...
4,Harmanpreet Kaur,IND,716,"731 v England,..."
5,Smriti Mandhana,IND,714,"797 v England,..."
6,Meg Lanning,AUS,710,834 v New Zeal...
7,Rachael Haynes,AUS,701,713 v West Ind...
8,Amy Satterthwaite,NZ,661,756 v Australi...
9,Chamari Athapaththu,SL,655,691 v South Af...


In [45]:
df.tail(10)

Unnamed: 0,Players Name,Team,Rating,Players Best
90,Kyshona Knight,WI,226,379 v Pakistan...
91,Tazmin Brits,SA,224,291 v West Ind...
92,Josephine Nkomo,ZIM,222,"267 v Ireland,..."
93,Rajeshwari Gayakwad,IND,215,229 v South Af...
94,Sophie MacMahon,IRE,213,225 v South Af...
95,Hayley Jensen,NZ,212,"271 v England,..."
96,Frederique Overdijk,NED,210,"210 v Ireland,..."
97,Jemimah Rodriques,IND,210,411 v West Ind...
98,Anushka Sanjeewani,SL,209,222 v Australi...
99,Meghna Singh,IND,208,"212 v England,..."


In [46]:
df.sample(20)

Unnamed: 0,Players Name,Team,Rating,Players Best
90,Kyshona Knight,WI,226,379 v Pakistan...
62,Salma Khatun,BAN,323,366 v South Af...
81,Murshida Khatun,BAN,252,308 v Zimbabwe...
36,Yastika Bhatia,IND,426,"441 v England,..."
82,Shamima Sultana,BAN,248,"248 v England,..."
39,Nida Dar,PAK,411,426 v Sri Lank...
66,Chedean Nation,WI,307,"385 v England,..."
13,Stafanie Taylor,WI,606,766 v Pakistan...
17,Hayley Matthews,WI,567,"591 v India, 1..."
61,Charlotte Dean,ENG,325,"325 v India, 2..."


In [47]:
df.iloc[45:75]

Unnamed: 0,Players Name,Team,Rating,Players Best
45,Lara Goodall,SA,394,"400 v Ireland,..."
46,Nilakshi Silva,SL,384,"384 v India, 0..."
47,Javeria Khan,PAK,382,573 v South Af...
48,Pooja Vastrakar,IND,380,"380 v England,..."
49,Leah Paul,IRE,377,377 v Netherla...
50,Shemaine Campbelle,WI,366,404 v Banglade...
51,Dane van Niekerk,SA,363,677 v Sri Lank...
52,Katherine Brunt,ENG,356,439 v Australi...
53,Brooke Halliday,NZ,347,"449 v England,..."
54,Lauren Down,NZ,346,346 v West Ind...


In [48]:
df.isnull()

Unnamed: 0,Players Name,Team,Rating,Players Best
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False
...,...,...,...,...
95,False,False,False,False
96,False,False,False,False
97,False,False,False,False
98,False,False,False,False


# Q7. Scrap ICC Women's ODI all rounder and Create a Data Frame

In [49]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/womens/player-rankings/odi/all-rounder"
# A timeout keeps the cell from hanging forever if the server never answers.
page7 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page7.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page7.content, "html.parser")

In [50]:
# Gather names, team codes, ratings and career-best text from the women's ODI all-rounder page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace('\n', '') for banner in soup.find_all("div", class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--rating")]
best = [banner.text.replace("\n", "") for banner in soup.find_all("span", class_ = "rankings-block__career-best-text")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all("td", class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all("a")
)
team_name.extend(tag.text for tag in soup.find_all("span", class_ = "table-body__logo-text"))
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))
best.extend(
    cell.text.replace("\n", "")
    for cell in soup.find_all("td", class_ = "table-body__cell u-text-right u-hide-phablet")
)

In [51]:
print(len(players), len(team_name), len(rating), len(best))

20 20 20 20


# Create a DataFrame

In [52]:
# Combine the scraped lists into one table, one row per all-rounder.
all_rounder = pd.DataFrame({
    'Players Name': players,
    'Team': team_name,
    'Rating': rating,
    'Best Performance': best,
})
all_rounder

Unnamed: 0,Players Name,Team,Rating,Best Performance
0,Hayley Matthews,WI,380,380 v New Zeal...
1,Ellyse Perry,AUS,374,548 v West Ind...
2,Natalie Sciver,ENG,357,395 v South Af...
3,Amelia Kerr,NZ,356,356 v West Ind...
4,Marizanne Kapp,SA,349,419 v West Ind...
5,Deepti Sharma,IND,322,397 v South Af...
6,Ashleigh Gardner,AUS,270,279 v West Ind...
7,Jess Jonassen,AUS,246,308 v West Ind...
8,Jhulan Goswami,IND,214,308 v Australi...
9,Katherine Brunt,ENG,207,296 v Australi...


In [53]:
df = all_rounder
df.head()

Unnamed: 0,Players Name,Team,Rating,Best Performance
0,Hayley Matthews,WI,380,380 v New Zeal...
1,Ellyse Perry,AUS,374,548 v West Ind...
2,Natalie Sciver,ENG,357,395 v South Af...
3,Amelia Kerr,NZ,356,356 v West Ind...
4,Marizanne Kapp,SA,349,419 v West Ind...


In [54]:
df.tail()

Unnamed: 0,Players Name,Team,Rating,Best Performance
15,Sophie Devine,NZ,189,305 v Australi...
16,Salma Khatun,BAN,178,170 v Australi...
17,Chamari Athapaththu,SL,178,187 v South Af...
18,Charlotte Dean,ENG,175,"175 v India, 2..."
19,Sune Luus,SA,171,"223 v Ireland,..."


In [55]:
df.sample(5)

Unnamed: 0,Players Name,Team,Rating,Best Performance
10,Sophie Ecclestone,ENG,201,217 v South Af...
0,Hayley Matthews,WI,380,380 v New Zeal...
11,Rumana Ahmed,BAN,201,198 v South Af...
14,Stafanie Taylor,WI,192,559 v New Zeal...
19,Sune Luus,SA,171,"223 v Ireland,..."


In [56]:
df.iloc[-7:]

Unnamed: 0,Players Name,Team,Rating,Best Performance
13,Chloe-Lesleigh Tryon,SA,197,"197 v England,..."
14,Stafanie Taylor,WI,192,559 v New Zeal...
15,Sophie Devine,NZ,189,305 v Australi...
16,Salma Khatun,BAN,178,170 v Australi...
17,Chamari Athapaththu,SL,178,187 v South Af...
18,Charlotte Dean,ENG,175,"175 v India, 2..."
19,Sune Luus,SA,171,"223 v Ireland,..."


In [57]:
df.notnull()

Unnamed: 0,Players Name,Team,Rating,Best Performance
0,True,True,True,True
1,True,True,True,True
2,True,True,True,True
3,True,True,True,True
4,True,True,True,True
5,True,True,True,True
6,True,True,True,True
7,True,True,True,True
8,True,True,True,True
9,True,True,True,True


# Q8. Scrap ICC Men's T20 Team and Create a Data Frame

In [59]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/team-rankings/t20i"
# A timeout keeps the cell from hanging forever if the server never answers.
page8 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page8.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page8.content, "html.parser")

In [60]:
# Find every <span> tag that carries a team name.
team = soup.find_all("span", class_ = "u-hide-phablet")

# Keep only the visible text of each tag, in page order.
team_name = [span.text for span in team]

In [62]:
# Collect matches, points and ratings for every men's T20I team.
# The first entry of each list comes from the "banner" cells, which use their
# own CSS classes; the remaining table rows are appended after it.
matches = [cell.text for cell in soup.find_all("td", class_ = "rankings-block__banner--matches")]
points = [cell.text for cell in soup.find_all("td", class_ = "rankings-block__banner--points")]
# strip() removes the newlines AND the padding spaces around the banner rating;
# removing only "\n" left the value as "268" followed by a run of blanks.
rating = [cell.text.strip() for cell in soup.find_all("td", class_ = "rankings-block__banner--rating u-text-right")]

# Matches and points of the other teams share one CSS class, so they arrive
# in a single flat list: even positions are matches, odd positions are points.
new_list = [cell.text for cell in soup.find_all("td", class_ = "table-body__cell u-center-text")]

# Pair the values up; zip() drops a trailing unpaired value, if any.
for played, earned in zip(new_list[0::2], new_list[1::2]):
    matches.append(played)
    points.append(earned)

# Ratings of the other teams.
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell u-text-right rating"))

In [63]:
print(len(team_name), len(matches), len(points), len(rating))

84 84 84 84


# Create a Data Frame

In [64]:
# Combine the scraped lists into one table, one row per team.
T20_team = pd.DataFrame({
    'T20 Team': team_name,
    'Matches': matches,
    'Points': points,
    'Rating': rating,
})
T20_team

Unnamed: 0,T20 Team,Matches,Points,Rating
0,India,58,15556,268 ...
1,England,45,11814,263
2,South Africa,39,10135,260
3,Pakistan,51,13067,256
4,New Zealand,46,11663,254
...,...,...,...,...
79,Estonia,12,0,0
80,Lesotho,6,0,0
81,Eswatini,11,0,0
82,Thailand,10,0,0


In [65]:
df = T20_team
df.head(20)

Unnamed: 0,T20 Team,Matches,Points,Rating
0,India,58,15556,268 ...
1,England,45,11814,263
2,South Africa,39,10135,260
3,Pakistan,51,13067,256
4,New Zealand,46,11663,254
5,Australia,45,11304,251
6,West Indies,51,12039,236
7,Sri Lanka,49,11520,235
8,Bangladesh,49,10908,223
9,Afghanistan,29,6312,218


In [66]:
df.tail(20)

Unnamed: 0,T20 Team,Matches,Points,Rating
64,Cyprus,11,283,26
65,Bahamas,11,260,24
66,Hungary,16,358,22
67,Belize,6,132,22
68,Panama,6,125,21
69,Mozambique,15,303,20
70,Rwanda,12,215,18
71,Serbia,17,184,11
72,Seychelles,6,54,9
73,Bulgaria,34,296,9


In [67]:
df.sample(10)

Unnamed: 0,T20 Team,Matches,Points,Rating
45,Portugal,11,644,59
4,New Zealand,46,11663,254
41,Austria,26,1686,65
66,Hungary,16,358,22
2,South Africa,39,10135,260
44,Vanuatu,11,645,59
14,Scotland,24,4373,182
53,Ghana,16,659,41
70,Rwanda,12,215,18
16,Netherlands,30,5157,172


In [68]:
df.iloc[55:75]

Unnamed: 0,T20 Team,Matches,Points,Rating
55,Czech Republic,28,1137,41
56,Indonesia,7,265,38
57,Switzerland,11,396,36
58,Malta,33,1179,36
59,Japan,7,236,34
60,Luxembourg,23,774,34
61,Sierra Leone,10,331,33
62,Bhutan,8,239,30
63,Fiji,6,177,30
64,Cyprus,11,283,26


In [69]:
df.describe()

Unnamed: 0,T20 Team,Matches,Points,Rating
count,84,84,84,84
unique,84,33,79,64
top,UAE,6,0,0
freq,1,8,6,6


# Q9. Scrap ICC Men's T20 Players(Batsman) and Create a Data Frame

In [70]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/t20i/batting"
# A timeout keeps the cell from hanging forever if the server never answers.
page9 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page9.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page9.content, "html.parser")

In [71]:
# Gather names, team codes, ratings and career-best text from the men's T20I batting page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace('\n', '') for banner in soup.find_all("div", class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--rating")]
best = [banner.text.replace('\n', '') for banner in soup.find_all("span", class_ = "rankings-block__career-best-text")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all("td", class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all("a")
)
team_name.extend(tag.text for tag in soup.find_all("span", class_ = "table-body__logo-text"))
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))
best.extend(
    cell.text.replace('\n', '')
    for cell in soup.find_all("td", class_ = "table-body__cell u-text-right u-hide-phablet")
)

In [72]:
print(len(players), len(team_name), len(rating), len(best))

100 100 100 100


# Create a Data Frame

In [75]:
# Combine the scraped lists into one table, one row per batsman.
T20_players = pd.DataFrame({
    'T20 Players': players,
    'Team Name': team_name,
    'Rating': rating,
    'Best Performance': best,
})
# `df` is the short alias used by the inspection cells that follow.
df = T20_players
df

Unnamed: 0,T20 Players,Team Name,Rating,Best Performance
0,Suryakumar Yadav,IND,863,863 v South Af...
1,Mohammad Rizwan,PAK,842,"875 v England,..."
2,Devon Conway,NZ,792,831 v Australi...
3,Babar Azam,PAK,780,"896 v England,..."
4,Aiden Markram,SA,767,"796 v England,..."
...,...,...,...,...
95,Ryan Burl,ZIM,375,"407 v Ireland,..."
96,Muhammad Usman,UAE,375,"490 v Canada, ..."
97,Shimron Hetmyer,WI,374,481 v Australi...
98,Norman Vanua,PNG,371,422 v Singapor...


In [76]:
df.head(20)

Unnamed: 0,T20 Players,Team Name,Rating,Best Performance
0,Suryakumar Yadav,IND,863,863 v South Af...
1,Mohammad Rizwan,PAK,842,"875 v England,..."
2,Devon Conway,NZ,792,831 v Australi...
3,Babar Azam,PAK,780,"896 v England,..."
4,Aiden Markram,SA,767,"796 v England,..."
5,Dawid Malan,ENG,743,915 v South Af...
6,Glenn Phillips,NZ,703,"703 v England,..."
7,Rilee Rossouw,SA,689,692 v Banglade...
8,Aaron Finch,AUS,687,900 v Zimbabwe...
9,Virat Kohli,IND,638,"897 v England,..."


In [77]:
df.tail(20)

Unnamed: 0,T20 Players,Team Name,Rating,Best Performance
80,Sean Williams,ZIM,411,"481 v Ireland,..."
81,Calum MacLeod,SCO,410,"488 v Bermuda,..."
82,J.J. Smit,NAM,409,425 v Sri Lank...
83,Usman Ghani,AFG,407,"426 v Oman, 29..."
84,Matthew Cross,SCO,404,478 v Zimbabwe...
85,Sikandar Raza,ZIM,404,430 v Scotland...
86,Dhananjaya de Silva,SL,401,401 v Afghanis...
87,Kamran Khan,QAT,398,"463 v Bahrain,..."
88,Sese Bau,PNG,393,"427 v Uganda, ..."
89,Stephan Baard,NAM,389,"440 v Uganda, ..."


In [78]:
df.sample(10)

Unnamed: 0,T20 Players,Team Name,Rating,Best Performance
68,Hardik Pandya,IND,441,455 v Pakistan...
69,Lorcan Tucker,IRE,440,440 v Australi...
96,Muhammad Usman,UAE,375,"490 v Canada, ..."
71,Alex Hales,ENG,440,"866 v India, 0..."
10,Quinton de Kock,SA,631,671 v Sri Lank...
12,Muhammad Waseem,UAE,626,703 v Singapor...
14,Rohit Sharma,IND,601,718 v West Ind...
5,Dawid Malan,ENG,743,915 v South Af...
25,Richard Berrington,SCO,558,"587 v Oman, 21..."
23,George Munsey,SCO,572,600 v Netherla...


# Q10. Scrap ICC Men's T20 Bowler and Create a Data Frame

In [79]:
# Send a GET request to the web server to fetch the page source.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/t20i/bowling"
# A timeout keeps the cell from hanging forever if the server never answers.
page10 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page10.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4's GuessedAtParserWarning and
# keeps the result independent of which optional parsers are installed.
soup = BeautifulSoup(page10.content, "html.parser")

In [82]:
# Gather names, team codes, ratings and career-best text from the men's T20I bowling page.
# Each list is seeded from the "banner" block (its own CSS classes) and then
# extended with the ordinary table rows, so the banner entry comes first.
players = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--name-large")]
team_name = [banner.text.replace("\n", "") for banner in soup.find_all("div", class_ = "rankings-block__banner--nationality")]
rating = [banner.text for banner in soup.find_all("div", class_ = "rankings-block__banner--rating")]
best = [banner.text.replace("\n", "") for banner in soup.find_all("span", class_ = "rankings-block__career-best-text")]

# Table rows: a player's name is the text of each link inside the name cell.
players.extend(
    link.text
    for name_cell in soup.find_all("td", class_ = "table-body__cell rankings-table__name name")
    for link in name_cell.find_all("a")
)
team_name.extend(tag.text for tag in soup.find_all("span", class_ = "table-body__logo-text"))
rating.extend(cell.text for cell in soup.find_all("td", class_ = "table-body__cell rating"))
best.extend(
    cell.text.replace("\n", "")
    for cell in soup.find_all("td", class_ = "table-body__cell u-text-right u-hide-phablet")
)

In [83]:
# The four columns must be the same length before they go into one frame.
print(*map(len, (players, team_name, rating, best)))

100 100 100 100


# Create a Data Frame

In [84]:
# Assemble the scraped columns into one frame (column order as listed).
T20_bowler = pd.DataFrame(
    {
        'Players Name': players,
        'Team Name': team_name,
        'Rating': rating,
        'Best Performance': best,
    }
)
T20_bowler

Unnamed: 0,Players Name,Team Name,Rating,Best Performance
0,Rashid Khan,AFG,700,816 v Banglade...
1,Wanindu Hasaranga,SL,697,797 v West Ind...
2,Tabraiz Shamsi,SA,694,821 v West Ind...
3,Josh Hazlewood,AUS,692,806 v Sri Lank...
4,Mujeeb Ur Rahman,AFG,687,"745 v Ireland,..."
...,...,...,...,...
95,Roelof van der Merwe,NED,391,516 v Papua Ne...
96,Fabian Allen,WI,387,625 v Australi...
97,Nathan Ellis,AUS,384,"404 v England,..."
98,Barry McCarthy,IRE,384,384 v Australi...


In [85]:
# `df` is an alias of T20_bowler (same object, not a copy); show its first 20 rows.
df = T20_bowler
df.iloc[:20]

Unnamed: 0,Players Name,Team Name,Rating,Best Performance
0,Rashid Khan,AFG,700,816 v Banglade...
1,Wanindu Hasaranga,SL,697,797 v West Ind...
2,Tabraiz Shamsi,SA,694,821 v West Ind...
3,Josh Hazlewood,AUS,692,806 v Sri Lank...
4,Mujeeb Ur Rahman,AFG,687,"745 v Ireland,..."
5,Sam Curran,ENG,665,665 v New Zeal...
6,Adam Zampa,AUS,662,743 v Sri Lank...
7,Anrich Nortje,SA,655,"695 v India, 1..."
8,Maheesh Theekshana,SL,653,680 v Pakistan...
9,Mitchell Santner,NZ,651,731 v Pakistan...


In [86]:
# Last 20 rows of the frame.
df.iloc[-20:]

Unnamed: 0,Players Name,Team Name,Rating,Best Performance
80,J.J. Smit,NAM,427,472 v Afghanis...
81,Hasan Mahmud,BAN,415,415 v Zimbabwe...
82,Hardik Pandya,IND,413,545 v South Af...
83,Wellington Masakadza,ZIM,411,"469 v Ireland,..."
84,Khawar Ali,OMA,410,"530 v Canada, ..."
85,Palaniapan Meiyappan,UAE,407,"407 v Namibia,..."
86,Ruben Trumpelmann,NAM,406,"406 v UAE, 20/..."
87,Rohan Mustafa,UAE,404,499 v Hong Kon...
88,Fayyaz Butt,OMA,403,"413 v Canada, ..."
89,Alasdair Evans,SCO,400,598 v Netherla...


In [87]:
# Rows at positions 50 through 74, all columns.
df.iloc[50:75, :]

Unnamed: 0,Players Name,Team Name,Rating,Best Performance
50,Michael Bracewell,NZ,499,514 v Pakistan...
51,Alzarri Joseph,WI,494,"494 v Ireland,..."
52,Bernard Scholtz,NAM,484,"484 v UAE, 20/..."
53,David Willey,ENG,484,640 v West Ind...
54,Lahiru Kumara,SL,482,"493 v India, 2..."
55,Richard Ngarava,ZIM,479,501 v Scotland...
56,David Wiese,NAM,479,646 v South Af...
57,Chamika Karunaratne,SL,479,498 v Australi...
58,Zahoor Khan,UAE,476,"476 v Namibia,..."
59,Naseem Shah,PAK,474,474 v Netherla...


In [90]:
# Spot-check ten randomly drawn rows.
df.sample(n=10)

Unnamed: 0,Players Name,Team Name,Rating,Best Performance
22,Ashton Agar,AUS,595,"717 v England,..."
49,Pat Cummins,AUS,505,"610 v England,..."
93,Ahmed Raza,UAE,393,578 v Papua Ne...
3,Josh Hazlewood,AUS,692,806 v Sri Lank...
61,Taskin Ahmed,BAN,470,470 v Zimbabwe...
73,Sandeep Lamichhane,NEP,447,512 v Papua Ne...
18,Shadab Khan,PAK,624,769 v Scotland...
66,Ravi Bishnoi,IND,454,507 v Pakistan...
86,Ruben Trumpelmann,NAM,406,"406 v UAE, 20/..."
74,Odean Smith,WI,446,460 v Zimbabwe...


# Q11. Scrape ICC Men's T20 All-Rounders and Create a Data Frame

In [91]:
# Download the ICC men's T20I all-rounder rankings page.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/t20i/all-rounder"
# requests has no default timeout; without one a stalled connection hangs the cell forever.
page11 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into empty lists.
page11.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4 guessing one (and warning about it),
# so the parse is the same on every machine.
soup = BeautifulSoup(page11.content, "html.parser")

In [93]:
# Build the four columns. For each column the 'rankings-block__banner--*'
# elements are collected first, followed by the 'table-body__*' elements,
# so the banner value(s) sit at the head of every list.

# Player names: banner <div>, then every <a> inside the table's name cells.
players = [tag.text for tag in soup.find_all('div', class_='rankings-block__banner--name-large')]
players.extend(
    link.text
    for cell in soup.find_all('td', class_='table-body__cell rankings-table__name name')
    for link in cell.find_all('a')
)

# Team codes: newlines are removed from the banner text only.
team_name = [
    tag.text.replace('\n', '')
    for tag in soup.find_all('div', class_='rankings-block__banner--nationality')
]
team_name.extend(tag.text for tag in soup.find_all('span', class_='table-body__logo-text'))

# Ratings are kept as the raw text of each element.
rating = [tag.text for tag in soup.find_all('div', class_='rankings-block__banner--rating')]
rating.extend(tag.text for tag in soup.find_all('td', class_='table-body__cell rating'))

# Career-best text with newlines removed.
best = [
    tag.text.replace('\n', '')
    for tag in soup.find_all('span', class_='rankings-block__career-best-text')
]
best.extend(
    tag.text.replace('\n', '')
    for tag in soup.find_all('td', class_='table-body__cell u-text-right u-hide-phablet')
)

In [94]:
# The four columns must be the same length before they go into one frame.
print(*map(len, (players, team_name, rating, best)))

20 20 20 20


# Create a Data Frame

In [95]:
# Assemble the scraped columns into one frame (column order as listed).
T20_All_Rounser = pd.DataFrame(
    {
        'Player Name': players,
        'Team Name': team_name,
        'Rating': rating,
        'Best Performance': best,
    }
)
T20_All_Rounser

Unnamed: 0,Player Name,Team Name,Rating,Best Performance
0,Shakib Al Hasan,BAN,255,408 v Pakistan...
1,Mohammad Nabi,AFG,244,355 v Banglade...
2,Hardik Pandya,IND,182,189 v Pakistan...
3,Moeen Ali,ENG,175,221 v South Af...
4,J.J. Smit,NAM,174,185 v Afghanis...
5,Sikandar Raza,ZIM,172,184 v Scotland...
6,David Wiese,NAM,170,"170 v UAE, 20/..."
7,Wanindu Hasaranga,SL,165,184 v Pakistan...
8,Marcus Stoinis,AUS,164,"164 v Ireland,..."
9,Glenn Maxwell,AUS,159,397 v Sri Lank...


# Q12. Scrape ICC Men's Test Teams and Create a Data Frame

In [96]:
# Download the ICC men's Test team rankings page.
url = "https://www.icc-cricket.com/rankings/mens/team-rankings/test"
# requests has no default timeout; without one a stalled connection hangs the cell forever.
page12 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into empty lists.
page12.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4 guessing one (and warning about it),
# so the parse is the same on every machine.
soup = BeautifulSoup(page12.content, "html.parser")

In [97]:
# Team names are the text of every <span class="u-hide-phablet"> on the page.
team = soup.find_all('span', class_='u-hide-phablet')
team_name = [span.text for span in team]

In [99]:
# Matches / points / rating for the Test team table.
# The 'rankings-block__banner--*' cells are read first, then the generic
# 'table-body__cell' cells, so banner values sit at the head of each list.
matches = [td.text for td in soup.find_all('td', class_='rankings-block__banner--matches')]
points = [td.text for td in soup.find_all('td', class_='rankings-block__banner--points')]

# get_text(strip=True) replaces .text.replace('\n', ''): removing only newlines
# left extra characters (presumably padding whitespace) around the banner rating,
# so row 0 of the frame rendered as "128 ..." while other rows showed clean values.
rating = [
    td.get_text(strip=True)
    for td in soup.find_all('td', class_='rankings-block__banner--rating u-text-right')
]

# The centred cells alternate matches, points, matches, points, ...
new_list = [td.text for td in soup.find_all('td', class_='table-body__cell u-center-text')]

# Pair them up; zip drops a trailing unpaired cell, as the original index loop did.
for played, scored in zip(new_list[0::2], new_list[1::2]):
    matches.append(played)
    points.append(scored)

rating.extend(td.text for td in soup.find_all('td', class_='table-body__cell u-text-right rating'))

In [100]:
# The four columns must be the same length before they go into one frame.
print(*map(len, (team_name, matches, points, rating)))

10 10 10 10


# Create a Data Frame

In [101]:
# Assemble the scraped columns into one frame (column order as listed).
Test = pd.DataFrame(
    {
        'Team Name': team_name,
        'Matches': matches,
        'Points': points,
        'Rating': rating,
    }
)
Test

Unnamed: 0,Team Name,Matches,Points,Rating
0,Australia,19,2439,128 ...
1,India,29,3318,114
2,South Africa,25,2606,104
3,England,43,4449,103
4,New Zealand,27,2704,100
5,Pakistan,23,2111,92
6,Sri Lanka,23,1916,83
7,West Indies,25,1988,80
8,Bangladesh,22,1047,48
9,Zimbabwe,6,148,25


# Q13. Scrape ICC Men's Test Batsmen and Create a Data Frame

In [102]:
# Download the ICC men's Test batting rankings page.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/test/batting"
# requests has no default timeout; without one a stalled connection hangs the cell forever.
page13 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into empty lists.
page13.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4 guessing one (and warning about it),
# so the parse is the same on every machine.
soup = BeautifulSoup(page13.content, "html.parser")

In [103]:
# Build the four columns. For each column the "rankings-block__banner--*"
# elements are collected first, followed by the "table-body__*" elements,
# so the banner value(s) sit at the head of every list.

# Player names: banner <div>, then every <a> inside the table's name cells.
players = [tag.text for tag in soup.find_all('div', class_="rankings-block__banner--name-large")]
players.extend(
    link.text
    for cell in soup.find_all('td', class_="table-body__cell rankings-table__name name")
    for link in cell.find_all('a')
)

# Team codes: newlines are removed from the banner text only.
team_name = [
    tag.text.replace('\n', '')
    for tag in soup.find_all('div', class_="rankings-block__banner--nationality")
]
team_name.extend(tag.text for tag in soup.find_all('span', class_="table-body__logo-text"))

# Ratings are kept as the raw text of each element.
rating = [tag.text for tag in soup.find_all('div', class_="rankings-block__banner--rating")]
rating.extend(tag.text for tag in soup.find_all('td', class_="table-body__cell rating"))

# Career-best text with newlines removed.
best = [
    tag.text.replace('\n', "")
    for tag in soup.find_all('span', class_="rankings-block__career-best-text")
]
best.extend(
    tag.text.replace('\n', "")
    for tag in soup.find_all('td', class_="table-body__cell u-text-right u-hide-phablet")
)

In [104]:
# The four columns must be the same length before they go into one frame.
print(*map(len, (players, team_name, rating, best)))

100 100 100 100


# Create a Data Frame

In [105]:
# Assemble the scraped columns into one frame (column order as listed).
test_players = pd.DataFrame(
    {
        'Players Name': players,
        'Teame Name': team_name,
        'Rating': rating,
        'Best Performance': best,
    }
)
test_players

Unnamed: 0,Players Name,Teame Name,Rating,Best Performance
0,Joe Root,ENG,887,"923 v India, 0..."
1,Marnus Labuschagne,AUS,885,936 v Pakistan...
2,Babar Azam,PAK,879,879 v Sri Lank...
3,Steve Smith,AUS,848,"947 v England,..."
4,Rishabh Pant,IND,801,"801 v England,..."
...,...,...,...,...
95,Dan Lawrence,ENG,377,442 v West Ind...
96,Haris Sohail,PAK,369,563 v New Zeal...
97,Shamarh Brooks,WI,369,"536 v England,..."
98,Roston Chase,WI,369,626 v Pakistan...


In [106]:
# `df` is an alias of test_players (same object, not a copy); show its first 15 rows.
df = test_players
df.iloc[:15]

Unnamed: 0,Players Name,Teame Name,Rating,Best Performance
0,Joe Root,ENG,887,"923 v India, 0..."
1,Marnus Labuschagne,AUS,885,936 v Pakistan...
2,Babar Azam,PAK,879,879 v Sri Lank...
3,Steve Smith,AUS,848,"947 v England,..."
4,Rishabh Pant,IND,801,"801 v England,..."
5,Kane Williamson,NZ,786,919 v Pakistan...
6,Usman Khawaja,AUS,766,779 v Sri Lank...
7,Dimuth Karunaratne,SL,748,782 v Australi...
8,Rohit Sharma,IND,746,"813 v England,..."
9,Jonny Bairstow,ENG,719,772 v South Af...


In [107]:
# Last five rows (the default count for tail()).
df.iloc[-5:]

Unnamed: 0,Players Name,Teame Name,Rating,Best Performance
95,Dan Lawrence,ENG,377,442 v West Ind...
96,Haris Sohail,PAK,369,563 v New Zeal...
97,Shamarh Brooks,WI,369,"536 v England,..."
98,Roston Chase,WI,369,626 v Pakistan...
99,Mitchell Starc,AUS,362,"446 v India, 2..."


In [108]:
# Spot-check twenty randomly drawn rows.
df.sample(n=20)

Unnamed: 0,Players Name,Teame Name,Rating,Best Performance
15,Mohammad Rizwan,PAK,670,700 v Australi...
17,Dean Elgar,SA,665,784 v Australi...
88,Hashmatullah Shaidi,AFG,397,397 v Zimbabwe...
45,Rassie van der Dussen,SA,547,557 v New Zeal...
48,Niroshan Dickwella,SL,534,595 v Zimbabwe...
81,Lahiru Thirimanne,SL,419,498 v Banglade...
16,David Warner,AUS,667,"880 v India, 1..."
96,Haris Sohail,PAK,369,563 v New Zeal...
29,Henry Nicholls,NZ,613,778 v Banglade...
30,Abid Ali,PAK,610,643 v Banglade...


In [109]:
# Rows at positions 35 through 54, all columns.
df.iloc[35:55, :]

Unnamed: 0,Players Name,Teame Name,Rating,Best Performance
35,Ravindra Jadeja,IND,590,"590 v England,..."
36,Shreyas Iyer,IND,585,"585 v England,..."
37,Jermaine Blackwood,WI,583,596 v Banglade...
38,Ajinkya Rahane,IND,577,825 v New Zeal...
39,Cameron Green,AUS,576,607 v Sri Lank...
40,Tamim Iqbal,BAN,572,709 v Australi...
41,Dhananjaya de Silva,SL,570,636 v West Ind...
42,Shakib Al Hasan,BAN,570,694 v Australi...
43,Keegan Petersen,SA,569,"597 v India, 1..."
44,Lokesh Rahul,IND,560,761 v Sri Lank...


# Q14. Scrape ICC Men's Test Bowlers and Create a Data Frame

In [110]:
# Download the ICC men's Test bowling rankings page.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/test/bowling"
# requests has no default timeout; without one a stalled connection hangs the cell forever.
page14 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into empty lists.
page14.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4 guessing one (and warning about it),
# so the parse is the same on every machine.
soup = BeautifulSoup(page14.content, "html.parser")

In [111]:
# Build the four columns. For each column the "rankings-block__banner--*"
# elements are collected first, followed by the "table-body__*" elements,
# so the banner value(s) sit at the head of every list.

# Player names: banner <div>, then every <a> inside the table's name cells.
players = [tag.text for tag in soup.find_all('div', class_="rankings-block__banner--name-large")]
players.extend(
    link.text
    for cell in soup.find_all('td', class_="table-body__cell rankings-table__name name")
    for link in cell.find_all('a')
)

# Team codes: newlines are removed from the banner text only.
team_name = [
    tag.text.replace('\n', '')
    for tag in soup.find_all('div', class_="rankings-block__banner--nationality")
]
team_name.extend(tag.text for tag in soup.find_all('span', class_="table-body__logo-text"))

# Ratings are kept as the raw text of each element.
rating = [tag.text for tag in soup.find_all('div', class_="rankings-block__banner--rating")]
rating.extend(tag.text for tag in soup.find_all('td', class_="table-body__cell rating"))

# Career-best text with newlines removed.
best = [
    tag.text.replace('\n', "")
    for tag in soup.find_all('span', class_="rankings-block__career-best-text")
]
best.extend(
    tag.text.replace('\n', "")
    for tag in soup.find_all('td', class_="table-body__cell u-text-right u-hide-phablet")
)

In [112]:
# The four columns must be the same length before they go into one frame.
print(*map(len, (players, team_name, rating, best)))

100 100 100 100


# Create a Data Frame

In [113]:
# Assemble the scraped columns into one frame (column order as listed).
test_bowlers = pd.DataFrame(
    {
        'Bowler Name': players,
        'Teame Name': team_name,
        'Rating': rating,
        'Best Performance': best,
    }
)
test_bowlers

Unnamed: 0,Bowler Name,Teame Name,Rating,Best Performance
0,Pat Cummins,AUS,891,"914 v England,..."
1,Ravichandran Ashwin,IND,842,"904 v England,..."
2,Shaheen Afridi,PAK,828,836 v Sri Lank...
3,Jasprit Bumrah,IND,828,835 v West Ind...
4,James Anderson,ENG,825,"903 v India, 1..."
...,...,...,...,...
95,Dasun Shanaka,SL,190,"269 v India, 2..."
96,Mitchell Swepson,AUS,187,187 v Sri Lank...
97,Mahmudullah,BAN,185,381 v West Ind...
98,Saqib Mahmood,ENG,182,195 v West Ind...


In [114]:
# `df` is an alias of test_bowlers (same object, not a copy); show its first 20 rows.
df = test_bowlers
df.iloc[:20]

Unnamed: 0,Bowler Name,Teame Name,Rating,Best Performance
0,Pat Cummins,AUS,891,"914 v England,..."
1,Ravichandran Ashwin,IND,842,"904 v England,..."
2,Shaheen Afridi,PAK,828,836 v Sri Lank...
3,Jasprit Bumrah,IND,828,835 v West Ind...
4,James Anderson,ENG,825,"903 v India, 1..."
5,Kagiso Rabada,SA,799,902 v Australi...
6,Kyle Jamieson,NZ,788,"836 v England,..."
7,Kemar Roach,WI,756,"780 v India, 0..."
8,Neil Wagner,NZ,747,859 v Australi...
9,Mitchell Starc,AUS,739,806 v New Zeal...


In [115]:
# Last ten rows of the frame.
df.iloc[-10:]

Unnamed: 0,Bowler Name,Teame Name,Rating,Best Performance
90,Mitchell Santner,NZ,217,344 v West Ind...
91,Craig Overton,ENG,216,"256 v India, 2..."
92,Haris Sohail,PAK,208,298 v Banglade...
93,George Linde,SA,201,227 v Pakistan...
94,Taskin Ahmed,BAN,193,221 v New Zeal...
95,Dasun Shanaka,SL,190,"269 v India, 2..."
96,Mitchell Swepson,AUS,187,187 v Sri Lank...
97,Mahmudullah,BAN,185,381 v West Ind...
98,Saqib Mahmood,ENG,182,195 v West Ind...
99,Sean Williams,ZIM,181,247 v Banglade...


In [116]:
# Rows at positions 60 through 79, all columns.
df.iloc[60:80, :]

Unnamed: 0,Bowler Name,Teame Name,Rating,Best Performance
60,Nauman Ali,PAK,347,420 v West Ind...
61,Praveen Jayawickrama,SL,343,"472 v India, 1..."
62,Jomel Warrican,WI,329,346 v Sri Lank...
63,Vishwa Fernando,SL,328,402 v Banglade...
64,Simon Harmer,SA,325,391 v Banglade...
65,Jofra Archer,ENG,323,465 v Australi...
66,Dominic Bess,ENG,321,"439 v India, 0..."
67,Mohammad Nawaz,PAK,316,316 v Sri Lank...
68,Sikandar Raza,ZIM,315,340 v Sri Lank...
69,Kuldeep Yadav,IND,315,402 v Australi...


In [117]:
# Spot-check ten randomly drawn rows.
df.sample(n=10)

Unnamed: 0,Bowler Name,Teame Name,Rating,Best Performance
84,Olly Stone,ENG,244,295 v New Zeal...
11,Josh Hazlewood,AUS,737,"863 v India, 0..."
38,Umesh Yadav,IND,537,672 v Banglade...
67,Mohammad Nawaz,PAK,316,316 v Sri Lank...
8,Neil Wagner,NZ,747,859 v Australi...
83,Victor Nyauchi,ZIM,247,327 v Afghanis...
90,Mitchell Santner,NZ,217,344 v West Ind...
5,Kagiso Rabada,SA,799,902 v Australi...
35,Prabath Jayasuriya,SL,550,550 v Pakistan...
87,Joe Root,ENG,227,251 v Australi...


# Q15. Scrape ICC Men's Test All-Rounders and Create a Data Frame

In [118]:
# Download the ICC men's Test all-rounder rankings page.
url = "https://www.icc-cricket.com/rankings/mens/player-rankings/test/all-rounder"
# requests has no default timeout; without one a stalled connection hangs the cell forever.
page15 = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into empty lists.
page15.raise_for_status()

# Parse the HTML. Naming the parser avoids bs4 guessing one (and warning about it),
# so the parse is the same on every machine.
soup = BeautifulSoup(page15.content, "html.parser")

In [119]:
# Build the four columns. For each column the "rankings-block__banner--*"
# elements are collected first, followed by the "table-body__*" elements,
# so the banner value(s) sit at the head of every list.

# Player names: banner <div>, then every <a> inside the table's name cells.
players = [tag.text for tag in soup.find_all('div', class_="rankings-block__banner--name-large")]
players.extend(
    link.text
    for cell in soup.find_all('td', class_="table-body__cell rankings-table__name name")
    for link in cell.find_all('a')
)

# Team codes: newlines are removed from the banner text only.
team_name = [
    tag.text.replace('\n', '')
    for tag in soup.find_all('div', class_="rankings-block__banner--nationality")
]
team_name.extend(tag.text for tag in soup.find_all('span', class_="table-body__logo-text"))

# Ratings are kept as the raw text of each element.
rating = [tag.text for tag in soup.find_all('div', class_="rankings-block__banner--rating")]
rating.extend(tag.text for tag in soup.find_all('td', class_="table-body__cell rating"))

# Career-best text with newlines removed.
best = [
    tag.text.replace('\n', "")
    for tag in soup.find_all('span', class_="rankings-block__career-best-text")
]
best.extend(
    tag.text.replace('\n', "")
    for tag in soup.find_all('td', class_="table-body__cell u-text-right u-hide-phablet")
)

In [120]:
# The four columns must be the same length before they go into one frame.
print(*map(len, (players, team_name, rating, best)))

20 20 20 20


# Create a Data Frame

In [None]:
# Assemble the Test all-rounder columns into their own frame.
# NOTE(review): the 'Bowler Name' label looks copied from the Test bowlers cell.
# It is kept so the column key does not change; consider renaming it to 'Player Name'.
test_allrounder = pd.DataFrame({})
test_allrounder['Bowler Name'] = players
test_allrounder['Teame Name'] = team_name
test_allrounder['Rating'] = rating
# Fixed: this column was being assigned to `test_bowlers`. That frame has 100 rows
# while `best` holds 20 values here, so the assignment raised a length-mismatch
# ValueError and `test_allrounder` never received its 'Best Performance' column.
test_allrounder['Best Performance'] = best
# Fixed: display the frame built in this cell, not the bowlers frame.
test_allrounder