## Web Scraping with Python using requests and BeautifulSoup

### 1. Import the Libraries

In [None]:
# Import requests, BeautifulSoup and Pandas libraries
import requests
import bs4
import pandas as pd

## I. Scrape data from National Weather Service website

### 1. Access and get HTML content from a website

In [None]:
# Send a GET request to the National Weather Service home page and keep the Response object.
# A timeout is set so the cell fails fast instead of hanging forever if the server never answers.
url = "https://www.weather.gov"
response = requests.get(url, timeout=10)

In [None]:
# Print out the status code value
print(response.status_code)

200


In [None]:
# Display the HTML content of the web page as decoded text (str); the raw bytes are available as response.content
response.text

In [None]:
# Length of the HTML content of the web page
len(response.text)

148442

In [None]:
# Save the HTML content of the web page to a file
with open('weather_webpage.html', 'w', encoding="utf-8") as f:
    f.write(response.text)

### 2. Parse a web page

In [None]:
# Parse the HTML content and return BeautifulSoup object
soup = bs4.BeautifulSoup(response.text, 'lxml')

In [None]:
# Print out a nicely formatted HTML content of the web page
print(soup.prettify())

#### 2.1. Extract web 'head' tag

In [None]:
# Select the head tag of a web page
head_tag = soup.select('head')

In [None]:
# Length of the head tags list
len(head_tag)

1

In [None]:
# Select the first element from the list of all "head" elements
head_tag[0]

#### 2.2. Extract web 'body' tag

In [None]:
# Select the body tag of a web page
body_tag = soup.select('body')

In [None]:
# Length of the body tags list
len(body_tag)

1

In [None]:
# Select the first element from the list of all "body" elements
body_tag[0]

#### 2.3. Extract web 'title' tag

In [None]:
# Select the title tag of a web page
title_tag = soup.select('title')

In [None]:
# Length of the title tags list
len(title_tag)

1

In [None]:
# Take the first (and only) element of the "title" tags list
title_tag[0]

<title>National Weather Service</title>

In [None]:
# Extract the text enclosed by the first "title" element
title_tag[0].getText()

'National Weather Service'

## II. Scrape data from World Population Clock website

### 1. Access and get HTML content from a website

In [None]:
# Send a GET request to the Worldometers world-population page and keep the Response object.
# A timeout is set so the cell fails fast instead of hanging forever if the server never answers.
url = "https://www.worldometers.info/world-population/"
response = requests.get(url, timeout=10)

In [None]:
# Print out the status code value
print(response.status_code)

200


In [None]:
# Display the HTML content of the web page as decoded text (str); the raw bytes are available as response.content
response.text

In [None]:
# Length of the HTML content of the web page
len(response.text)

137270

In [None]:
# Save the HTML content of the web page to a file
with open('worldometers_webpage.html', 'w', encoding="utf-8") as f:
    f.write(response.text)

### 2. Parse a web page

In [None]:
# Parse the HTML content and return BeautifulSoup object
soup = bs4.BeautifulSoup(response.text, 'lxml')

In [None]:
# Print out a nicely formatted HTML content of the web page
print(soup.prettify())

#### 2.1. Extract web 'h2' tags

In [None]:
# Select the h2 tags of a web page
h2_tags = soup.select('h2')

or

In [None]:
# Find the instances of a h2 tag
h2_tags = soup.find_all('h2')

In [None]:
# Length of the h2 tags list
len(h2_tags)

12

In [None]:
# Print out the h2 tags
h2_tags

[<h2>World Population: Past, Present, and Future</h2>,
 <h2>Growth Rate</h2>,
 <h2>World Population (2020 and historical)</h2>,
 <h2>World Population Forecast (2020-2050) </h2>,
 <h2>World Population Milestones </h2>,
 <h2>World Population by Region </h2>,
 <h2>World Population Density (people/km<sup>2</sup>)</h2>,
 <h2>World Population by Religion </h2>,
 <h2>World Population by Country</h2>,
 <h2>How many people have ever lived on earth?</h2>,
 <h2>World Population clock: sources and methodology </h2>,
 <h2>Why Worldometer clocks are the most accurate</h2>]

In [None]:
# Grab the first element from the list of all "h2" elements
h2_tags[0]

<h2>World Population: Past, Present, and Future</h2>

In [None]:
# Grab a text between tags of "h2" element
h2_tags[0].getText()

'World Population: Past, Present, and Future'

or

In [None]:
# Grab a text between all tags of "h2" element
for h2_tag in soup.find_all('h2'):
    print(h2_tag.getText())

World Population: Past, Present, and Future
Growth Rate
World Population (2020 and historical)
World Population Forecast (2020-2050) 
World Population Milestones 
World Population by Region 
World Population Density (people/km2)
World Population by Religion 
World Population by Country
How many people have ever lived on earth?
World Population clock: sources and methodology 
Why Worldometer clocks are the most accurate


#### 2.2. Extract web 'p' tags

In [None]:
# Select the p tags of a web page
p_tags = soup.select('p')

or

In [None]:
# Find the instances of a p tag
p_tags = soup.find_all('p')

In [None]:
# Length of the p tags list
len(p_tags)

22

In [None]:
# Print out the p tags
p_tags

In [None]:
# Grab the third element from the list of all "p" elements
p_tags[2]

<p>At the dawn of agriculture, about 8000 B.C., the population of the world was approximately 5 million. Over the 8,000-year period up to 1 A.D. it grew to 200 million (some estimate 300 million or even 600, suggesting how imprecise population estimates of early historical periods can be), with a growth rate of under 0.05% per year.</p>

In [None]:
# Grab a text between tags of "p" element
p_tags[2].getText()

'At the dawn of agriculture, about 8000 B.C., the population of the world was approximately 5 million. Over the 8,000-year period up to 1 A.D. it grew to 200 million (some estimate 300 million or even 600, suggesting how imprecise population estimates of early historical periods can be), with a growth rate of under 0.05% per year.'

or

In [None]:
# Grab a text between all tags of "p" element
for p_tag in soup.find_all('p'):
    print(p_tag.getText())

#### 2.3. Extract web 'a' tags

In [None]:
# Select all "a" (link) tags of a web page
a_tags = soup.select('a')

or

In [None]:
# Get all links on the web page
a_tags = soup.find_all('a')

In [None]:
# Length of the a tags list
len(a_tags)

351

In [None]:
# Print out the a tags
a_tags

In [None]:
# Grab the fifth element from the list of all "a" elements
a_tags[4]

<a href="/population/">Population</a>

In [None]:
# Grab the fifth element from the list of all "a" elements and attribute 'href'
a_tags[4]['href']

'/population/'

In [None]:
# Grab a text between tags of "a" element
a_tags[4].getText()

'Population'

or

In [None]:
# Print the 'href' attribute (the link target) of every "a" element.
# .get() is used rather than a_tag['href'] so an "a" tag without 'href' prints None instead of raising KeyError.
for a_tag in soup.find_all('a'):
    print(a_tag.get('href'))

In [None]:
# Grab a text between all tags of "a" element
for a_tag in soup.find_all('a'):
    print(a_tag.getText())

#### 2.4. Extract web 'div' tags

In [None]:
# Select all "div" tags of a web page
div_tags = soup.select('div')

or

In [None]:
# Get all divisions on the web page
div_tags = soup.find_all('div')

In [None]:
# Length of the div tags list
len(div_tags)

179

In [None]:
# Print out the div tags
div_tags

In [None]:
# Grab the first element from the list of all "div" elements
div_tags[0]

<div class="navbar navbar-default"> <div class="container"> <div class="navbar-header"> <div class="logo"><a class="navbar-brand" href="/"><img border="0" src="/img/worldometers-logo.gif" title="Worldometer"/></a></div> <button class="navbar-toggle" data-target="#navbar-main" data-toggle="collapse" type="button"> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button> </div> <div class="navbar-collapse collapse" id="navbar-main"> <ul class="nav navbar-nav"> <li><a href="/coronavirus/"><span style="color:#FF9900; font-weight:bold">Coronavirus</span></a></li> <li><a href="/population/">Population</a></li> </ul> </div> </div></div>

In [None]:
# Grab a text between tags of "div" element
div_tags[0].getText()

'            Coronavirus Population   '

or

In [None]:
# Grab a text between all tags of "div" element
for div_tag in soup.find_all('div'):
    print(div_tag.getText())

##### using 'class' attribute

In [None]:
# Select all elements of a class
div_tags = soup.select(".sec-text")

or

In [None]:
# Find the element from the list of all "div" elements and CSS .class
div_tags = soup.find_all('div', class_="sec-text")

In [None]:
# Print out the div tags and CSS .class
div_tags

[<div class="sec-text">Births today </div>,
 <div class="sec-text">Deaths today </div>,
 <div class="sec-text">Population Growth today </div>,
 <div class="sec-text">Births this year </div>,
 <div class="sec-text">Deaths this year </div>,
 <div class="sec-text">Population Growth this year </div>]

In [None]:
# Grab the third element from the list of all "div" elements and CSS .class
div_tags[2]

<div class="sec-text">Population Growth today </div>

In [None]:
# Grab a text between tags of "div" element and CSS .class
div_tags[2].getText()

'Population Growth today '

or

In [None]:
# Grab a text between all tags of "div" element and CSS .class
for div_tag in soup.find_all('div', class_="sec-text"):
    print(div_tag.getText())

Births today 
Deaths today 
Population Growth today 
Births this year 
Deaths this year 
Population Growth this year 


##### using 'id' attribute

In [None]:
# Select the element(s) with the id "maincounter-wrap" (CSS #id selector)
div_tags = soup.select("#maincounter-wrap")

or

In [None]:
# Find the element from the list of all "div" elements and CSS #id
div_tags = soup.find_all('div', id="maincounter-wrap")

In [None]:
# Print out the div tags and CSS #id
div_tags

[<div id="maincounter-wrap"> <h1> Current World Population</h1> <div class="maincounter-number"> <span class="rts-counter" rel="current_population">retrieving data... </span></div> </div>]

In [None]:
# Grab the first element from the list of all "div" elements and CSS #id
div_tags[0]

<div id="maincounter-wrap"> <h1> Current World Population</h1> <div class="maincounter-number"> <span class="rts-counter" rel="current_population">retrieving data... </span></div> </div>

In [None]:
# Grab a text between tags of "div" element and CSS #id
div_tags[0].getText()

'  Current World Population  retrieving data...  '

or

In [None]:
# Grab a text between all tags of "div" element and CSS #id
for div_tag in soup.find_all('div', id="maincounter-wrap"):
    print(div_tag.getText())

  Current World Population  retrieving data...  


## III. Scrape data from the Wikipedia "Data scraping" article

### 1. Access and get HTML content from a website

In [None]:
# Send a GET request to the Wikipedia "Data scraping" article and keep the Response object.
# A timeout is set so the cell fails fast instead of hanging forever if the server never answers.
url = "https://en.wikipedia.org/wiki/Data_scraping"
response = requests.get(url, timeout=10)

In [None]:
# Print out the status code value
print(response.status_code)

200


In [None]:
# Display the HTML content of the web page as decoded text (str); the raw bytes are available as response.content
response.text

In [None]:
# Length of the HTML content of the web page
len(response.text)

86056

In [None]:
# Save the HTML content of the web page to a file
with open('data_webpage.html', 'w', encoding="utf-8") as f:
    f.write(response.text)

### 2. Parse a web page

In [None]:
# Parse the HTML content and return BeautifulSoup object
soup = bs4.BeautifulSoup(response.text, 'lxml')

In [None]:
# Print out a nicely formatted HTML content of the web page
print(soup.prettify())

#### 2.1. Extract web 'img' tag

In [None]:
# Select all images
img_tags = soup.select('img')

In [None]:
# Length of the img tags list
len(img_tags)

6

In [None]:
# Print out the img tags
img_tags

[<img alt="" data-file-height="399" data-file-width="512" decoding="async" height="39" src="//upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/50px-Question_book-new.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/75px-Question_book-new.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/100px-Question_book-new.svg.png 2x" width="50"/>,
 <img alt="vectorial version" data-file-height="496" data-file-width="496" decoding="async" height="150" src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c5/CIAJMK1209-en.svg/150px-CIAJMK1209-en.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c5/CIAJMK1209-en.svg/225px-CIAJMK1209-en.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c5/CIAJMK1209-en.svg/300px-CIAJMK1209-en.svg.png 2x" width="150"/>,
 <img alt="" class="thumbimage" data-file-height="195" data-file-width="570" decoding="async" height="130" src="//upload.wikimedia.org/w

In [None]:
# Select all elements of a class
img_tags = soup.select('.thumbimage')

In [None]:
# Call the variable
img_tags

[<img alt="" class="thumbimage" data-file-height="195" data-file-width="570" decoding="async" height="130" src="//upload.wikimedia.org/wikipedia/commons/thumb/d/db/Screen-Scraping-OCRget.jpg/380px-Screen-Scraping-OCRget.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/d/db/Screen-Scraping-OCRget.jpg 1.5x" width="380"/>]

In [None]:
# Grab the first element from the list of all "img" elements and CSS .class
img_tags[0]

<img alt="" class="thumbimage" data-file-height="195" data-file-width="570" decoding="async" height="130" src="//upload.wikimedia.org/wikipedia/commons/thumb/d/db/Screen-Scraping-OCRget.jpg/380px-Screen-Scraping-OCRget.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/d/db/Screen-Scraping-OCRget.jpg 1.5x" width="380"/>

In [None]:
# Grab the first element from the list of all "img" elements, 'alt' attribute and CSS .class
img_tags[0]['alt']

''

In [None]:
# Grab the first element from the list of all "img" elements, 'src' attribute and CSS .class
img_tags[0]['src']

'//upload.wikimedia.org/wikipedia/commons/thumb/d/db/Screen-Scraping-OCRget.jpg/380px-Screen-Scraping-OCRget.jpg'

or

In [None]:
# Print the 'alt' attribute of every "img" element on the page.
# .get() with a default is used instead of img_tag['alt'] because indexing raises
# KeyError as soon as one image has no 'alt' attribute; such images print an empty line.
for img_tag in soup.find_all('img'):
    print(img_tag.get('alt', ''))


vectorial version


Wikimedia Foundation
Powered by MediaWiki


In [None]:
# Print the 'src' attribute (the image address) of every "img" element on the page.
# .get() with a default is used instead of img_tag['src'] because indexing raises
# KeyError as soon as one image has no 'src' attribute; such images print an empty line.
for img_tag in soup.find_all('img'):
    print(img_tag.get('src', ''))

//upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/50px-Question_book-new.svg.png
//upload.wikimedia.org/wikipedia/commons/thumb/c/c5/CIAJMK1209-en.svg/150px-CIAJMK1209-en.svg.png
//upload.wikimedia.org/wikipedia/commons/thumb/d/db/Screen-Scraping-OCRget.jpg/380px-Screen-Scraping-OCRget.jpg
//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1
/static/images/footer/wikimedia-button.png
/static/images/footer/poweredby_mediawiki_88x31.png


In [None]:
# Download the image file itself and keep the Response object.
# The address is the protocol-relative 'src' value printed above, prefixed with "https:".
# A timeout is set so the cell fails fast instead of hanging forever if the server never answers.
image_link = requests.get("https://upload.wikimedia.org/wikipedia/commons/thumb/d/db/Screen-Scraping-OCRget.jpg/380px-Screen-Scraping-OCRget.jpg", timeout=10)

<img src="//upload.wikimedia.org/wikipedia/commons/thumb/d/db/Screen-Scraping-OCRget.jpg/380px-Screen-Scraping-OCRget.jpg">

In [None]:
# Access to the raw bytes of the response payload
image_link.content

In [None]:
# Save the raw image bytes to a .jpg file next to the notebook.
# A relative path keeps the notebook runnable on any machine (like the .html files saved earlier),
# and the `with` block closes the file even if the write raises, so the handle cannot leak.
with open('Screen_scraping.jpg', 'wb') as f:
    f.write(image_link.content)

## IV. Scrape data from Quotes to Scrape website

### 1. Access and get HTML content from a website

In [None]:
# Send a GET request to the Quotes to Scrape home page and keep the Response object.
# A timeout is set so the cell fails fast instead of hanging forever if the server never answers.
url = "http://quotes.toscrape.com"
response = requests.get(url, timeout=10)

In [None]:
# Print out the status code value
print(response.status_code)

200


In [None]:
# Display the HTML content of the web page as decoded text (str); the raw bytes are available as response.content
response.text

In [None]:
# Length of the HTML content of the web page
len(response.text)

11010

In [None]:
# Save the HTML content of the web page to a file
with open('quotes_webpage.html', 'w', encoding="utf-8") as f:
    f.write(response.text)

### 2. Parse a web page

In [None]:
# Parse the HTML content and return BeautifulSoup object
soup = bs4.BeautifulSoup(response.text, 'lxml')

In [None]:
# Print out a nicely formatted HTML content of the web page
print(soup.prettify())

#### 2.1. Extract elements across multiple web pages

In [None]:
# Flag used as the condition of the page-scraping loop
page_still_valid = True

# Create an empty list that will collect the quotes
quotes_list = []

# Number of the first page to request
page = 1

In [None]:
# Website specifically designed for people to scrape it.
# The page number is appended to this prefix; it never changes, so it is defined once outside the loop.
url = 'http://quotes.toscrape.com/page/'

# Loop through an unknown number of pages until the site reports that there are no more quotes
while page_still_valid:

    # Address of the current page, e.g. http://quotes.toscrape.com/page/1
    base_url = url + str(page)

    # Fetch the page; the timeout prevents the loop from hanging on an unresponsive server
    response = requests.get(base_url, timeout=10)

    # Past the last page the site answers with this sentinel text: stop scraping
    if "No quotes found!" in response.text:
        break

    # Any other error page would never contain the sentinel and the loop would spin forever,
    # so fail loudly on a 4xx/5xx status instead
    response.raise_for_status()

    # Parse the HTML text of the page with the lxml parser
    soup = bs4.BeautifulSoup(response.text, "lxml")

    # Add the text of every element with the CSS class "text" (one per quote) to the list
    quotes_list.extend(quote.text for quote in soup.select('.text'))

    # Go to the next page
    page += 1

In [None]:
# Call the variable
quotes_list

In [None]:
# Length of the quotes list
len(quotes_list)

100

## V. Scrape data from Worldometers website

### 1. Access and get HTML content from a website

In [None]:
# Send a GET request to the Worldometers coronavirus page and keep the Response object.
# A timeout is set so the cell fails fast instead of hanging forever if the server never answers.
url = "https://www.worldometers.info/coronavirus/"
response = requests.get(url, timeout=10)

In [None]:
# Print out the status code value
print(response.status_code)

200


In [None]:
# Display the HTML content of the web page as decoded text (str); the raw bytes are available as response.content
response.text

'\n<!DOCTYPE html>\n<!--[if IE 8]> <html lang="en" class="ie8"> <![endif]-->\n<!--[if IE 9]> <html lang="en" class="ie9"> <![endif]-->\n<!--[if !IE]><!-->\n<html lang="en">\n<!--<![endif]-->\n<head>\n<meta charset="utf-8">\n<meta http-equiv="X-UA-Compatible" content="IE=edge">\n<meta name="viewport" content="width=device-width, initial-scale=1">\n<title>COVID Live - Coronavirus Statistics - Worldometer</title>\n<meta name="description" content="Live statistics and coronavirus news tracking the number of confirmed cases, recovered patients, tests, and death toll due to the COVID-19 coronavirus from Wuhan, China. Coronavirus counter with new cases, deaths, and number of tests per 1 Million population. Historical data and info. Daily charts, graphs, news and updates">\n\n<link rel="shortcut icon" href="/favicon/favicon.ico" type="image/x-icon">\n<link rel="apple-touch-icon" sizes="57x57" href="/favicon/apple-icon-57x57.png">\n<link rel="apple-touch-icon" sizes="60x60" href="/favicon/apple

In [None]:
# Length of the HTML content of the web page
len(response.text)

1591275

In [None]:
# Save the HTML content of the web page to a file
with open('world_meters_webpage.html', 'w', encoding="utf-8") as f:
    f.write(response.text)

### 2. Parse a web page

In [None]:
# Parse the HTML content and return BeautifulSoup object
soup = bs4.BeautifulSoup(response.text, 'lxml')

In [None]:
# Print out a nicely formatted HTML content of the web page
print(soup.prettify())

#### 2.1. Extract web 'table' tag and save data to .csv file

In [None]:
# Find the first "table" element whose id is "main_table_countries_today" (CSS #id)
table = soup.find("table", id="main_table_countries_today")

In [None]:
# Call the variable
table

In [None]:
# Build the list of column headers from the text of every 'th' cell in the table
headers = [th.text for th in table.find_all("th")]

In [None]:
# Create a Pandas DataFrame
worldometers_df = pd.DataFrame(columns = headers)

In [None]:
# Collect the cell texts of every table row first, then build the DataFrame in a single step.
# Appending with .loc one row at a time re-allocates the frame on every iteration and, when the
# cell is run twice, appends every row again; building it once is faster and repeatable.
rows = [
    [cell.text for cell in table_row.find_all("td")]
    for table_row in table.find_all("tr")[1:]  # [1:] skips the header row
]
worldometers_df = pd.DataFrame(rows, columns = headers)

In [None]:
# Show the first 10 rows
worldometers_df.head(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,,\nNorth America\n,116321752,,1538890,,111091329,2978.0,3691533,8115,...,,,,North America,\n,,,,,
1,,\nAsia\n,189652997,118877.0,1479603,237.0,183788585,66794.0,4384809,10850,...,,,,Asia,\n,,,,,
2,,\nEurope\n,226902293,45625.0,1919533,145.0,220507760,103171.0,4475000,9314,...,,,,Europe,\n,,,,,
3,,\nSouth America\n,64077512,,1329360,,62348474,2325.0,399678,10273,...,,,,South America,\n,,,,,
4,,\nOceania\n,12364054,1652.0,20730,21.0,12199220,,144104,104,...,,,,Australia/Oceania,\n,,,,,
5,,\nAfrica\n,12644904,,257605,,11987562,,399737,1016,...,,,,Africa,\n,,,,,
6,,\n\n,721,,15,,706,,0,0,...,,,,,\n,,,,,
7,,World,621964233,166154.0,6545736,403.0,601923636,175268.0,13494861,39672,...,,,,All,\n,,,,,
8,1.0,USA,98104113,,1083798,,94855749,,2164566,2911,...,1116585485.0,3332495.0,335059865.0,North America,3,309.0,0.0,,,6460.0
9,2.0,India,44583360,,528611,,44013999,,40750,698,...,894416853.0,634182.0,1410347555.0,Asia,32,2668.0,2.0,,,29.0


In [None]:
# Show the last 10 rows
worldometers_df.tail(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
236,229.0,MS Zaandam,9,,2,,7,,0,,...,,,,,,,,,,
237,230.0,China,250293,170.0,5226,,242070,164.0,2997,41.0,...,160000000.0,111163.0,1439323776.0,Asia,5751,275416.0,9.0,0.1,,2.0
238,,Total:,116321752,,1538890,,111091329,,3691533,8115.0,...,,,,North America,,,,,,
239,,Total:,189652997,118877.0,1479603,237.0,183788585,66794.0,4384809,10850.0,...,,,,Asia,,,,,,
240,,Total:,226902293,45625.0,1919533,145.0,220507760,103171.0,4475000,9314.0,...,,,,Europe,,,,,,
241,,Total:,64077512,,1329360,,62348474,,399678,10273.0,...,,,,South America,,,,,,
242,,Total:,12364054,1652.0,20730,21.0,12199220,0.0,144104,104.0,...,,,,Australia/Oceania,,,,,,
243,,Total:,12644904,,257605,,11987562,,399737,1016.0,...,,,,Africa,,,,,,
244,,Total:,721,,15,,706,,0,0.0,...,,,,,,,,,,
245,,Total:,621964233,166154.0,6545736,403.0,601923636,175268.0,13494861,39672.0,...,,,,All,\n,,,,,


In [None]:
# Drop the first 7 rows: the aggregate rows (North America, Asia, Europe, ...) that come before
# the "World" row in the head(10) output above.
# NOTE: the positions are hard-coded and the drop is in place, so re-running this cell removes 7 more rows.
worldometers_df.drop(worldometers_df.index[0:7], inplace = True)

In [None]:
# Drop the trailing "Total:" summary rows seen in the tail(10) output above; after the previous
# drop they start at position 231 (the index has not been reset yet, so positions are used).
# NOTE: the positions are hard-coded — re-check them if the table on the website changes.
worldometers_df.drop(worldometers_df.index[231:240], inplace = True)

In [None]:
# Reset the index of the DataFrame, and use the default one instead
worldometers_df.reset_index(inplace = True, drop = True)

In [None]:
# Drop the “#” column
worldometers_df.drop("#", inplace = True, axis=1)

In [None]:
# Show the first 10 rows
worldometers_df.head(10)

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,...,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,World,621964233,166154.0,6545736,403.0,601923636,175268.0,13494861,39672,79792,...,,,,All,\n,,,,,
1,USA,98104113,,1083798,,94855749,,2164566,2911,292796,...,1116585485.0,3332495.0,335059865.0,North America,3,309.0,0.0,,,6460.0
2,India,44583360,,528611,,44013999,,40750,698,31612,...,894416853.0,634182.0,1410347555.0,Asia,32,2668.0,2.0,,,29.0
3,France,35291584,,155045,,34508061,,628478,869,538011,...,271490188.0,4138796.0,65596411.0,Europe,2,423.0,0.0,,,9581.0
4,Brazil,34696863,,685978,,33838636,,172249,8318,160680,...,63776166.0,295345.0,215938217.0,South America,6,315.0,3.0,,,798.0
5,Germany,33216006,,149808,,32284200,32300.0,781998,1406,393634,...,122332384.0,1449730.0,84382898.0,Europe,3,563.0,1.0,,,9267.0
6,S. Korea,24740635,30846.0,28364,46.0,23976569,28168.0,735702,363,481637,...,15804065.0,307665.0,51367817.0,Asia,2,1811.0,3.0,600.0,0.9,14322.0
7,UK,23621952,,189919,,23344215,4230.0,87818,146,343925,...,522526476.0,7607757.0,68683378.0,Europe,3,362.0,0.0,,,1279.0
8,Italy,22395282,,177024,,21758885,,459373,139,371625,...,246438376.0,4089373.0,60263118.0,Europe,3,340.0,0.0,,,7623.0
9,Japan,21229216,42173.0,44678,127.0,20380089,,804449,204,169007,...,76201407.0,606645.0,125611198.0,Asia,6,2811.0,2.0,336.0,1.0,6404.0


In [None]:
# Show the last 10 rows
worldometers_df.tail(10)

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,...,TotalTests,Tests/\n1M pop\n,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
221,Saint Helena,1355,,,,2.0,,1353,,221514.0,...,,,6117.0,Africa,5.0,,,,,221187.0
222,Macao,793,,6.0,,787.0,,0,,1185.0,...,7850.0,11732.0,669133.0,Asia,844.0,111522.0,85.0,,,
223,Wallis and Futuna,761,,7.0,,438.0,,316,,70463.0,...,20508.0,1898889.0,10800.0,Australia/Oceania,14.0,1543.0,1.0,,,29259.0
224,Diamond Princess,712,,13.0,,699.0,,0,,,...,,,,,,,,,,
225,Niue,80,,,,80.0,,0,,48455.0,...,,,1651.0,Australia/Oceania,21.0,,,,,
226,Vatican City,29,,,,29.0,,0,,36025.0,...,,,805.0,Europe,28.0,,,,,
227,Tuvalu,20,,,,,,20,,1651.0,...,,,12117.0,Australia/Oceania,606.0,,,,,1651.0
228,Western Sahara,10,,1.0,,9.0,,0,,16.0,...,,,630195.0,Africa,63020.0,630195.0,,,,
229,MS Zaandam,9,,2.0,,7.0,,0,,,...,,,,,,,,,,
230,China,250293,170.0,5226.0,,242070.0,164.0,2997,41.0,174.0,...,160000000.0,111163.0,1439323776.0,Asia,5751.0,275416.0,9.0,0.1,,2.0


In [None]:
# Export the Pandas DataFrame to .csv file
worldometers_df.to_csv("covid_data.csv", index = False)

In [None]:
# Read the .csv file
worldometers_csv = pd.read_csv("covid_data.csv")