# Scraping Covid-19 statistics using BeautifulSoup

In [1]:
#Importing modules-

import requests
from bs4 import BeautifulSoup

In [2]:
#URL for scraping data-

url = 'https://www.worldometers.info/coronavirus/countries-where-coronavirus-has-spread/'

In [3]:
#Get URL html-

page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')

In [8]:
data = []

In [4]:
print(soup)


<!DOCTYPE html>

<!--[if IE 8]> <html lang="en" class="ie8"> <![endif]-->
<!--[if IE 9]> <html lang="en" class="ie9"> <![endif]-->
<!--[if !IE]><!--> <html lang="en"> <!--<![endif]-->
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>Countries where Coronavirus has spread - Worldometer</title>
<meta content="Where is Coronavirus in the world? List of countries with confirmed cases and deaths (total cumulative number) due to the novel coronavirus COVID-19" name="description"/>
<!-- Favicon -->
<link href="/favicon/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
<link href="/favicon/apple-icon-57x57.png" rel="apple-touch-icon" sizes="57x57"/>
<link href="/favicon/apple-icon-60x60.png" rel="apple-touch-icon" sizes="60x60"/>
<link href="/favicon/apple-icon-72x72.png" rel="apple-touch-icon" sizes="72x72"/>
<link href="/favicon/apple-icon-76x76.png" rel="apple-touch-icon" 

In [5]:
#soup.find_all('td') will scrape every element in the url's table-

data_iterator = iter(soup.find_all('td'))

In [6]:
print(data_iterator)

<list_iterator object at 0x0000020B59BE1CC0>


In [10]:
# Data_iterator is the iterator of the table
# This loop will keep repeating until there is data available in the iterator

while True:
    try:
        country = next(data_iterator).text
        confirmed = next(data_iterator).text.replace(',', '')  #Remove commas from confirmed count
        deaths = next(data_iterator).text.replace(',', '')  #Remove commas from death count
        continent = next(data_iterator).text

        #Convert confirmed and deaths to integers
        
        confirmed = int(confirmed) if confirmed else 0
        deaths = int(deaths) if deaths else 0

        data.append((country, confirmed, deaths, continent))

    #StopIteration error is raised when there are no more elements left to iterate through
    
    except StopIteration:
        break

In [11]:
#Sort the data by the number of confirmed cases-

data.sort(key = lambda row: row[1], reverse = True)

In [12]:
print(data)

[('France', 40118617, 167508, 'Europe'), ('Germany', 38428685, 174352, 'Europe'), ('Brazil', 37639324, 703399, 'South America'), ('Japan (+Diamond Princess)', 33804284, 74707, 'Asia'), ('South Korea', 31904667, 34893, 'Asia'), ('Italy', 25879984, 190625, 'Europe'), ('United Kingdom', 24618436, 226278, 'Europe'), ('Russia', 22949243, 399339, 'Europe'), ('Turkey', 17232066, 102174, 'Asia'), ('Spain', 13890555, 121416, 'Europe'), ('Australia', 11653091, 21258, 'Australia/Oceania'), ('Vietnam', 11618751, 43206, 'Asia'), ('Taiwan', 10239998, 19005, 'Asia'), ('Argentina', 10054251, 130509, 'South America'), ('Netherlands', 8610372, 22992, 'Europe'), ('Mexico', 7627964, 334250, 'North America'), ('Iran', 7612280, 146290, 'Asia'), ('Indonesia', 6810778, 161836, 'Asia'), ('Poland', 6517529, 119622, 'Europe'), ('Colombia', 6369916, 142780, 'South America'), ('Greece', 6095350, 37052, 'Europe'), ('Austria', 6079622, 22522, 'Europe'), ('Portugal', 5590870, 26824, 'Europe'), ('Ukraine', 5557995, 11

### To print the data in human-readable format, we will use the library 'texttable':

In [14]:
#create texttable object

import texttable as tt
table = tt.Texttable()

In [15]:
#Add an empty row at the beginning for the headers-

table.add_rows([(None, None, None, None)] + data)

<texttable.Texttable at 0x20b5cf733a0>

In [17]:
#'l' denotes left, 'c' denotes center and 'r' denotes right-

table.set_cols_align(('c', 'c', 'c', 'c')) 
table.header((' Country ', ' Number of cases ', ' Deaths ', ' Continent '))


print(table.draw())

+---------------------------+-------------------+----------+-------------------+
|          Country          |  Number of cases  |  Deaths  |     Continent     |
|          France           |     40118617      |  167508  |      Europe       |
+---------------------------+-------------------+----------+-------------------+
|          Germany          |     38428685      |  174352  |      Europe       |
+---------------------------+-------------------+----------+-------------------+
|          Brazil           |     37639324      |  703399  |   South America   |
+---------------------------+-------------------+----------+-------------------+
| Japan (+Diamond Princess) |     33804284      |  74707   |       Asia        |
+---------------------------+-------------------+----------+-------------------+
|        South Korea        |     31904667      |  34893   |       Asia        |
+---------------------------+-------------------+----------+-------------------+
|           Italy           