# HW13 - Worldometers Coronavirus

Importing libraries:

In [8]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import sqlite3
import pandas as pd

Getting the website HTML and finding the table with the countries:

In [2]:
source = requests.get("https://www.worldometers.info/coronavirus/").text

soup = BeautifulSoup(source, 'lxml')
table = soup.find("table", id="main_table_countries_today" )

In [161]:
type(table)

bs4.element.Tag

First, we get nicely formatted headers:

In [4]:
# Getting the headers
col_names = []
for i in table.find_all("th"):
    col_names.append("_".join(i.get_text(strip=True, separator="\n").replace(" ", "_").splitlines()))

# Correcting some reading error
col_names[col_names.index('Tot\xa0Cases/_1M_pop')] = 'Tot._Cases/_1M_pop'
# There are some remnant column names in the end...
col_names = col_names[:-6]

['#',
 'Country,_Other',
 'Total_Cases',
 'New_Cases',
 'Total_Deaths',
 'New_Deaths',
 'Total_Recovered',
 'New_Recovered',
 'Active_Cases',
 'Serious,_Critical',
 'Tot._Cases/_1M_pop',
 'Deaths/_1M_pop',
 'Total_Tests',
 'Tests/_1M_pop',
 'Population',
 'Continent']

In [11]:
def pd_to_sql(pd_df, path_to_db):
    
    # Creating connection and database
    con = sqlite3.connect(path_to_db)
    
    with con:
        # Converting pd dataframe to SQL
        pd_df.to_sql(name=path_to_db, con=con, if_exists='replace', index = False) # one could also append here

In [6]:
# Index
ind = 0

# List of dictionaries corresponding to the continents
data_cont = []
for i in table.find_all("tr", class_="total_row_world"):
    # Numerating from 1
    ind += 1
    # Separating the text in the row
    dt = i.text.strip().split("\n")
    # Continent name
    continent = dt[0]
    # Creating a dictionary from the rest of the values
    dic_inf = dict(zip(col_names[2:], dt[2:]))
    # Trying to convert all the numbers to integers
    for key, value in dic_inf.items():
        try:
            dic_inf[key] = int(value.replace(",", ""))
        except ValueError:
            pass
    # Encompassing dictionary for each row
    dic = {
        "#": ind,
        "Continent,Other": continent,
        **dic_inf
    }
    data_cont.append(dic)


In [9]:
# Dropping one random weird row (7)
cont = pd.DataFrame(data_cont).set_index("#").drop(7)

# Reindexing as on the website
cont.index = np.arange(1, len(cont) + 1)
cont.index.name = "#"

Unnamed: 0_level_0,"Continent,Other",Total_Cases,New_Cases,Total_Deaths,New_Deaths,Total_Recovered,New_Recovered,Active_Cases,"Serious,_Critical",Tot._Cases/_1M_pop,Deaths/_1M_pop,Total_Tests,Tests/_1M_pop,Population,Continent
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,North America,95872928,6060,1427708,244.0,69510210,5878.0,24935010.0,9437.0,,,,,,North America
2,Asia,129601984,726095,1378818,1171.0,113874613,134429.0,14348553.0,28530.0,,,,,,Asia
3,South America,55419552,32,1269479,,50459110,,3690963.0,12309.0,,,,,,South America
4,Europe,167715318,225466,1746866,1113.0,148535948,475852.0,17432504.0,11632.0,,,,,,Europe
5,Oceania,4463110,72397,8378,37.0,3852697,17676.0,602035.0,151.0,,,,,,Australia/Oceania
6,Africa,11649905,568,251870,5.0,10814082,1649.0,583953.0,1506.0,,,,,,Africa
7,World,1030618,6083134,2570,397047366.0,635461,61593018.0,63565.0,59620.0,780.4,,,,All,


In [12]:
#Creating databases

pd_to_sql(cont, "cont.db")

In [19]:
ind = 0
country_data = []
for i in table.tbody.find_all("tr")[8:]:
    ind += 1
    dt = i.text.strip().split("\n")
    continent = dt[1]
    dic_inf = dict(zip(col_names[2:], dt[2:]))
    for key, value in dic_inf.items():
        try:
            dic_inf[key] = int(value.replace(",", ""))
        except ValueError:
            pass
    dic = {
        "#": ind,
        "Country,Other": continent,
        **dic_inf
    }
    
    country_data.append(dic)

In [20]:
country = pd.DataFrame(country_data).set_index("#")
pd_to_sql(country, "country.db")

## Creating a Class to handle a new country that could be inserted

In [66]:
class COVID_Country:
    """A Sample COVID_Country Class for COVID statistics
    It can be used to create a new object, country, and add it to the table
    """

    def __init__(self, country, total_cases, new_cases, total_deaths, new_deaths, total_recovered, new_recovered,
                 active_cases, ser_cr, cas_p_mil, death_p_mil, tot_test, test_p_mil, pop, continent):
        """
        Initiating the class
        @param country: str, country
        @param total_cases: int, total cases
        @param new_cases: int, new cases
        @param total_deaths: int, total deaths
        @param new_deaths: int, new deaths
        @param total_recovered: int, total recovered
        @param new_recovered: int, new, recovered
        @param active_cases: int, active cases
        @param ser_cr: int, serious or critical cases
        @param cas_p_mil: int, cases per million
        @param death_p_mil: int, deaths per million
        @param tot_test: int, total tests
        @param test_p_mil: int, tests per million
        @param pop: int, population size
        @param continent: str, continent of the country
        """
        self.country = country
        self.total_cases = total_cases
        self.new_cases = new_cases
        self.total_deaths = total_deaths
        self.new_deaths = new_deaths
        self.total_recovered = total_recovered
        self.new_recovered = new_recovered
        self.active_cases = active_cases
        self.ser_cr = ser_cr
        self.cas_p_mil = cas_p_mil
        self.death_p_mil = death_p_mil
        self.tot_test = tot_test
        self.test_p_mil = test_p_mil
        self.pop = pop
        self.continent = continent

    def __repr__(self):
        """
        Class representation when printed
        @return: None
        """
        return f"Country {self.country} with total cases of {self.total_cases} and population of {self.pop}"


In [67]:
utopia = COVID_Country("utopia", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3500000, "dreams")

## Creating some functions to play with the database

In [69]:
def insert_country(country):
    """
    Inserts a new COVID_Country object into the database
    @param country: COVID_Country object
    @return: None
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query = "INSERT INTO `country.db` VALUES (" +("?,"*15)[:-1] + ")"
        cur.execute(query, tuple(country.__dict__.values()))

In [171]:
insert_country(utopia)

INSERT INTO `country.db` VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)


In [165]:
def get_stat_by_country(country_name, col_names):
    """
    Gets statistics (row) about the country of the choice
    @param country_name: str, name of the country
    @param col_names: , list of column names
    @return: list, statistics about the country
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query = "SELECT * FROM `country.db` WHERE `Country,Other`=:country"
        cur.execute(query, {'country': country_name})
        stat_dic = dict(zip(col_names, *cur.fetchall()))
        return stat_dic

In [166]:
get_stat_by_country("Georgia", col_names)

{'#': 'Georgia',
 'Country,_Other': 1639678,
 'Total_Cases': '888',
 'New_Cases': '16612',
 'Total_Deaths': '17',
 'New_Deaths': '1594011',
 'Total_Recovered': '2831',
 'New_Recovered': '29055',
 'Active_Cases': '',
 'Serious,_Critical': 412380.0,
 'Tot._Cases/_1M_pop': '4178',
 'Deaths/_1M_pop': '16142347',
 'Total_Tests': '4059807',
 'Tests/_1M_pop': 3976137.0,
 'Population': 'Europe'}

In [167]:
def update_continent(country_name, continent):
    """
    Changing the continent of a country
    @param country_name: str, name of the country
    @param continent: str, name of the continent
    @return: None
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query = """
        UPDATE `country.db` SET Continent = :continent
        WHERE `Country,Other`=:country
        """
        cur.execute(query, {'country': country_name, 'continent': continent})


In [168]:
update_continent("Georgia", "Europe")

In [169]:
def update_total_count(country_name, new_cases):
    """
    Updates total count of cases and new cases
    @param country_name: str, name of the country
    @param new_cases: int, number of the new cases
    @return: None
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query = """
        UPDATE `country.db` 
        SET 
            Total_Cases = Total_Cases + :new_cases,
            New_Cases = :new_cases
        WHERE
            `Country,Other` = :country
        """
        cur.execute(query, {'country': country_name, 'new_cases': new_cases})


In [173]:
update_total_count('utopia', 1)


In [176]:
def max_tot_count_country_on_cont():
    """
    Returns a list of countries with maximal total count on a continent/undefined
    @return: list
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query = """
        SELECT Continent, `Country,Other`, Total_Cases, New_Cases, `Tot._Cases/_1M_pop`, Population
        FROM `country.db`
        GROUP BY Continent
        ORDER BY Total_Cases DESC
        """
        cur.execute(query)
        return cur.fetchall()

In [177]:
max_tot_count_country_on_cont()

[('North America', 'USA', 81289602, '', 243160.0, 334305317.0),
 ('Asia', 'India', 43001477, '', 30648.0, 1403087655.0),
 ('South America', 'Brazil', 29478039, '', 137024.0, 215130305.0),
 ('Europe', 'France', 23758447, '', 362618.0, 65519286.0),
 ('Australia/Oceania', 'Australia', 3767956, '52341', 144903.0, 26003221.0),
 ('Africa', 'South Africa', 3698803, '', 61056.0, 60580760.0),
 (None, 'Diamond Princess', 712, '', None, None),
 ('dreams', 'utopia', 2, '1', 0.0, 3500000.0)]

In [159]:
def count_country_per_continent():
    """
    Returns a list of tuples with count of countries on each continent
     - (count_of_countries, continent)
    @return: list, list of (count_of_countries, continent)
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query="""
        SELECT COUNT(`Country,Other`), Continent
        FROM `country.db`
        GROUP BY Continent
        """
        cur.execute(query)
        return cur.fetchall()

In [160]:
count_country_per_continent()

[(2, None),
 (58, 'Africa'),
 (48, 'Asia'),
 (17, 'Australia/Oceania'),
 (49, 'Europe'),
 (39, 'North America'),
 (14, 'South America')]

In [112]:
def remove_country(country_name):
    """
    Removes a country
    @param country_name: str, name of the country
    @return: None
    """
    con = sqlite3.connect("country.db")
    cur = con.cursor()
    with con:
        query = """
        DELETE from `country.db`
        WHERE 
            `Country,Other` = :country
        """
        cur.execute(query, {"country": country_name})

In [114]:
remove_country("utopia")