In [2]:
# data science
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import datetime

# API
import requests
import json

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func

In [3]:
# Create engine using the `covid.sqlite` database file
engine = create_engine("sqlite:///covid.sqlite")

In [4]:
# INSPECT to confirm existence

# Create the inspector and connect it to the engine
inspector_gadget = inspect(engine)

# Collect the names of tables within the database
tables = inspector_gadget.get_table_names()

# print metadata for each table
for table in tables:
    print(table)
    print("-----------")
    
    # get columns
    columns = inspector_gadget.get_columns(table)
    for column in columns:
        print(column["name"], column["type"])

    print()

covid
-----------
index BIGINT
Serial Number BIGINT
Country TEXT
Total Cases INTEGER
Total Deaths INTEGER
Total Recovered INTEGER
Active Cases INTEGER
Total Test INTEGER
Population INTEGER
Continent TEXT
country_code TEXT
latitude FLOAT
longitude FLOAT



In [5]:
# # user inputs
# user_region = 'All'
# user_min_attempts = 0

# # switch on user_region
# if user_region == 'All':
#     where_clause = "and 1=1"
# else:
#     where_clause = f"and region = '{user_region}'"

# build the query
query = f"""
    SELECT
        *
    FROM
        covid
        ;
"""

# execute query
df = pd.read_sql(text(query), con=engine)
df.head(10)

Unnamed: 0,index,Serial Number,Country,Total Cases,Total Deaths,Total Recovered,Active Cases,Total Test,Population,Continent,country_code,latitude,longitude
0,0,1,United States,104196861,1132935,101322779,1741147,1159832679,334805269,North America,US,37.09024,-95.712891
1,1,2,India,44682784,530740,44150289,1755,915265788,1406631776,Asia,IN,20.593684,78.96288
2,2,3,France,39524311,164233,39264546,95532,271490188,65584518,Europe,FR,46.227638,2.213749
3,3,4,Germany,37779833,165711,37398100,216022,122332384,83883596,Europe,DE,51.165691,10.451526
4,4,5,Brazil,36824580,697074,35919372,208134,63776166,215353593,South America,BR,-14.235004,-51.92528
5,5,6,Japan,32588442,68399,21567425,10952618,92144639,125584838,Asia,JP,36.204824,138.252924
6,6,7,S. Korea,30197066,33486,29740877,422703,15804065,51329899,Asia,KR,35.907757,127.766922
7,7,8,Italy,25453789,186833,25014986,251970,265478247,60262770,Europe,IT,41.87194,12.56738
8,8,9,UK,24274361,204171,24020088,50102,522526476,68497907,Europe,GB,55.378051,-3.435973
9,9,10,Russia,21958696,395108,21356008,207580,273400000,145805947,Asia,RU,61.52401,105.318756


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   index            197 non-null    int64  
 1   Serial Number    197 non-null    int64  
 2   Country          197 non-null    object 
 3   Total Cases      197 non-null    int64  
 4   Total Deaths     197 non-null    int64  
 5   Total Recovered  197 non-null    int64  
 6   Active Cases     197 non-null    int64  
 7   Total Test       197 non-null    int64  
 8   Population       197 non-null    int64  
 9   Continent        197 non-null    object 
 10  country_code     185 non-null    object 
 11  latitude         195 non-null    float64
 12  longitude        196 non-null    float64
dtypes: float64(2), int64(8), object(3)
memory usage: 20.1+ KB


In [7]:
df.describe()

Unnamed: 0,index,Serial Number,Total Cases,Total Deaths,Total Recovered,Active Cases,Total Test,Population,latitude,longitude
count,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0,195.0,196.0
mean,105.619289,106.619289,3384525.0,33985.97,3251507.0,99032.09,35050340.0,39349920.0,19.594188,13.960951
std,64.350778,64.350778,10191920.0,112074.4,9845320.0,794791.0,122090000.0,147136900.0,23.781978,67.306978
min,0.0,1.0,1403.0,1.0,438.0,0.0,7850.0,4965.0,-40.900557,-177.156097
25%,50.0,51.0,38240.0,314.0,36366.0,82.0,401622.0,1184817.0,5.787949,-15.212844
50%,102.0,103.0,329227.0,3164.0,325934.0,1319.0,2697814.0,7040745.0,18.109581,18.687854
75%,160.0,161.0,1746997.0,16926.0,1731007.0,11740.0,15804060.0,29178080.0,40.106102,46.366989
max,224.0,225.0,104196900.0,1132935.0,101322800.0,10952620.0,1159833000.0,1412000000.0,71.706936,179.414413


In [8]:
query = """
            SELECT 
                "Total Cases",
                "Active Cases",
                "Total Recovered",
                Continent
                        
            From 
                covid
            Group by 
                 Continent
            Order by 
                "Total Cases" DESC;
            
            """
df3 = pd.read_sql(text(query), con=engine)
df3.head()
# df3.info()

Unnamed: 0,Total Cases,Active Cases,Total Recovered,Continent
0,104196861,1741147,101322779,North America
1,44682784,1755,44150289,Asia
2,39524311,95532,39264546,Europe
3,36824580,208134,35919372,South America
4,11295446,41060,11235771,Oceania


In [9]:
query = """
            SELECT 
                "Total Cases",
                "Active Cases",
                "Total Recovered",
               Country,
               Continent
                        
            From 
                covid
            Order by 
                "Total Cases" DESC;
            
            """
df3 = pd.read_sql(text(query), con=engine)
df3.head(10)

Unnamed: 0,Total Cases,Active Cases,Total Recovered,Country,Continent
0,104196861,1741147,101322779,United States,North America
1,44682784,1755,44150289,India,Asia
2,39524311,95532,39264546,France,Europe
3,37779833,216022,37398100,Germany,Europe
4,36824580,208134,35919372,Brazil,South America
5,32588442,10952618,21567425,Japan,Asia
6,30197066,422703,29740877,S. Korea,Asia
7,25453789,251970,25014986,Italy,Europe
8,24274361,50102,24020088,UK,Europe
9,21958696,207580,21356008,Russia,Asia


In [11]:
query = """
            SELECT 
                "Total Cases",
                "Active Cases",
                "Total Recovered",
               Country,
               Continent,
               Population
                        
            From 
                covid
            Order by 
                "Population" DESC;
            
            """
df3 = pd.read_sql(text(query), con=engine)
df3.head(10)

Unnamed: 0,Total Cases,Active Cases,Total Recovered,Country,Continent,Population
0,503302,118977,379053,China,Asia,1412000000
1,44682784,1755,44150289,India,Asia,1406631776
2,104196861,1741147,101322779,United States,North America,334805269
3,6730289,4264,6565208,Indonesia,Asia,279134505
4,1576313,6984,1538689,Pakistan,Asia,229488994
5,266463,3458,259850,Nigeria,Africa,216746934
6,36824580,208134,35919372,Brazil,South America,215353593
7,2037556,15420,1992694,Bangladesh,Asia,167885689
8,21958696,207580,21356008,Russia,Asia,145805947
9,7368252,429421,6606633,Mexico,North America,131562772


In [None]:
# user inputs
user_continent = 'All'
user_min_attempts = 0

# switch on user_region
if user_continent == 'All':
    where_clause = "and 1=1"
else:
    where_clause = f"and Continent = '{user_continent}'"

