In [10]:
import sqlite3
import pandas as pd
import os
import helper as h

### Connect to Database

Since I am using the SQLite database, the main table needs to be imported from this csv file https://covid.ourworldindata.org/data/owid-covid-data.csv manually to the database if one would like to update the data source.

In [11]:
# Create a SQL connection to our SQLite database
con = sqlite3.connect("data/owid_covid_data.sqlite3")
con.row_factory = sqlite3.Row # fetch both column names and value pairs
cur = con.cursor()

### Create output folder if not exists

In [12]:
# if folder to save the query results does not exist, create one
path = 'df_output'
if not os.path.exists(path):
    os.makedirs(path)

### Check if data exists

In [13]:
# check if main data exist
h.execute_q('''
    SELECT * 
    FROM main
    ORDER BY location, date
    LIMIT 1
''', cur).head()

no such table: main


AttributeError: 'int' object has no attribute 'head'

In [None]:
# Extract covid deaths info table from main
h.execute_q('''
    CREATE TABLE CovidDeaths
    AS SELECT
        iso_code, continent, location, date, population,
        total_cases, new_cases,
        CAST(new_cases_smoothed AS REAL) new_cases_smoothed,
        CAST(total_deaths AS REAL) total_deaths,
        CAST(new_deaths AS REAL) new_deaths, 
        CAST(new_deaths_smoothed AS REAL) new_deaths_smoothed,
        total_cases_per_million, new_cases_per_million,
        CAST(new_cases_smoothed_per_million AS REAL) new_cases_smoothed_per_million,
        CAST(total_deaths_per_million AS REAL) total_deaths_per_million,
        CAST(new_deaths_per_million AS REAL) new_deaths_per_million,
        CAST(new_deaths_smoothed_per_million AS REAL) new_deaths_smoothed_per_million,
        CAST(reproduction_rate AS REAL) reproduction_rate,
        CAST(icu_patients AS INT) icu_patients,
        CAST(icu_patients_per_million AS REAL) icu_patients_per_million,
        CAST(hosp_patients AS INT) hosp_patients,
        CAST(hosp_patients_per_million AS REAL),
        CAST(weekly_icu_admissions AS INT) weekly_icu_admissions,
        CAST(weekly_icu_admissions_per_million AS REAL) weekly_icu_admissions_per_million,
        CAST(weekly_hosp_admissions AS INT) weekly_hosp_admissions,
        CAST(weekly_hosp_admissions_per_million AS REAL) weekly_hosp_admissions_per_million
    FROM main;
''', cur)

In [None]:
# Extract covid vaccination info table from main
h.execute_q('''
    CREATE TABLE CovidVacc
    AS SELECT
        iso_code, continent, location, date, 
        CAST(new_tests AS REAL) new_tests,
        CAST(total_tests AS REAL) total_tests,
        CAST(total_tests_per_thousand AS REAL) total_tests_per_thousand,
        CAST(new_tests_per_thousand AS REAL) new_tests_per_thousand,
        CAST(new_tests_smoothed AS REAL) new_tests_smoothed,
        CAST(new_tests_smoothed_per_thousand AS REAL) new_tests_smoothed_per_thousand,
        CAST(positive_rate AS REAL) positive_rate,
        CAST(tests_per_case AS REAL) tests_per_case,
        tests_units, 
        CAST(total_vaccinations AS INT) total_vaccinations,
        CAST(people_vaccinated AS INT) people_vaccinated,
        CAST(people_fully_vaccinated AS INT) people_fully_vaccinated,
        CAST(total_boosters AS INT) total_boosters,
        CAST(new_vaccinations AS INT) new_vaccinations,
        CAST(new_vaccinations_smoothed AS INT) new_vaccinations_smoothed,
        CAST(total_vaccinations_per_hundred AS REAL) total_vaccinations_per_hundred,
        CAST(people_vaccinated_per_hundred AS REAL) people_vaccinated_per_hundred,
        CAST(people_fully_vaccinated_per_hundred AS REAL) people_fully_vaccinated_per_hundred,
        CAST(total_boosters_per_hundred AS REAL) total_boosters_per_hundred,
        CAST(new_vaccinations_smoothed_per_million AS REAL) new_vaccinations_smoothed_per_million,
        CAST(new_people_vaccinated_smoothed AS INT) new_people_vaccinated_smoothed,
        CAST(new_people_vaccinated_smoothed_per_hundred AS REAL) new_people_vaccinated_smoothed_per_hundred,
        CAST(stringency_index AS REAL) stringency_index,
        CAST(population_density AS REAL) population_density,
        CAST(median_age AS REAL) median_age,
        CAST(aged_65_older AS REAL) aged_65_older,
        CAST(aged_70_older AS REAL) aged_70_older,
        CAST(gdp_per_capita AS REAL) gdp_per_capita,
        CAST(extreme_poverty AS REAL) extreme_poverty,
        CAST(cardiovasc_death_rate AS REAL) cardiovasc_death_rate,
        CAST(diabetes_prevalence AS REAL) diabetes_prevalence,
        CAST(female_smokers AS REAL) female_smokers,
        CAST(male_smokers AS REAL) male_smokers,
        CAST(handwashing_facilities AS REAL) handwashing_facilities,
        CAST(hospital_beds_per_thousand AS REAL) hospital_beds_per_thousand,
        CAST(life_expectancy AS REAL) life_expectancy,
        CAST(human_development_index AS REAL) human_development_index,
        CAST(excess_mortality_cumulative_absolute AS REAL) excess_mortality_cumulative_absolute,
        CAST(excess_mortality_cumulative AS REAL) excess_mortality_cumulative,
        CAST(excess_mortality AS REAL) excess_mortality,
        CAST(excess_mortality_cumulative_per_million AS REAL) excess_mortality_cumulative_per_million    
    FROM main;
''', cur)

In [None]:
# To drop the main table (optional)

# h.execute_q('''
#     DROP TABLE main
# ''', cur)

In [14]:
# check if covid deaths table exist
h.execute_q('''
    SELECT *
    FROM CovidDeaths
    ORDER BY location, date
    LIMIT 5
''', cur).columns

Index(['iso_code', 'continent', 'location', 'date', 'population',
       'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths',
       'new_deaths', 'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'CAST(hosp_patients_per_million AS REAL)', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million'],
      dtype='object')

In [15]:
# check if covid vaccination table exist
h.execute_q('''
    SELECT *
    FROM CovidVacc
    ORDER BY location, date
    LIMIT 5
''', cur).columns

Index(['iso_code', 'continent', 'location', 'date', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
       'people_fully_vaccinated_per_hundred', 'total_boosters_per_hundred',
       'new_vaccinations_smoothed_per_million',
       'new_people_vaccinated_smoothed',
       'new_people_vaccinated_smoothed_per_hundred', 'stringency_index',
       'population_density', 'median_age', 'aged_65_older', 'aged_70_older',
       'gdp_per_capita', 'extreme_poverty', 'cardiovasc_death_rate',
       'diabetes_prevalence', 'female_smokers', 'male_smokers',
       'handwashing_facilities', 'hospital_beds_per_thousand',
  

In [16]:
# Be sure to close the connection
con.close()