In [1]:
import pandas as pd
import os
from datetime import datetime, timedelta
from sqlalchemy import create_engine

# Setup Steps

In [2]:
# Set where .csv files needs to be saved
base_output_path = "../../../data"

# Create a POSTGRES database with the name 'COVID19_db'
# Replace username:password if it's not set to postgres:postgres
DATABASE_URI = os.environ.get('DATABASE_URL', '') or "postgresql://postgres:password@localhost:5432/COVID19_db"
print(DATABASE_URI)

engine = create_engine(DATABASE_URI)

postgresql://postgres:password@localhost:5432/COVID19_db


In [3]:
# Set URL's

# REALTIME DOWNLOADS
url_covid = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'
# url_worldometer = 'https://www.worldometers.info/coronavirus/coronavirus-age-sex-demographics/'
url_covid_states = 'https://raw.githubusercontent.com/COVID19Tracking/covid-tracking-data/master/data/states_daily_4pm_et.csv'

# MANUALLY DOWNLOAD FIRST - url_covid_ind_cases = 'https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset#COVID19_line_list_data.csv
# url_covid_ind_cases = os.path.join(base_output_path, 'COVID19_open_line_list.csv')
url_covid_ind_cases = os.path.join(base_output_path, 'COVID19_line_list_data.csv')
url_tests_and_hospital = os.path.join(base_output_path, 'us_states_covid19_daily.csv')
url_states = os.path.join(base_output_path, 'states.csv')
url_census = os.path.join(base_output_path, 'acs2017_census_tract_data.csv')

# url_BoL_stats

In [4]:
# Find last time covid_data was loaded
query_str = open('sql/max_db_date.sql')
query_text = ""
for text in query_str:
    query_text = query_text + text

rs = engine.execute(query_text)

for i in rs:
    last_db_date = (i[0])
print(last_db_date)

2020-03-28


# Worldometer data (age, gender, pre-conditions)

In [5]:
# Get Age, Gender and PreCondition Statistics
# df_worldometer_stats = pd.read_html(url_worldometer, header = 0)
# df_age = df_worldometer_stats[0].iloc[:,0:3].fillna(0)
# df_gender = df_worldometer_stats[1].iloc[:,0:2].fillna(0)
# df_precondition = df_worldometer_stats[2].iloc[:,0:3].fillna(0)

# df_age = df_age.rename(columns={'AGE':'age','DEATH RATE confirmed cases':'pct_deaths_confirmed','DEATH RATE all cases':'pct_deaths_all'})
# df_age['pct_deaths_confirmed'] = df_age['pct_deaths_confirmed'].astype(str).str[:-1]
# df_age['pct_deaths_confirmed'] = pd.to_numeric(df_age['pct_deaths_confirmed']).fillna(0)
# df_age['pct_deaths_all'] = df_age['pct_deaths_all'].str.replace('no fatalities','0')
# df_age['pct_deaths_all'] = pd.to_numeric(df_age['pct_deaths_all'].astype(str).str[:-1]).fillna(0)

# df_gender = df_gender.rename(columns={'SEX':'gender','DEATH RATE confirmed cases':'pct_deaths_confirmed'})
# df_gender['pct_deaths_confirmed'] = pd.to_numeric(df_gender['pct_deaths_confirmed'].astype(str).str[:-1])

# df_precondition = df_precondition.rename(columns={'PRE-EXISTING CONDITION':'precondition','DEATH RATE confirmed cases':'pct_deaths_confirmed','DEATH RATE all cases':'pct_deaths_all'})
# df_precondition['pct_deaths_confirmed'] = pd.to_numeric(df_precondition['pct_deaths_confirmed'].astype(str).str[:-1]).fillna(0)
# df_precondition['pct_deaths_all'] = pd.to_numeric(df_precondition['pct_deaths_all'].astype(str).str[:-1])

In [6]:
# df_age
# output_path = os.path.join(base_output_path, "age_data.csv")
# df_age.to_csv(output_path)
url_age = os.path.join(base_output_path, 'age_data.csv')
df_age = pd.read_csv(url_age)
df_age

Unnamed: 0,age,pct_deaths_confirmed,pct_deaths_all
0,80+ years old,21.9,14.8
1,70-79 years old,0.0,8.0
2,60-69 years old,0.0,3.6
3,50-59 years old,0.0,1.3
4,40-49 years old,0.0,0.4
5,30-39 years old,0.0,0.2
6,20-29 years old,0.0,0.2
7,10-19 years old,0.0,0.2
8,0-9 years old,0.0,0.0


In [7]:
# df_gender
# output_path = os.path.join(base_output_path, "gender_data.csv")
# df_gender.to_csv(output_path
url_gender = os.path.join(base_output_path, 'gender_data.csv')
df_gender = pd.read_csv(url_gender)
df_gender

Unnamed: 0,gender,pct_deaths_confirmed
0,Male,4.7
1,Female,2.8


In [8]:
# df_precondition
# output_path = os.path.join(base_output_path, "precondition_data.csv")
# df_precondition.to_csv(output_path)
url_precondition = os.path.join(base_output_path, 'precondition_data.csv')
df_precondition = pd.read_csv(url_precondition)
df_precondition

Unnamed: 0,precondition,pct_deaths_confirmed,pct_deaths_all
0,Cardiovascular disease,13.2,10.5
1,Diabetes,9.2,7.3
2,Chronic respiratory disease,8.0,6.3
3,Hypertension,8.4,6.0
4,Cancer,7.6,5.6
5,no pre-existing conditions,0.0,0.9


In [9]:
engine.execute( '''TRUNCATE TABLE age_data''' )
df_age.to_sql(name='age_data', con=engine, if_exists='append', index=False)

engine.execute( '''TRUNCATE TABLE gender_data''' )
df_gender.to_sql(name='gender_data', con=engine, if_exists='append', index=False)

engine.execute( '''TRUNCATE TABLE precondition_data''' )
df_precondition.to_sql(name='precondition_data', con=engine, if_exists='append', index=False)

# Census Data

In [10]:
# Census data
df_census = pd.read_csv(url_census)
df_census.head()

Unnamed: 0,TractId,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,1001020100,Alabama,Autauga County,1845,899,946,2.4,86.3,5.2,0.0,...,0.5,0.0,2.1,24.5,881,74.2,21.2,4.5,0.0,4.6
1,1001020200,Alabama,Autauga County,2172,1167,1005,1.1,41.6,54.5,0.0,...,0.0,0.5,0.0,22.2,852,75.9,15.0,9.0,0.0,3.4
2,1001020300,Alabama,Autauga County,3385,1533,1852,8.0,61.4,26.5,0.6,...,1.0,0.8,1.5,23.1,1482,73.3,21.1,4.8,0.7,4.7
3,1001020400,Alabama,Autauga County,4267,2001,2266,9.6,80.3,7.1,0.5,...,1.5,2.9,2.1,25.9,1849,75.8,19.7,4.5,0.0,6.1
4,1001020500,Alabama,Autauga County,9965,5054,4911,0.9,77.5,16.4,0.0,...,0.8,0.3,0.7,21.0,4787,71.4,24.1,4.5,0.0,2.3


In [11]:
engine.execute( '''TRUNCATE TABLE census_data''' )
df_census.to_sql(name='census_data', con=engine, if_exists='append', index=False)

# States and Emergency Dates

In [12]:
# Get States and Emergency Dates data
df_states = pd.read_csv(url_states)
df_states.head()

Unnamed: 0.1,Unnamed: 0,state_name,state,emergency_date
0,1,Alabama,AL,2020-03-13
1,2,Alaska,AK,2020-03-11
2,3,Arizona,AZ,2020-03-11
3,4,Arkansas,AR,2020-03-11
4,5,California,CA,2020-03-04


In [13]:
engine.execute( '''TRUNCATE TABLE states_data''' )
df_states.to_sql(name='states_data', con=engine, if_exists='append', index=False)

# Number of tests performed and people hospitalized per state

In [14]:
# Test data statistics
df_tests_and_hospital = pd.read_csv(url_tests_and_hospital).fillna(0)

df_tests_and_hospital['date'] = pd.to_datetime(df_tests_and_hospital['date'],format='%Y%m%d')
df_tests_and_hospital['dateChecked'] = pd.to_datetime(df_tests_and_hospital['dateChecked']).dt.date
df_tests_and_hospital.head()

Unnamed: 0,date,state,positive,negative,pending,hospitalized,death,total,dateChecked,totalTestResults,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
0,2020-03-25,AK,42.0,1649.0,0.0,1.0,1.0,1691,2020-03-25,1691,1.0,1.0,663.0,6.0,669.0
1,2020-03-25,AL,283.0,2529.0,0.0,0.0,0.0,2812,2020-03-25,2812,0.0,0.0,423.0,68.0,491.0
2,2020-03-25,AR,280.0,1437.0,0.0,22.0,2.0,1717,2020-03-25,1717,2.0,0.0,490.0,62.0,552.0
3,2020-03-25,AS,0.0,0.0,0.0,0.0,0.0,0,2020-03-25,0,0.0,0.0,0.0,0.0,0.0
4,2020-03-25,AZ,450.0,323.0,53.0,8.0,6.0,826,2020-03-25,773,1.0,0.0,10.0,93.0,103.0


In [15]:
engine.execute( '''TRUNCATE TABLE tests_and_hospital_data''' )
df_tests_and_hospital.to_sql(name='tests_and_hospital_data', con=engine, if_exists='append', index=False)

In [16]:
# Test data statistics
df_covid_states = pd.DataFrame()
df_covid_states = pd.read_csv(url_covid_states).fillna(0)

df_covid_states['date'] = pd.to_datetime(df_covid_states['date'],format='%Y%m%d')
df_covid_states['dateChecked'] = pd.to_datetime(df_covid_states['dateChecked']).dt.date
df_covid_states = df_covid_states.rename(columns = {'total':'dontuse_total','dateChecked':'check_date','totalTestResults':'total_tests','deathIncrease':'death_inc','hospitalizedIncrease':'hospital_inc','negativeIncrease':'neg_inc','positiveIncrease':'pos_inc','totalTestResultsIncrease':'tot_tests_inc'})
df_covid_states.head()

Unnamed: 0,date,state,positive,negative,pending,hospitalized,death,dontuse_total,hash,check_date,total_tests,fips,death_inc,hospital_inc,neg_inc,pos_inc,tot_tests_inc
0,2020-03-29,AK,102.0,3232.0,0.0,6.0,2.0,3334,d4c0789e67f59e98176a9ea96200ed348161c6d4,2020-03-29,3334,2,0.0,1.0,396.0,17.0,413.0
1,2020-03-29,AL,806.0,4184.0,0.0,0.0,4.0,4990,9dbf0b598d35897b1f6857899d0a834990f4ec51,2020-03-29,4990,1,1.0,0.0,0.0,110.0,110.0
2,2020-03-29,AR,426.0,3027.0,0.0,48.0,6.0,3453,2c8ed5059d37cc0aa0d20f4f3066a64db930c6c5,2020-03-29,3453,5,1.0,0.0,89.0,22.0,111.0
3,2020-03-29,AS,0.0,0.0,0.0,0.0,0.0,0,f2050b47409f64a09bb4e959284c05d25eca8832,2020-03-29,0,60,0.0,0.0,0.0,0.0,0.0
4,2020-03-29,AZ,919.0,12953.0,0.0,78.0,17.0,13872,14deca609d3762fb4b92807785e2b9c7015661e6,2020-03-29,13872,4,2.0,78.0,5498.0,46.0,5544.0


In [17]:
# engine.execute( '''TRUNCATE TABLE covid_data_all''' )
df_covid_states.to_sql(name='covid_data_states', con=engine, if_exists='append', index=False)

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "hash" of relation "covid_data_states" does not exist
LINE 1: ...ive, pending, hospitalized, death, dontuse_total, hash, chec...
                                                             ^

[SQL: INSERT INTO covid_data_states (date, state, positive, negative, pending, hospitalized, death, dontuse_total, hash, check_date, total_tests, fips, death_inc, hospital_inc, neg_inc, pos_inc, tot_tests_inc) VALUES (%(date)s, %(state)s, %(positive)s, %(negative)s, %(pending)s, %(hospitalized)s, %(death)s, %(dontuse_total)s, %(hash)s, %(check_date)s, %(total_tests)s, %(fips)s, %(death_inc)s, %(hospital_inc)s, %(neg_inc)s, %(pos_inc)s, %(tot_tests_inc)s)]
[parameters: ({'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'AK', 'positive': 102.0, 'negative': 3232.0, 'pending': 0.0, 'hospitalized': 6.0, 'death': 2.0, 'dontuse_total': 3334, 'hash': 'd4c0789e67f59e98176a9ea96200ed348161c6d4', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 3334, 'fips': 2, 'death_inc': 0.0, 'hospital_inc': 1.0, 'neg_inc': 396.0, 'pos_inc': 17.0, 'tot_tests_inc': 413.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'AL', 'positive': 806.0, 'negative': 4184.0, 'pending': 0.0, 'hospitalized': 0.0, 'death': 4.0, 'dontuse_total': 4990, 'hash': '9dbf0b598d35897b1f6857899d0a834990f4ec51', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 4990, 'fips': 1, 'death_inc': 1.0, 'hospital_inc': 0.0, 'neg_inc': 0.0, 'pos_inc': 110.0, 'tot_tests_inc': 110.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'AR', 'positive': 426.0, 'negative': 3027.0, 'pending': 0.0, 'hospitalized': 48.0, 'death': 6.0, 'dontuse_total': 3453, 'hash': '2c8ed5059d37cc0aa0d20f4f3066a64db930c6c5', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 3453, 'fips': 5, 'death_inc': 1.0, 'hospital_inc': 0.0, 'neg_inc': 89.0, 'pos_inc': 22.0, 'tot_tests_inc': 111.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'AS', 'positive': 0.0, 'negative': 0.0, 'pending': 0.0, 'hospitalized': 0.0, 'death': 0.0, 'dontuse_total': 0, 'hash': 'f2050b47409f64a09bb4e959284c05d25eca8832', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 0, 'fips': 60, 'death_inc': 0.0, 'hospital_inc': 0.0, 'neg_inc': 0.0, 'pos_inc': 0.0, 'tot_tests_inc': 0.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'AZ', 'positive': 919.0, 'negative': 12953.0, 'pending': 0.0, 'hospitalized': 78.0, 'death': 17.0, 'dontuse_total': 13872, 'hash': '14deca609d3762fb4b92807785e2b9c7015661e6', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 13872, 'fips': 4, 'death_inc': 2.0, 'hospital_inc': 78.0, 'neg_inc': 5498.0, 'pos_inc': 46.0, 'tot_tests_inc': 5544.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'CA', 'positive': 5708.0, 'negative': 20549.0, 'pending': 64400.0, 'hospitalized': 1034.0, 'death': 123.0, 'dontuse_total': 90657, 'hash': 'c364ec885909accfd4baf8e2d329903900870ba9', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 26257, 'fips': 6, 'death_inc': 22.0, 'hospital_inc': 0.0, 'neg_inc': 0.0, 'pos_inc': 1065.0, 'tot_tests_inc': 1065.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'CO', 'positive': 2061.0, 'negative': 11215.0, 'pending': 0.0, 'hospitalized': 274.0, 'death': 44.0, 'dontuse_total': 13276, 'hash': '10769183d6c8ae4f67d7694c1e90e053315457ad', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 13276, 'fips': 8, 'death_inc': 13.0, 'hospital_inc': 35.0, 'neg_inc': 1273.0, 'pos_inc': 327.0, 'tot_tests_inc': 1600.0}, {'date': datetime.datetime(2020, 3, 29, 0, 0), 'state': 'CT', 'positive': 1993.0, 'negative': 9907.0, 'pending': 0.0, 'hospitalized': 404.0, 'death': 34.0, 'dontuse_total': 11900, 'hash': 'a2fc8b02ed8f3a41030ae22ddb222af3d3c53a8e', 'check_date': datetime.date(2020, 3, 29), 'total_tests': 11900, 'fips': 9, 'death_inc': 7.0, 'hospital_inc': 231.0, 'neg_inc': 2798.0, 'pos_inc': 702.0, 'tot_tests_inc': 3500.0}  ... displaying 10 of 1317 total bound parameter sets ...  {'date': datetime.datetime(2020, 3, 4, 0, 0), 'state': 'WA', 'positive': 39.0, 'negative': 0.0, 'pending': 0.0, 'hospitalized': 0.0, 'death': 0.0, 'dontuse_total': 39, 'hash': '36ea36bcf282dcc480c83b6f2561810b009bc951', 'check_date': datetime.date(2020, 3, 4), 'total_tests': 39, 'fips': 53, 'death_inc': 0.0, 'hospital_inc': 0.0, 'neg_inc': 0.0, 'pos_inc': 0.0, 'tot_tests_inc': 0.0}, {'date': datetime.datetime(2020, 3, 4, 0, 0), 'state': 'WI', 'positive': 1.0, 'negative': 19.0, 'pending': 6.0, 'hospitalized': 0.0, 'death': 0.0, 'dontuse_total': 26, 'hash': '0f8cc0d051348c50905a5d5b43ba3c5e23897523', 'check_date': datetime.date(2020, 3, 4), 'total_tests': 20, 'fips': 55, 'death_inc': 0.0, 'hospital_inc': 0.0, 'neg_inc': 0.0, 'pos_inc': 0.0, 'tot_tests_inc': 0.0})]
(Background on this error at: http://sqlalche.me/e/f405)

# Individual Case Study

In [18]:
# Get data about individual cases)
df_ind_cases = pd.read_csv(url_covid_ind_cases)
df_ind_cases = df_ind_cases.rename(columns={'wuhan(0)_not_wuhan(1)':'wuhan_1_or_0', 
                                            'reporting date': 'reporting_date','Unnamed: 3':'unnamed_3',
                                           'If_onset_approximated':'if_onset_approximated', 'visiting Wuhan':'visit_wuhan',
                                           'from Wuhan':'from_wuhan','Unnamed: 21':'unnamed_21','Unnamed: 22':'unnamed_22',
                                           'Unnamed: 23':'unnamed_23','Unnamed: 24':'unnamed_24','Unnamed: 25':'unnamed_25','Unnamed: 26':'unnamed_26'})
df_ind_cases.head()

Unnamed: 0,id,case_in_country,reporting_date,unnamed_3,summary,location,country,gender,age,symptom_onset,...,recovered,symptom,source,link,unnamed_21,unnamed_22,unnamed_23,unnamed_24,unnamed_25,unnamed_26
0,1,,1/20/2020,,First confirmed imported COVID-19 pneumonia pa...,"Shenzhen, Guangdong",China,male,66.0,01/03/20,...,0,,Shenzhen Municipal Health Commission,http://wjw.sz.gov.cn/wzx/202001/t20200120_1898...,,,,,,
1,2,,1/20/2020,,First confirmed imported COVID-19 pneumonia pa...,Shanghai,China,female,56.0,1/15/2020,...,0,,Official Weibo of Shanghai Municipal Health Co...,https://www.weibo.com/2372649470/IqogQhgfa?fro...,,,,,,
2,3,,1/21/2020,,First confirmed imported cases in Zhejiang: pa...,Zhejiang,China,male,46.0,01/04/20,...,0,,Health Commission of Zhejiang Province,http://www.zjwjw.gov.cn/art/2020/1/21/art_1202...,,,,,,
3,4,,1/21/2020,,new confirmed imported COVID-19 pneumonia in T...,Tianjin,China,female,60.0,,...,0,,人民日报官方微博,https://m.weibo.cn/status/4463235401268457?,,,,,,
4,5,,1/21/2020,,new confirmed imported COVID-19 pneumonia in T...,Tianjin,China,male,58.0,,...,0,,人民日报官方微博,https://m.weibo.cn/status/4463235401268457?,,,,,,


In [None]:
# engine.execute( '''TRUNCATE TABLE individual_case_data''' )
df_ind_cases.to_sql(name='individual_case_data', con=engine, if_exists='append', index=False)

# COVID-19 Data from Johns Hopkins

In [None]:
# Read data into Dataframes
df_covid_phase0 = pd.DataFrame()
df_covid_phase1 = pd.DataFrame()
df_covid_phase2 = pd.DataFrame()
df_covid_phase3 = pd.DataFrame()
df_covid_phase4 = pd.DataFrame() 

# Covid data starts at 01-22-2020.csv save in format 2020-02-01 - LOOP over all / just new ones
# Date format has changed 3 times so date ranges to be processed:
# Phase 0: 2020-01-22 - 2020-01-31 (Country/Region = Country)
# Phase 1: 2020-02-01 - 2020-02-29 (Country/Region = Country, Province/State=US County)
# Phase 2: 2020-03-01 - 2020-03-09 (Country/Region = Country, Province/State=US County, NEW: Lat Long)
# Phase 3: 2020-03-10 - 2020-03-21 (Country/Region = Country, Province/State=STATE!!!!! - no more county level)
# Phase 4: 2020-03-22 - onwards (NEW: Admin2 (=County), Active)

# Reset last_db_date only for inital load. Then leave it to setting at start
last_db_date = datetime.strptime('01-22-2020',"%m-%d-%Y").date()
yesterday_date = datetime.today().date() + timedelta(days=-1)
loop_date = last_db_date

print("Last Date in DB:", last_db_date)
print("Today's Date:", yesterday_date)

while loop_date <= yesterday_date:
    
    # Change to date format used in .csv files
    file_date = datetime.strftime(loop_date, "%m-%d-%Y")
    
    # Create URL and get data
    url_covid_file = url_covid + file_date + ".csv"
    print("Now processing: " + url_covid_file)
    df_covid_loop = pd.read_csv(url_covid_file)
    df_covid_loop['short_date'] = loop_date
    
    # Create output path for each file and save file locally then append to overall dataframe
    output_path = os.path.join(base_output_path, "df_covid_" + file_date + ".csv")
    df_covid_loop.to_csv(output_path)
    
    # Split 5 different formats based on date
    if loop_date < datetime.strptime('02-01-2020',"%m-%d-%Y").date(): 
        df_covid_phase0 = df_covid_phase0.append(df_covid_loop)
    elif loop_date <= datetime.strptime('02-29-2020',"%m-%d-%Y").date(): 
        df_covid_phase1 = df_covid_phase1.append(df_covid_loop)
    elif loop_date < datetime.strptime('03-10-2020',"%m-%d-%Y").date():
        df_covid_phase2 = df_covid_phase2.append(df_covid_loop)
    elif loop_date < datetime.strptime('03-22-2020',"%m-%d-%Y").date():
        df_covid_phase3 = df_covid_phase3.append(df_covid_loop)
    else:
        df_covid_phase4 = df_covid_phase4.append(df_covid_loop)
        
        
    # Get the next file
    loop_date = loop_date + timedelta(days=1)

In [None]:
df_covid_phase0.head()

In [None]:
df_covid_phase0.fillna(0, inplace=True)
df_covid_phase0['Confirmed'] = df_covid_phase0['Confirmed'].astype(int)
df_covid_phase0['Deaths'] = df_covid_phase0['Deaths'].astype(int)
df_covid_phase0['Recovered'] = df_covid_phase0['Recovered'].astype(int)
df_covid_phase0 = df_covid_phase0.rename(columns = {'Province/State':'province_state','Country/Region':'country_region', 'Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered'})
df_covid_phase0['active'] = 0
df_covid_phase0['latitude'] = 0
df_covid_phase0['longitude'] = 0
df_covid_phase0['us_county'] = ""

df_covid_0 = df_covid_phase0[['province_state','country_region','confirmed','deaths','recovered','active','short_date','latitude','longitude','us_county']]

df_covid_0.head()

In [None]:
# engine.execute( '''TRUNCATE TABLE covid_data_0''' )    
df_covid_0.to_sql(name='covid_data_0', con=engine, if_exists='append', index=False)    

In [None]:
df_covid_phase1.fillna(0, inplace=True)
df_covid_phase1['Confirmed'] = df_covid_phase1['Confirmed'].astype(int)
df_covid_phase1['Deaths'] = df_covid_phase1['Deaths'].astype(int)
df_covid_phase1['Recovered'] = df_covid_phase1['Recovered'].astype(int)
df_covid_phase1 = df_covid_phase1.rename(columns = {'Province/State':'province_state','Country/Region':'country_region', 'Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered'})
df_covid_phase1['active'] = 0
df_covid_phase1['latitude'] = 0
df_covid_phase1['longitude'] = 0
df_covid_phase1['us_county'] = ""

df_covid_1 = df_covid_phase1[['province_state','country_region','confirmed','deaths','recovered','active','short_date','latitude','longitude','us_county']]

df_covid_1.head()

In [None]:
# engine.execute( '''TRUNCATE TABLE covid_data_phase1''' )    
df_covid_1.to_sql(name='covid_data_1', con=engine, if_exists='append', index=False)   

In [None]:
df_covid_phase2.head()

In [None]:
df_covid_phase2.fillna(0, inplace=True)
df_covid_phase2['Confirmed'] = df_covid_phase2['Confirmed'].astype(int)
df_covid_phase2['Deaths'] = df_covid_phase2['Deaths'].astype(int)
df_covid_phase2['Recovered'] = df_covid_phase2['Recovered'].astype(int)
df_covid_phase2 = df_covid_phase2.rename(columns = {'Province/State':'province_state','Country/Region':'country_region', 'Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered','Latitude':'latitude','Longitude':'longitude'})
df_covid_phase2['active'] = 0
df_covid_phase2['us_county'] = ""

df_covid_2 = df_covid_phase2[['province_state','country_region','confirmed','deaths','recovered','active','short_date','latitude','longitude','us_county']]

df_covid_2.head()

In [None]:
# engine.execute( '''TRUNCATE TABLE covid_data_phase2''' )     
df_covid_2.to_sql(name='covid_data_2', con=engine, if_exists='append', index=False)   

In [None]:
df_covid_phase3.head()

In [None]:
df_covid_phase3.fillna(0, inplace=True)
df_covid_phase3['Confirmed'] = df_covid_phase3['Confirmed'].astype(int)
df_covid_phase3['Deaths'] = df_covid_phase3['Deaths'].astype(int)
df_covid_phase3['Recovered'] = df_covid_phase3['Recovered'].astype(int)
df_covid_phase3 = df_covid_phase3.rename(columns = {'Province/State':'province_state','Country/Region':'country_region', 'Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered','Latitude':'latitude','Longitude':'longitude'})
df_covid_phase3['active'] = 0
df_covid_phase3['us_county'] = ""

df_covid_3 = df_covid_phase3[['province_state','country_region','confirmed','deaths','recovered','active','short_date','latitude','longitude','us_county']]

df_covid_3.head()

In [None]:
# engine.execute( '''TRUNCATE TABLE covid_data_phase3''' )     
df_covid_3.to_sql(name='covid_data_3', con=engine, if_exists='append', index=False)   

In [None]:
df_covid_phase4.head()

In [None]:
df_covid_phase4.fillna(0, inplace=True)
df_covid_phase4['Confirmed'] = df_covid_phase4['Confirmed'].astype(int)
df_covid_phase4['Deaths'] = df_covid_phase4['Deaths'].astype(int)
df_covid_phase4['Recovered'] = df_covid_phase4['Recovered'].astype(int)
df_covid_phase4['Active'] = df_covid_phase4['Active'].astype(int)
df_covid_phase4 = df_covid_phase4.rename(columns = {'Province_State':'province_state','Country_Region':'country_region', 'Confirmed':'confirmed','Deaths':'deaths','Active':'active','Recovered':'recovered','Lat':'latitude','Long_':'longitude','Admin2':'us_county'})

df_covid_4 = df_covid_phase4[['province_state','country_region','confirmed','deaths','recovered','active','short_date','latitude','longitude','us_county']]

df_covid_4.head()     

In [None]:
# engine.execute( '''TRUNCATE TABLE covid_data_phase3''' )        
df_covid_4.to_sql(name='covid_data_4', con=engine, if_exists='append', index=False) 

In [None]:
df_case_study_1 = pd.read_sql('select * from case_study_1_v', con=engine)
df_case_study_1.head()

In [None]:
df_covid_and_census_by_state_v = pd.read_sql('select * from covid_and_census_by_state_v', con=engine)
df_covid_and_census_by_state_v.head()