In [1]:
import pandas as pd
import os
from datetime import datetime, timedelta
from sqlalchemy import create_engine

# Setup Steps

In [2]:
# Directory where the .csv files are saved
base_output_path = "../../data"

# Create a POSTGRES database with the name 'COVID19_db'.
# The connection string is taken from the DATABASE_URL environment variable when it is set;
# otherwise the fallback below is used (replace username:password if it's not postgres:postgres).
DATABASE_URI = os.environ.get('DATABASE_URL', '') or "postgresql://postgres:postgres@localhost:5432/COVID19_db"

engine = create_engine(DATABASE_URI)

# Show which database is targeted without writing the password into the saved
# notebook output: repr() of a SQLAlchemy URL renders the password as '***'.
print(repr(engine.url))

postgresql://postgres:postgres@localhost:5432/COVID19_db


In [3]:
# Data source locations

# Downloaded live on every run
url_worldometer = 'https://www.worldometers.info/coronavirus/coronavirus-age-sex-demographics/'
url_covid = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_daily_reports/'
)

# Local files read from base_output_path.
# The individual-case file has to be downloaded manually first, from
# https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset#COVID19_line_list_data.csv
url_census = os.path.join(base_output_path, 'acs2017_census_tract_data.csv')
url_states = os.path.join(base_output_path, 'states.csv')
url_tests_and_hospital = os.path.join(base_output_path, 'us_states_covid19_daily.csv')
url_covid_ind_cases = os.path.join(base_output_path, 'COVID19_open_line_list.csv')

# url_BoL_stats

In [4]:
# Find last time covid_data was loaded.
# The SQL text is read from sql/max_db_date.sql inside a context manager so the
# file handle is closed as soon as the text has been read.
with open('sql/max_db_date.sql') as query_file:
    query_text = query_file.read()

rs = engine.execute(query_text)

# Keep the first column of the last row returned. The name is bound to None up
# front so the print below still works when the query yields no rows.
last_db_date = None
for row in rs:
    last_db_date = row[0]
print(last_db_date)

None


# Worldometer data (age, gender, pre-conditions)

In [5]:
def _percent_to_number(column):
    """Convert percentage text such as '13.2%' to numbers.

    Only a trailing '%' is removed, so values that carry no percent sign
    (for example the 0 written by fillna) keep every digit instead of losing
    their last character.
    """
    return pd.to_numeric(column.astype(str).str.rstrip('%'))


# Get Age, Gender and PreCondition Statistics
# The first three tables parsed from the page are used, in that order; missing cells become 0.
df_worldometer_stats = pd.read_html(url_worldometer, header = 0)
df_age = df_worldometer_stats[0].iloc[:,0:3].fillna(0)
df_gender = df_worldometer_stats[1].iloc[:,0:2].fillna(0)
df_precondition = df_worldometer_stats[2].iloc[:,0:3].fillna(0)

df_age = df_age.rename(columns={'AGE':'age','DEATH RATE confirmed cases':'pct_deaths_confirmed','DEATH RATE all cases':'pct_deaths_all'})
df_age['pct_deaths_confirmed'] = _percent_to_number(df_age['pct_deaths_confirmed']).fillna(0)
# The literal text 'no fatalities' is mapped to 0 before the numeric conversion.
# Casting to str first keeps non-text cells (the filled-in 0) from turning into NaN in .str.replace.
df_age['pct_deaths_all'] = _percent_to_number(
    df_age['pct_deaths_all'].astype(str).str.replace('no fatalities', '0', regex=False)
).fillna(0)

df_gender = df_gender.rename(columns={'SEX':'gender','DEATH RATE confirmed cases':'pct_deaths_confirmed'})
df_gender['pct_deaths_confirmed'] = _percent_to_number(df_gender['pct_deaths_confirmed'])

df_precondition = df_precondition.rename(columns={'PRE-EXISTING CONDITION':'precondition','DEATH RATE confirmed cases':'pct_deaths_confirmed','DEATH RATE all cases':'pct_deaths_all'})
df_precondition['pct_deaths_confirmed'] = _percent_to_number(df_precondition['pct_deaths_confirmed']).fillna(0)
df_precondition['pct_deaths_all'] = _percent_to_number(df_precondition['pct_deaths_all'])

Unnamed: 0,precondition,pct_deaths_confirmed,pct_deaths_all
0,Cardiovascular disease,13.2,10.5
1,Diabetes,9.2,7.3
2,Chronic respiratory disease,8.0,6.3
3,Hypertension,8.4,6.0
4,Cancer,7.6,5.6
5,no pre-existing conditions,0.0,0.9


In [6]:
# Display the age table (age, pct_deaths_confirmed, pct_deaths_all)
df_age

Unnamed: 0,age,pct_deaths_confirmed,pct_deaths_all
0,80+ years old,21.9,14.8
1,70-79 years old,0.0,8.0
2,60-69 years old,0.0,3.6
3,50-59 years old,0.0,1.3
4,40-49 years old,0.0,0.4
5,30-39 years old,0.0,0.2
6,20-29 years old,0.0,0.2
7,10-19 years old,0.0,0.2
8,0-9 years old,0.0,0.0


In [7]:
# Display the gender table (gender, pct_deaths_confirmed)
df_gender

Unnamed: 0,gender,pct_deaths_confirmed
0,Male,4.7
1,Female,2.8


In [8]:
# Display the pre-existing condition table (precondition, pct_deaths_confirmed, pct_deaths_all)
df_precondition

Unnamed: 0,precondition,pct_deaths_confirmed,pct_deaths_all
0,Cardiovascular disease,13.2,10.5
1,Diabetes,9.2,7.3
2,Chronic respiratory disease,8.0,6.3
3,Hypertension,8.4,6.0
4,Cancer,7.6,5.6
5,no pre-existing conditions,0.0,0.9


In [9]:
# Refresh the three Worldometer tables in turn: empty the table, then append the matching frame.
for table_name, frame in (
    ('age_data', df_age),
    ('gender_data', df_gender),
    ('precondition_data', df_precondition),
):
    engine.execute('TRUNCATE TABLE ' + table_name)
    frame.to_sql(name=table_name, con=engine, if_exists='append', index=False)

# Census Data

In [10]:
# Census data: read the local CSV at url_census and preview the first rows
df_census = pd.read_csv(url_census)
df_census.head()

Unnamed: 0,TractId,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,1001020100,Alabama,Autauga County,1845,899,946,2.4,86.3,5.2,0.0,...,0.5,0.0,2.1,24.5,881,74.2,21.2,4.5,0.0,4.6
1,1001020200,Alabama,Autauga County,2172,1167,1005,1.1,41.6,54.5,0.0,...,0.0,0.5,0.0,22.2,852,75.9,15.0,9.0,0.0,3.4
2,1001020300,Alabama,Autauga County,3385,1533,1852,8.0,61.4,26.5,0.6,...,1.0,0.8,1.5,23.1,1482,73.3,21.1,4.8,0.7,4.7
3,1001020400,Alabama,Autauga County,4267,2001,2266,9.6,80.3,7.1,0.5,...,1.5,2.9,2.1,25.9,1849,75.8,19.7,4.5,0.0,6.1
4,1001020500,Alabama,Autauga County,9965,5054,4911,0.9,77.5,16.4,0.0,...,0.8,0.3,0.7,21.0,4787,71.4,24.1,4.5,0.0,2.3


In [12]:
# Empty census_data, then append every row of the census frame (index not written).
engine.execute("TRUNCATE TABLE census_data")
df_census.to_sql(
    name='census_data',
    con=engine,
    index=False,
    if_exists='append',
)

# States and Emergency Dates

In [13]:
# Get States and Emergency Dates data: read the local CSV at url_states and preview the first rows
df_states = pd.read_csv(url_states)
df_states.head()

Unnamed: 0.1,Unnamed: 0,state_name,state,emergency_date
0,1,Alabama,AL,2020-03-13
1,2,Alaska,AK,2020-03-11
2,3,Arizona,AZ,2020-03-11
3,4,Arkansas,AR,2020-03-11
4,5,California,CA,2020-03-04


In [14]:
# Empty states_data, then append every row of the states frame (index not written).
engine.execute("TRUNCATE TABLE states_data")
df_states.to_sql(
    name='states_data',
    con=engine,
    index=False,
    if_exists='append',
)

# Number of tests performed and people hospitalized per state

In [15]:
# Test data statistics.
# Missing values are replaced by 0 first; 'date' is then parsed from its YYYYMMDD
# form and 'dateChecked' is reduced to a plain calendar date.
df_tests_and_hospital = (
    pd.read_csv(url_tests_and_hospital)
    .fillna(0)
    .assign(
        date=lambda frame: pd.to_datetime(frame['date'], format='%Y%m%d'),
        dateChecked=lambda frame: pd.to_datetime(frame['dateChecked']).dt.date,
    )
)
df_tests_and_hospital.head()

Unnamed: 0,date,state,positive,negative,pending,hospitalized,death,total,dateChecked,totalTestResults,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
0,2020-03-25,AK,42.0,1649.0,0.0,1.0,1.0,1691,2020-03-25,1691,1.0,1.0,663.0,6.0,669.0
1,2020-03-25,AL,283.0,2529.0,0.0,0.0,0.0,2812,2020-03-25,2812,0.0,0.0,423.0,68.0,491.0
2,2020-03-25,AR,280.0,1437.0,0.0,22.0,2.0,1717,2020-03-25,1717,2.0,0.0,490.0,62.0,552.0
3,2020-03-25,AS,0.0,0.0,0.0,0.0,0.0,0,2020-03-25,0,0.0,0.0,0.0,0.0,0.0
4,2020-03-25,AZ,450.0,323.0,53.0,8.0,6.0,826,2020-03-25,773,1.0,0.0,10.0,93.0,103.0


In [16]:
# Empty tests_and_hospital_data, then append every row of the frame (index not written).
engine.execute("TRUNCATE TABLE tests_and_hospital_data")
df_tests_and_hospital.to_sql(
    name='tests_and_hospital_data',
    con=engine,
    index=False,
    if_exists='append',
)

# Individual Case Study

In [17]:
# Get data about individual cases.
# The 'wuhan(0)_not_wuhan(1)' column is renamed to 'wuhan_1_or_0' right after reading.
df_ind_cases = pd.read_csv(url_covid_ind_cases).rename(
    columns={'wuhan(0)_not_wuhan(1)': 'wuhan_1_or_0'}
)
df_ind_cases.head()

Unnamed: 0,ID,age,sex,city,province,country,wuhan_1_or_0,latitude,longitude,geo_resolution,...,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44
0,1.0,30,male,"Chaohu City, Hefei City",Anhui,China,1.0,31.64696,117.7166,admin3,...,,,,,,,,,,
1,2.0,47,male,"Baohe District, Hefei City",Anhui,China,1.0,31.77863,117.3319,admin3,...,,,,,,,,,,
2,3.0,49,male,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,...,,,,,,,,,,
3,4.0,47,female,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,...,,,,,,,,,,
4,5.0,50,female,"Feidong County, Hefei City",Anhui,China,1.0,32.00123,117.5681,admin3,...,,,,,,,,,,


In [18]:
# Empty individual_case_data, then append every row of the frame (index not written).
engine.execute("TRUNCATE TABLE individual_case_data")
df_ind_cases.to_sql(
    name='individual_case_data',
    con=engine,
    index=False,
    if_exists='append',
)

# COVID-19 Data from Johns Hopkins

In [19]:
# Read the Johns Hopkins daily reports into one DataFrame per file-format phase.
#
# Covid data starts at 01-22-2020.csv save in format 2020-02-01 - LOOP over all / just new ones
# The file format has changed several times, so the date ranges to be processed are:
# Phase 0: 2020-01-22 - 2020-01-31 (Country/Region = Country)
# Phase 1: 2020-02-01 - 2020-02-29 (Country/Region = Country, Province/State=US County)
# Phase 2: 2020-03-01 - 2020-03-09 (Country/Region = Country, Province/State=US County, NEW: Lat Long)
# Phase 3: 2020-03-10 - 2020-03-21 (Country/Region = Country, Province/State=STATE!!!!! - no more county level)
# Phase 4: 2020-03-22 - onwards (NEW: Admin2 (=County), Active)

# First day of phases 1..4, built once instead of being re-parsed for every file.
# The phase of a report is the number of these boundaries its date has reached.
phase_starts = [
    datetime(2020, 2, 1).date(),
    datetime(2020, 3, 1).date(),
    datetime(2020, 3, 10).date(),
    datetime(2020, 3, 22).date(),
]

# Daily frames are collected in plain lists and concatenated once per phase after
# the loop; growing a DataFrame with .append() inside the loop copies all rows on
# every iteration and is not available in pandas 2.0+.
phase_frames = [[] for _ in range(len(phase_starts) + 1)]

# Reset last_db_date only for initial load. Then leave it to setting at start
# NOTE(review): this line unconditionally overrides any value set earlier, and every
# phase table is loaded with if_exists='append' - confirm it is disabled for incremental runs.
last_db_date = datetime.strptime('01-22-2020',"%m-%d-%Y").date()
yesterday_date = datetime.today().date() + timedelta(days=-1)
loop_date = last_db_date

print("Last Date in DB:", last_db_date)
print("Yesterday's Date:", yesterday_date)

while loop_date <= yesterday_date:

    # Change to date format used in .csv files
    file_date = datetime.strftime(loop_date, "%m-%d-%Y")

    # Create URL and get data
    url_covid_file = url_covid + file_date + ".csv"
    print("Now processing: " + url_covid_file)
    df_covid_loop = pd.read_csv(url_covid_file)
    df_covid_loop['short_date'] = loop_date

    # Save a local copy of each daily file
    output_path = os.path.join(base_output_path, "df_covid_" + file_date + ".csv")
    df_covid_loop.to_csv(output_path)

    # Route the frame to its phase based on the report date
    phase = sum(loop_date >= start for start in phase_starts)
    phase_frames[phase].append(df_covid_loop)

    # Get the next file
    loop_date = loop_date + timedelta(days=1)

# One DataFrame per phase; a phase without any files stays an empty DataFrame.
df_covid_phase0, df_covid_phase1, df_covid_phase2, df_covid_phase3, df_covid_phase4 = (
    pd.concat(frames) if frames else pd.DataFrame() for frames in phase_frames
)

Last Date in DB: 2020-01-22
Today's Date: 2020-03-25
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-23-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-24-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-25-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-26-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-27-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_cov

Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-19-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-20-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-21-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-22-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-23-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-24-2020.csv
Now processing: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-25-2020.csv

In [23]:
# Rows without a province/state get the placeholder "Unknown".
# The filled column is assigned back to the frame: calling fillna(inplace=True) on a
# column selection is a chained in-place update, which leaves the parent frame
# unchanged when pandas Copy-on-Write is active.
df_covid_phase0['Province/State'] = df_covid_phase0['Province/State'].fillna("Unknown")
df_covid_phase0.head()

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,short_date
0,Anhui,Mainland China,1/22/2020 17:00,1.0,,,2020-01-22
1,Beijing,Mainland China,1/22/2020 17:00,14.0,,,2020-01-22
2,Chongqing,Mainland China,1/22/2020 17:00,6.0,,,2020-01-22
3,Fujian,Mainland China,1/22/2020 17:00,1.0,,,2020-01-22
4,Gansu,Mainland China,1/22/2020 17:00,,,,2020-01-22


In [24]:
# Phase 0 clean-up: replace every remaining missing value with 0, store the three
# counters as integers, switch to the lower-case column names and keep only the
# columns that are loaded into the database.
df_covid_phase0 = (
    df_covid_phase0
    .fillna(0)
    .astype({'Confirmed': int, 'Deaths': int, 'Recovered': int})
    .rename(columns={
        'Province/State': 'state_name',
        'Country/Region': 'country',
        'Confirmed': 'confirmed',
        'Deaths': 'deaths',
        'Recovered': 'recovered',
    })
)
df_covid_phase0 = df_covid_phase0[['country','state_name','short_date','confirmed','deaths','recovered']]

df_covid_phase0.head()

Unnamed: 0,country,state_name,short_date,confirmed,deaths,recovered
0,Mainland China,Anhui,2020-01-22,1,0,0
1,Mainland China,Beijing,2020-01-22,14,0,0
2,Mainland China,Chongqing,2020-01-22,6,0,0
3,Mainland China,Fujian,2020-01-22,1,0,0
4,Mainland China,Gansu,2020-01-22,0,0,0


In [25]:
# Append the phase 0 rows to covid_data_phase0 (index not written).
# Table reset, currently disabled:
# engine.execute( '''TRUNCATE TABLE covid_data_phase0''' )
df_covid_phase0.to_sql(
    name='covid_data_phase0',
    con=engine,
    index=False,
    if_exists='append',
)

In [26]:
# Preview the first rows of the phase 0 frame
df_covid_phase0.head()

Unnamed: 0,country,state_name,short_date,confirmed,deaths,recovered
0,Mainland China,Anhui,2020-01-22,1,0,0
1,Mainland China,Beijing,2020-01-22,14,0,0
2,Mainland China,Chongqing,2020-01-22,6,0,0
3,Mainland China,Fujian,2020-01-22,1,0,0
4,Mainland China,Gansu,2020-01-22,0,0,0


In [27]:
# Phase 1 clean-up.
# Rows without a province/state get the placeholder "Unknown". The filled column is
# assigned back to the frame: fillna(inplace=True) on a column selection is a chained
# in-place update, which leaves the parent frame unchanged under pandas Copy-on-Write.
df_covid_phase1['Province/State'] = df_covid_phase1['Province/State'].fillna("Unknown")
# Store the three counters as integers
df_covid_phase1['Confirmed'] = df_covid_phase1['Confirmed'].astype(int)
df_covid_phase1['Deaths'] = df_covid_phase1['Deaths'].astype(int)
df_covid_phase1['Recovered'] = df_covid_phase1['Recovered'].astype(int)
# Switch to the lower-case column names and keep only the columns that are loaded
df_covid_phase1 = df_covid_phase1.rename(columns = {'Province/State':'state_name','Country/Region':'country','Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered'})
df_covid_phase1 = df_covid_phase1[['country','state_name','short_date','confirmed','deaths','recovered']]

In [28]:
# Preview the first rows of the phase 1 frame
df_covid_phase1.head()

Unnamed: 0,country,state_name,short_date,confirmed,deaths,recovered
0,Mainland China,Hubei,2020-02-01,7153,249,168
1,Mainland China,Zhejiang,2020-02-01,599,0,21
2,Mainland China,Guangdong,2020-02-01,535,0,14
3,Mainland China,Henan,2020-02-01,422,2,3
4,Mainland China,Hunan,2020-02-01,389,0,8


In [29]:
# Append the phase 1 rows to covid_data_phase1 (index not written).
# Table reset, currently disabled:
# engine.execute( '''TRUNCATE TABLE covid_data_phase1''' )
df_covid_phase1.to_sql(
    name='covid_data_phase1',
    con=engine,
    index=False,
    if_exists='append',
)

In [30]:
# Preview the first rows of the phase 2 frame
df_covid_phase2.head()

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude,short_date
0,Hubei,Mainland China,2020-03-01T10:13:19,66907,2761,31536,30.9756,112.2707,2020-03-01
1,,South Korea,2020-03-01T23:43:03,3736,17,30,36.0,128.0,2020-03-01
2,,Italy,2020-03-01T23:23:02,1694,34,83,43.0,12.0,2020-03-01
3,Guangdong,Mainland China,2020-03-01T14:13:18,1349,7,1016,23.3417,113.4244,2020-03-01
4,Henan,Mainland China,2020-03-01T14:13:18,1272,22,1198,33.882,113.614,2020-03-01


In [31]:
# Phase 2 clean-up.
# Rows without a province/state get the placeholder "Unknown". The filled column is
# assigned back to the frame: fillna(inplace=True) on a column selection is a chained
# in-place update, which leaves the parent frame unchanged under pandas Copy-on-Write.
df_covid_phase2['Province/State'] = df_covid_phase2['Province/State'].fillna("Unknown")
# county is a copy of the (already filled) Province/State column
df_covid_phase2['county'] = df_covid_phase2["Province/State"]
# Store the three counters as integers
df_covid_phase2['Confirmed'] = df_covid_phase2['Confirmed'].astype(int)
df_covid_phase2['Deaths'] = df_covid_phase2['Deaths'].astype(int)
df_covid_phase2['Recovered'] = df_covid_phase2['Recovered'].astype(int)
# Switch to the lower-case column names and keep only the columns that are loaded
df_covid_phase2 = df_covid_phase2.rename(columns = {'Province/State':'state_name','Country/Region':'country', 'Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered','Latitude':'latitude','Longitude':'longitude'})
df_covid_phase2 = df_covid_phase2[['country','state_name','short_date','confirmed','deaths','recovered','county','latitude','longitude']]

In [32]:
# Preview the first rows of the phase 2 frame
df_covid_phase2.head()

Unnamed: 0,country,state_name,short_date,confirmed,deaths,recovered,county,latitude,longitude
0,Mainland China,Hubei,2020-03-01,66907,2761,31536,Hubei,30.9756,112.2707
1,South Korea,Unknown,2020-03-01,3736,17,30,Unknown,36.0,128.0
2,Italy,Unknown,2020-03-01,1694,34,83,Unknown,43.0,12.0
3,Mainland China,Guangdong,2020-03-01,1349,7,1016,Guangdong,23.3417,113.4244
4,Mainland China,Henan,2020-03-01,1272,22,1198,Henan,33.882,113.614


In [33]:
# Append the phase 2 rows to covid_data_phase2 (index not written).
# Table reset, currently disabled:
# engine.execute( '''TRUNCATE TABLE covid_data_phase2''' )
df_covid_phase2.to_sql(
    name='covid_data_phase2',
    con=engine,
    index=False,
    if_exists='append',
)
    


In [34]:
# Preview only the phase 3 rows whose Country/Region is 'US'
df_temp = df_covid_phase3.loc[df_covid_phase3['Country/Region'].eq('US')]

df_temp.head()

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude,short_date
30,Washington,US,2020-03-10T22:13:11,267,23,1,47.4009,-121.4905,2020-03-10
36,New York,US,2020-03-10T17:13:27,173,0,0,42.1657,-74.9481,2020-03-10
40,California,US,2020-03-10T19:13:28,144,2,2,36.1162,-119.6816,2020-03-10
49,Massachusetts,US,2020-03-10T22:13:11,92,0,1,42.2302,-71.5301,2020-03-10
65,Diamond Princess,US,2020-03-10T02:33:04,46,0,0,35.4437,139.638,2020-03-10


In [35]:
# Phase 3 clean-up.
# Rows without a province/state get the placeholder "Unknown". The filled column is
# assigned back to the frame: fillna(inplace=True) on a column selection is a chained
# in-place update, which leaves the parent frame unchanged under pandas Copy-on-Write.
df_covid_phase3['Province/State'] = df_covid_phase3['Province/State'].fillna("Unknown")
# Store the three counters as integers
df_covid_phase3['Confirmed'] = df_covid_phase3['Confirmed'].astype(int)
df_covid_phase3['Deaths'] = df_covid_phase3['Deaths'].astype(int)
df_covid_phase3['Recovered'] = df_covid_phase3['Recovered'].astype(int)
# Switch to the lower-case column names and keep only the columns that are loaded
df_covid_phase3 = df_covid_phase3.rename(columns = {'Province/State':'state_name','Country/Region':'country', 'Confirmed':'confirmed','Deaths':'deaths','Recovered':'recovered','Latitude':'latitude','Longitude':'longitude'})
df_covid_phase3 = df_covid_phase3[['country','state_name','short_date','confirmed','deaths','recovered','latitude','longitude']]
df_covid_phase3.head()


Unnamed: 0,country,state_name,short_date,confirmed,deaths,recovered,latitude,longitude
0,Mainland China,Hubei,2020-03-10,67760,3024,47743,30.9756,112.2707
1,Italy,Unknown,2020-03-10,10149,631,724,43.0,12.0
2,Iran (Islamic Republic of),Unknown,2020-03-10,8042,291,2731,32.0,53.0
3,Republic of Korea,Unknown,2020-03-10,7513,54,247,36.0,128.0
4,France,Unknown,2020-03-10,1784,33,12,47.0,2.0


In [36]:
# Append the phase 3 rows to covid_data_phase3 (index not written).
# Table reset, currently disabled:
# engine.execute( '''TRUNCATE TABLE covid_data_phase3''' )
df_covid_phase3.to_sql(
    name='covid_data_phase3',
    con=engine,
    index=False,
    if_exists='append',
)
 

In [37]:
   
# Display the phase 4 frame (the notebook shows a truncated view of the first and last rows)
df_covid_phase4

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,short_date
0,36061.0,New York City,New York,US,3/22/20 23:45,40.767273,-73.971526,9654,63,0,0,"New York City, New York, US",2020-03-22
1,36059.0,Nassau,New York,US,3/22/20 23:45,40.740665,-73.589419,1900,4,0,0,"Nassau, New York, US",2020-03-22
2,36119.0,Westchester,New York,US,3/22/20 23:45,41.162784,-73.757417,1873,0,0,0,"Westchester, New York, US",2020-03-22
3,36103.0,Suffolk,New York,US,3/22/20 23:45,40.883201,-72.801217,1034,9,0,0,"Suffolk, New York, US",2020-03-22
4,36087.0,Rockland,New York,US,3/22/20 23:45,41.150279,-74.025605,455,1,0,0,"Rockland, New York, US",2020-03-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3415,,,,Uzbekistan,2020-03-25 23:33:04,41.377491,64.585262,60,0,0,60,Uzbekistan,2020-03-25
3416,,,,Venezuela,2020-03-25 23:33:04,6.423800,-66.589700,91,0,15,76,Venezuela,2020-03-25
3417,,,,Vietnam,2020-03-25 23:33:04,14.058324,108.277199,141,0,17,124,Vietnam,2020-03-25
3418,,,,Zambia,2020-03-25 23:33:04,-13.133897,27.849332,12,0,0,12,Zambia,2020-03-25


In [38]:
# Phase 4 clean-up: keep the ten columns that are loaded, in their final order,
# then switch them to the lower-case names (short_date already has its final name).
df_covid_phase4 = df_covid_phase4[
    ['Country_Region', 'Province_State', 'Admin2', 'short_date',
     'Confirmed', 'Deaths', 'Recovered', 'Active', 'Lat', 'Long_']
].rename(columns={
    'Country_Region': 'country',
    'Province_State': 'state_name',
    'Admin2': 'county',
    'Confirmed': 'confirmed',
    'Deaths': 'deaths',
    'Recovered': 'recovered',
    'Active': 'active',
    'Lat': 'latitude',
    'Long_': 'longitude',
})
df_covid_phase4.head()
  
      

Unnamed: 0,country,state_name,county,short_date,confirmed,deaths,recovered,active,latitude,longitude
0,US,New York,New York City,2020-03-22,9654,63,0,0,40.767273,-73.971526
1,US,New York,Nassau,2020-03-22,1900,4,0,0,40.740665,-73.589419
2,US,New York,Westchester,2020-03-22,1873,0,0,0,41.162784,-73.757417
3,US,New York,Suffolk,2020-03-22,1034,9,0,0,40.883201,-72.801217
4,US,New York,Rockland,2020-03-22,455,1,0,0,41.150279,-74.025605


In [39]:
# df_temp = df_covid_phase4[df_covid_phase4['county']=='Los Angeles']

# df_temp.head()

In [40]:
# Append the phase 4 rows to covid_data_phase4 (index not written).
# Table reset, currently disabled; it targets the phase 4 table that this cell loads:
# engine.execute( '''TRUNCATE TABLE covid_data_phase4''' )        
df_covid_phase4.to_sql(name='covid_data_phase4', con=engine, if_exists='append', index=False) 