# Google Mobility Data
# NYT COVID Cases & Death Data


In [1]:
# Dependencies and Setup
import json
import os
import urllib.request
from urllib.parse import quote_plus

import pandas as pd
import requests
from sqlalchemy import create_engine, inspect

from config import user, pcode

## Store Google CSV into DataFrame

In [2]:
# Read the Google mobility CSV into a DataFrame and preview its first rows.
csv_file = "Resources/google_mob_US.csv"
google_data_df = pd.read_csv(filepath_or_buffer=csv_file)
google_data_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,State,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential
0,0,Alabama,2020-02-15,5.0,2.0,39.0,7.0,2.0,-1.0
1,1,Alabama,2020-02-16,0.0,-2.0,-7.0,3.0,-1.0,1.0
2,2,Alabama,2020-02-17,3.0,0.0,17.0,7.0,-17.0,4.0
3,3,Alabama,2020-02-18,-4.0,-3.0,-11.0,-1.0,1.0,2.0
4,4,Alabama,2020-02-19,4.0,1.0,6.0,4.0,1.0,0.0


### Create a new dataframe with select columns

In [3]:
# Keep only the state, the date and three of the mobility categories.
# .copy() makes google_df independent of google_data_df.
google_df = google_data_df.loc[
    :,
    ["State", "date", "retail_and_recreation", "grocery_and_pharmacy", "parks"],
].copy()
google_df

Unnamed: 0,State,date,retail_and_recreation,grocery_and_pharmacy,parks
0,Alabama,2020-02-15,5.0,2.0,39.0
1,Alabama,2020-02-16,0.0,-2.0,-7.0
2,Alabama,2020-02-17,3.0,0.0,17.0
3,Alabama,2020-02-18,-4.0,-3.0,-11.0
4,Alabama,2020-02-19,4.0,1.0,6.0
...,...,...,...,...,...
456457,Wyoming,2020-07-29,,,
456458,Wyoming,2020-07-30,,,
456459,Wyoming,2020-07-31,,,
456460,Wyoming,2020-08-03,,,


In [4]:
# Rename the columns to shorter names.  The "date" and "parks" entries map a
# name onto itself, so only the other three columns actually change.
google_us = google_df.rename(
    columns={
        "State": "states",
        "date": "date",
        "retail_and_recreation": "retail_recreation",
        "grocery_and_pharmacy": "grocery_pharmacy",
        "parks": "parks",
    }
)
google_us

Unnamed: 0,states,date,retail_recreation,grocery_pharmacy,parks
0,Alabama,2020-02-15,5.0,2.0,39.0
1,Alabama,2020-02-16,0.0,-2.0,-7.0
2,Alabama,2020-02-17,3.0,0.0,17.0
3,Alabama,2020-02-18,-4.0,-3.0,-11.0
4,Alabama,2020-02-19,4.0,1.0,6.0
...,...,...,...,...,...
456457,Wyoming,2020-07-29,,,
456458,Wyoming,2020-07-30,,,
456459,Wyoming,2020-07-31,,,
456460,Wyoming,2020-08-03,,,


In [5]:
# Total each mobility metric per state.
# The metric columns are selected explicitly so that the string "date" column
# is never handed to sum(): pandas >= 2.0 no longer silently drops non-numeric
# columns in GroupBy.sum().  groupby(...)[cols].sum() already returns a
# DataFrame, so no pd.DataFrame(...) wrapper is needed.
grouped_google_df = google_us.groupby("states")[
    ["retail_recreation", "grocery_pharmacy", "parks"]
].sum()
grouped_google_df.head()

Unnamed: 0_level_0,retail_recreation,grocery_pharmacy,parks
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,-55224.0,34067.0,51333.0
Alaska,257.0,5424.0,28191.0
Arizona,-43306.0,-4715.0,7218.0
Arkansas,-25510.0,34334.0,56619.0
California,-221017.0,-13869.0,80799.0


In [18]:
# Move the "states" group index back into an ordinary column.
# NOTE: this rebinds google_df, replacing the per-date frame built earlier.
google_df = grouped_google_df.reset_index(drop=False)
google_df.head(n=5)


Unnamed: 0,states,retail_recreation,grocery_pharmacy,parks
0,Alabama,-55224.0,34067.0,51333.0
1,Alaska,257.0,5424.0,28191.0
2,Arizona,-43306.0,-4715.0,7218.0
3,Arkansas,-25510.0,34334.0,56619.0
4,California,-221017.0,-13869.0,80799.0


## Store NYT COVID cases and deaths CSV into DataFrame

In [7]:
# Read the COVID cases/deaths CSV into a DataFrame and preview its first rows.
csv_file = "Resources/COVID-states.csv"
covid_data_df = pd.read_csv(filepath_or_buffer=csv_file)
covid_data_df.head(n=5)

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [8]:
# Rename "state" to "states"; every other entry maps a column name onto
# itself and therefore leaves that column unchanged.
covid_us = covid_data_df.rename(
    columns={
        "state": "states",
        "fips": "fips",
        "date": "date",
        "cases": "cases",
        "deaths": "deaths",
    }
)
covid_us.head(n=5)

Unnamed: 0,date,states,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [9]:
# Sum the numeric columns per state.
# The columns are selected explicitly so that the string "date" column is
# never handed to sum(): pandas >= 2.0 no longer silently drops non-numeric
# columns in GroupBy.sum().  groupby(...)[cols].sum() already returns a
# DataFrame, so no pd.DataFrame(...) wrapper is needed.
# NOTE(review): this adds up every dated row of a state, and the "fips"
# identifier is summed together with the counts.  If the source rows are
# running totals rather than daily increments, these sums overcount --
# confirm the intended aggregation before relying on the values.
grouped_covid_df = covid_us.groupby("states")[["fips", "cases", "deaths"]].sum()
grouped_covid_df.head()

Unnamed: 0_level_0,fips,cases,deaths
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,194,9593940,190913
Alaska,390,420666,2930
Arizona,964,16491289,396924
Arkansas,980,4741962,62148
California,1452,55553636,1181987


In [10]:
# Move the "states" group index back into an ordinary column.
covid = grouped_covid_df.reset_index(drop=False)
covid.head(n=5)

Unnamed: 0,states,fips,cases,deaths
0,Alabama,194,9593940,190913
1,Alaska,390,420666,2930
2,Arizona,964,16491289,396924
3,Arkansas,980,4741962,62148
4,California,1452,55553636,1181987


In [11]:
# Convert the "states" column to pandas' dedicated string dtype.
covid["states"] = covid["states"].astype("string")

In [12]:
# Display the dtype of every column in covid.
covid.dtypes

states    string
fips       int64
cases      int64
deaths     int64
dtype: object

### Connect to local database

In [13]:
# Create the SQLAlchemy engine for the local "mobility_db" PostgreSQL database.
# The credentials are percent-encoded before being placed in the URL so that
# characters such as "@", ":" or "/" in the user name or password cannot be
# mistaken for URL delimiters.  str() keeps non-string config values working
# exactly as they did with plain f-string interpolation.
rds_connection_string = (
    f"{quote_plus(str(user))}:{quote_plus(str(pcode))}@localhost:5432/mobility_db"
)
engine = create_engine(f"postgresql://{rds_connection_string}")

### Check for tables

In [14]:
# List the tables present in the connected database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names on both old and new
# releases.
inspect(engine).get_table_names()

['google_data', 'covid_data']

### Use pandas to load the CSV-derived DataFrames into the database

In [15]:
# Append the rows of google_df to the "google_data" table; the DataFrame
# index is not written.
# NOTE: with if_exists="append", running this cell again inserts the same
# rows a second time.
google_df.to_sql(
    name="google_data",
    con=engine,
    if_exists="append",
    index=False,
)

In [19]:
# Append the rows of covid to the "covid_data" table; the DataFrame index is
# not written.
# NOTE: with if_exists="append", running this cell again inserts the same
# rows a second time.
covid.to_sql(
    name="covid_data",
    con=engine,
    if_exists="append",
    index=False,
)

### Confirm data has been added by querying the tables

In [22]:
# Read the google_data table back from the database and show its first 10 rows.
pd.read_sql_query(sql="select * from google_data", con=engine).head(n=10)

Unnamed: 0,states,retail_recreation,grocery_pharmacy,parks
0,Alabama,-55224.0,34067.0,51333.0
1,Alaska,257.0,5424.0,28191.0
2,Arizona,-43306.0,-4715.0,7218.0
3,Arkansas,-25510.0,34334.0,56619.0
4,California,-221017.0,-13869.0,80799.0
5,Colorado,-76556.0,-10593.0,97934.0
6,Connecticut,-26590.0,-7578.0,69877.0
7,Delaware,-8072.0,-1543.0,25724.0
8,District of Columbia,-7925.0,-3410.0,-5776.0
9,Florida,-168348.0,-49278.0,-91964.0


In [23]:
# Read the covid_data table back from the database and show its first 10 rows.
pd.read_sql_query(sql="select * from covid_data", con=engine).head(n=10)

Unnamed: 0,states,fips,cases,deaths
0,Alabama,194,9593940,190913
1,Alaska,390,420666,2930
2,Arizona,964,16491289,396924
3,Arkansas,980,4741962,62148
4,California,1452,55553636,1181987
5,Colorado,1616,6043344,252657
6,Connecticut,1791,7220547,628760
7,Delaware,1960,1935382,71818
8,District of Columbia,2200,1653719,79177
9,Florida,2472,46474159,879877
