In [None]:
%cd ../

from src.requirements import *
from src.ppca import PPCA
from src.utils import *
%matplotlib inline
os.mkdir('plots')

## ******************************************************************************************************************************
## OPEN CONFIG FILE
## ******************************************************************************************************************************

with open('configFile.json') as json_data_file:
    configFile = json.load(json_data_file)

## ******************************************************************************************************************************
## SET BIG QUERY CREDENTIALS
## ******************************************************************************************************************************

SERVICE_ACCOUNT_FILE = configFile['config']['BQ_key_file']
client_bq = bigquery.Client.from_service_account_json(SERVICE_ACCOUNT_FILE)
bq_dataset_id = configFile['config']['bq_dataset']
bq_dataset_ref = client_bq.dataset(bq_dataset_id)

## ******************************************************************************************************************************
## CARTO
## ******************************************************************************************************************************

carto_username = configFile['config']['CARTO']['username']
carto_API = configFile['config']['CARTO']['API_key']

creds = Credentials(carto_username, carto_API)
set_default_credentials(creds)

%cd etl/

rc('mathtext', default='regular')

In [None]:
# Mobility-report columns of interest (percent change from baseline, per place category).
vars_mob = [
    'retail_and_recreation_percent_change_from_baseline',
    'grocery_and_pharmacy_percent_change_from_baseline',
    'workplaces_percent_change_from_baseline',
]

## Join Google mobility report and COVID-19 county-level data

In [None]:
# Daily case metrics joined to their location record (name, population, geometry),
# restricted to rows with country 'US' and level 4.
# `geoid_state` is the 7-character slice of geoid starting at position 6; it is
# shortened to two characters after loading (see below).
q_cases = """
    SELECT a.active,
            a.cases,
            a.deaths,
            a.discharged, 
            a.growthFactor,
            a.hospitalized, 
            a.icu,a.recovered,
            a.tested, 
            b.population,
            a.do_date as date, 
            b.name as sub_region_2,
            b.geoid as geoid,
            SUBSTR(b.geoid, 6, 7) as geoid_state,
            b.geom FROM `carto-do-public-data.coronadatascraper.covid19_summarycases_glo_coronalocations_v1_daily_v1` a
    JOIN `carto-do-public-data.coronadatascraper.geography_glo_coronalocations_v1` b
    ON a.geoid=b.geoid
    WHERE b.country LIKE 'US' and b.level=4
"""
df_cases = client_bq.query(q_cases).to_dataframe()
df_cases['date'] = pd.to_datetime(df_cases['date'])
# Keep only the first two characters (presumably the state FIPS prefix -- confirm);
# this is the key later matched against `fips_state`.
# The vectorised .str accessor replaces a per-row lambda and leaves missing
# values as missing instead of raising.
df_cases['geoid_state'] = df_cases['geoid_state'].str[:2]

In [None]:
# Lookup table read from CSV; `fips_state` is forced to str so it can be matched
# against the string `geoid_state` key of the case data.
df_fips_states = pd.read_csv(
    '../data/FIPS_states.csv',
    dtype={'fips_state': str},
)

In [None]:
# Attach `sub_region_1` to every case row via the two-character state key, then
# discard both helper key columns. The left join keeps rows without a match.
df_cases = (
    df_cases
    .merge(
        df_fips_states[['sub_region_1', 'fips_state']],
        left_on='geoid_state',
        right_on='fips_state',
        how='left',
    )
    .drop(columns=['geoid_state', 'fips_state'])
)

In [None]:
# Every mobility-report row whose country_region is 'United States'.
q_mob = """
    SELECT *
    FROM `bigquery-public-data.covid19_google_mobility.mobility_report` 
    WHERE country_region LIKE 'United States'
"""
# Load the report, parse `date`, and attach geoid/geom from the case data by
# matching on (sub_region_1, sub_region_2). The default inner join drops
# mobility rows that have no matching region in the case data.
df_mob = (
    client_bq.query(q_mob)
    .to_dataframe()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .merge(
        df_cases[['sub_region_1', 'sub_region_2', 'geoid', 'geom']].drop_duplicates(),
        on=['sub_region_1', 'sub_region_2'],
    )
)

In [None]:
# Latest date present in the mobility data.
df_mob['date'].max()

In [None]:
# Latest date present in the case data.
df_cases['date'].max()

In [None]:
# Right join: keep every mobility row and attach the case metrics wherever
# region, geoid, geometry and date all match.
df = df_cases.merge(
    df_mob,
    on=['sub_region_1', 'sub_region_2', 'geoid', 'geom', 'date'],
    how='right',
)
# Vectorised accessors replace the per-row lambdas; unlike the lambdas they do
# not raise on missing values (NaT / None), which simply stay missing.
df['date'] = df['date'].dt.strftime("%Y-%m-%d")
# regex=False: 'fips:' is removed as a literal substring on every pandas version.
df['geoid'] = df['geoid'].str.replace('fips:', '', regex=False)

### Upload result to BigQuery

In [None]:
# Swap missing values for None before uploading.
# NOTE(review): on recent pandas versions numeric columns may keep NaN here -- confirm.
df_bq = df.where(df.notna(), None)

In [None]:
# Fully qualified id (project.dataset.table) of the destination table.
table_name = f"{bq_dataset_ref.project}.{bq_dataset_ref.dataset_id}.epi_mobility_county"
# Remove any previous version of the table first; not_found_ok avoids an error
# when the table does not exist yet.
client_bq.delete_table(table_name, not_found_ok=True)
upload_df_to_bigquery(df_bq, 'epi_mobility_county', bq_dataset_ref, client_bq)