# Smart Cookies - Location Exposure Extract
#### based on - C3.ai COVID-19 Data Lake Quickstart in Python
#### Location Exposure documentation: https://c3.ai/covid-19-api-documentation/#tag/LocationExposure

Loading Dependencies Below:

In [53]:
import requests
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib
from scipy.stats import gamma
import numpy as np

Ensure that a recent version of pandas (>= 1.0.0) is installed.

In [54]:
# Report the installed pandas version, then stop early on a pre-1.0 release.
pandas_version = pd.__version__
print("pandas version", pandas_version)
# Only the leading character of the version string is compared.
assert pandas_version[0] >= "1", "To use this notebook, upgrade to the newest version of pandas. See https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html for details."

pandas version 1.1.4


<a id="helpers"></a>
## Helper methods for accessing the API

The helper methods in `c3aidatalake.py` convert a JSON response from the C3.ai APIs to a Pandas DataFrame. You may wish to view the code in `c3aidatalake.py` before running the code below. 

`c3aidatalake.py` is an API helper file provided by C3.ai.

In [55]:
import c3aidatalake 

### Depends on the output file from Daphne's notebook: `outbreaklocation_by_county.csv`
Note: For testing, the extract is limited to Texas counties only; the additional row limit (`head`) is currently commented out in the code below.

In [56]:
# get the location exposure data for a pair of counties, return it as a df
def eachlocationExposures(locationTarget, locationVisited, start="2020-09-01", end="2020-10-31"):
    """Fetch location exposure data for one target/visited pair as a DataFrame.

    Calls the ``getlocationexposures`` action of ``locationexposure`` through
    ``c3aidatalake.read_data_json`` and flattens the ``locationExposures``
    values. When at least one exposure row comes back, the ``deviceCounts``
    values are left-joined onto it by ``locationTarget`` and ``timestamp``.

    Parameters
    ----------
    locationTarget
        Location id sent as the spec's ``locationTarget``.
    locationVisited
        Location id sent as the spec's ``locationVisited``.
    start, end
        Values sent as the spec's ``start`` and ``end``. The defaults keep the
        window this notebook originally hard-coded (2020-09-01 to 2020-10-31).
        Presumably ISO ``YYYY-MM-DD`` dates are expected -- confirm against the
        LocationExposure API documentation.

    Returns
    -------
    pandas.DataFrame
        The exposure rows joined with device counts, with ``value_x`` renamed to
        ``value_Location_exposure_index`` and ``value_y`` renamed to
        ``value_Device_count``. If no exposure rows are returned, the empty
        exposure frame is returned as-is.
    """
    spec = {
        "locationTarget": locationTarget,
        "locationVisited": locationVisited,
        "start": start,
        "end": end,
    }
    exposure = c3aidatalake.read_data_json(
        "locationexposure",
        "getlocationexposures",
        {"spec": spec},
    )

    lex_df = pd.json_normalize(exposure["locationExposures"]["value"])
    if len(lex_df) == 0:
        # No exposure rows: there is nothing to join device counts onto.
        return lex_df

    device_counts_df = pd.json_normalize(exposure["deviceCounts"]["value"])
    merged_df = pd.merge(
        lex_df,
        device_counts_df,
        how="left",
        on=["locationTarget", "timestamp"],
    )

    # value_x / value_y are merge's default suffixes for a column name present
    # in both frames (presumably each frame carries a "value" column).
    return merged_df.rename(
        columns={
            "value_x": "value_Location_exposure_index",
            "value_y": "value_Device_count",
        }
    )


In [57]:
from pathlib import Path

# Load the county list and keep only the ids containing "_Texas_".
counties_df = pd.read_csv('outbreaklocation_by_county.csv')

counties_df = counties_df[counties_df['id'].str.contains("_Texas_")]

# counties_df = counties_df.head(5)  # uncomment to limit the run while testing

# Make sure the output folder exists before the first CSV is written.
output_dir = Path('locationExposure')
output_dir.mkdir(parents=True, exist_ok=True)

# For every target county, fetch its exposure to every *other* county and
# write the combined rows to one CSV per target.
for each_id_Target in counties_df['id']:
    all_lex = []
    for each_id_Visited in counties_df['id']:
        if each_id_Target == each_id_Visited:
            continue
        try:
            lex = eachlocationExposures(each_id_Target, each_id_Visited)
        except Exception as err:
            # Best effort: report the failing pair and carry on with the rest.
            # Catching Exception (not a bare except) lets Ctrl-C stop the run.
            print(f'Error calling eachlocationExposures("{each_id_Target}", "{each_id_Visited}"): {err!r}')
        else:
            all_lex.append(lex)

    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame when no pair produced a result (single county, or every call failed).
    all_lex_df = pd.concat(all_lex, ignore_index=True) if all_lex else pd.DataFrame()
    # print(all_lex_df)
    all_lex_df.to_csv(output_dir / f'{each_id_Target}.csv')
    print(f'Wrote file for: {each_id_Target} with {len(all_lex_df)} rows.')

print('Finished!')


Wrote file for: Anderson_Texas_UnitedStates with 51 rows.
Wrote file for: Andrews_Texas_UnitedStates with 51 rows.
Wrote file for: Angelina_Texas_UnitedStates with 51 rows.
Wrote file for: Aransas_Texas_UnitedStates with 51 rows.
Wrote file for: Archer_Texas_UnitedStates with 0 rows.
Finished!
