In [1]:
# Dependencies
# import numpy as np
import pandas as pd
import datetime as dt
import requests
import json

# Extract & Clean Data

## Extract Data

### Tracking API data from the CDC

#### Max temp days

In [2]:
# Path segments for the CDC Tracking API `getCoreHolder` GET request
version = "v1"
measureId = 423  # Requires a valid measureID
stratificationLevelId = 82  # Requires a valid stratificationLevelId.
geographicTypeIdFilter = 1  # Requires a valid geographicTypeID or ALL.
# Filter to retrieve only certain geographicItems.
# NOTE(review): 4 looks like Arizona's state code (the results list AZ counties) - confirm.
geographicItemsFilter = 4
temporalTypeIdFilter = 1  # Requires a valid temporal type id matching temporal items (e.g., 1)
# Requires valid temporal items entries (e.g., 2020) separated by comma.
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011"
isSmoothed = 0  # 0 not smoothed - most data isn't
getFullCoreHolder = 0  # Requires either a one or zero (true or false) for fetching the full core holder.

# Build the endpoint URL; the query string carries the two extra
# parameters this request passes alongside the path segments.
MaxTempDays_url = (
    f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder"
    f"/{measureId}/{stratificationLevelId}"
    f"/{geographicTypeIdFilter}/{geographicItemsFilter}"
    f"/{temporalTypeIdFilter}/{temporalItemsFilter}"
    f"/{isSmoothed}/{getFullCoreHolder}"
    "?TemperatureHeatIndexId=1&RelativeThresholdId=1"
)

# Bound the wait and fail loudly on HTTP errors: without a timeout a stalled
# connection hangs the kernel forever, and without the status check an error
# page surfaces later as a confusing JSON/KeyError instead of the real cause.
http_response = requests.get(MaxTempDays_url, timeout=120)
http_response.raise_for_status()
response = http_response.json()

# Isolate tableResult for easy reading
data = response["tableResult"]

In [3]:
# Reduce every API record to the three fields kept for analysis
summary_list = [
    {
        "County": record["geo"],
        "Year": record["temporal"],
        "Extreme Heat Days": record["dataValue"],
    }
    for record in data
]

# One row per record, displayed as the cell output
EHD = pd.DataFrame(summary_list)
EHD

Unnamed: 0,County,Year,Extreme Heat Days
0,Apache,2011,19
1,Apache,2012,19
2,Apache,2013,18
3,Apache,2014,5
4,Apache,2015,18
...,...,...,...
160,Yuma,2017,21
161,Yuma,2018,19
162,Yuma,2019,22
163,Yuma,2020,36


In [5]:
# Persist the extreme-heat-days table as CSV, leaving the row index out of the file
EHD.to_csv(
    "data/ExtremeHeatDays.csv",
    index=False,
    encoding="utf8",
)

#### ED visits for over 35

In [6]:
# Path segments for the CDC Tracking API `getCoreHolder` GET request
version = "v1"
measureId = 108  # Requires a valid measureID
stratificationLevelId = 2  # Requires a valid stratificationLevelId.
geographicTypeIdFilter = "all"  # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = "all"  # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1  # Requires a valid temporal type id matching temporal items (e.g., 1)
# Requires valid temporal items entries (e.g., 2020) separated by comma.
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011"
isSmoothed = 0  # 0 not smoothed - most data isn't
getFullCoreHolder = 0  # Requires either a one or zero (true or false) for fetching the full core holder.

# Build the endpoint URL. With the values above it resolves to:
# https://ephtracking.cdc.gov/apigateway/api/v1/getCoreHolder/108/2/all/all/1/2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011/0/0
Hosp35_url = (
    f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder"
    f"/{measureId}/{stratificationLevelId}"
    f"/{geographicTypeIdFilter}/{geographicItemsFilter}"
    f"/{temporalTypeIdFilter}/{temporalItemsFilter}"
    f"/{isSmoothed}/{getFullCoreHolder}"
)

# Bound the wait and fail loudly on HTTP errors: this request is not
# restricted to one geography, so a stalled connection or an error response
# should stop the notebook here rather than hang or break a later cell.
http_response = requests.get(Hosp35_url, timeout=120)
http_response.raise_for_status()
response = http_response.json()

# Isolate tableResult for easy reading
data = response["tableResult"]

In [7]:
# Reduce every API record to the four fields kept for analysis
summary_list = [
    {
        "State": record["parentGeoId"],
        "County": record["geo"],
        "Year": record["temporal"],
        "Age Adjusted Rate ED": record["dataValue"],
    }
    for record in data
]

# One row per record; at this point the frame still covers all states
ED35 = pd.DataFrame(summary_list)
ED35

Unnamed: 0,State,County,Year,Age Adjusted Rate ED
0,04,Apache,2011,22.2
1,04,Apache,2012,25.9
2,04,Apache,2013,21.7
3,04,Apache,2014,17.5
4,04,Apache,2015,23.1
...,...,...,...,...
16308,55,Wood,2017,42.3
16309,55,Wood,2018,41.0
16310,55,Wood,2019,41.0
16311,55,Wood,2020,33.0


In [9]:
# Keep only the rows whose State is "04" (AZ), then discard the
# State column, which is constant after the filter
is_arizona = ED35['State'] == "04"
ED35 = ED35[is_arizona].drop(columns=["State"])

In [11]:
# Persist the Arizona-only ED rates as CSV, leaving the row index out of the file
ED35.to_csv(
    "data/EDRates35.csv",
    index=False,
    encoding="utf8",
)

### HRI Data from Arizona Department of Health Services (AZDHS)

#### ED visits

In [13]:
# Load the raw ED visits CSV into a pandas DataFrame
ed_visits_df = pd.read_csv(filepath_or_buffer='data/AZ-EDVisits.csv')

In [15]:
# Remove exact duplicate rows (first occurrence wins), then drop
# every row whose GeogID is 'ALL'
ed_visits_df = (
    ed_visits_df
    .drop_duplicates(keep='first')
    .loc[lambda frame: frame['GeogID'] != 'ALL']
)

In [16]:
# Keep the three columns of interest and give them their cleaned names
ed_columns = {'Name': 'county', 'Year': 'year', 'Value': 'ed_rate'}
ed_visits_df = ed_visits_df[['Name', 'Year', 'Value']].rename(columns=ed_columns)

Unnamed: 0,county,year,ed_rate
0,NAVAJO,2011,17.72
2,PIMA,2011,20.12
4,PINAL,2011,37.73
6,SANTA CRUZ,2011,17.72
8,YAVAPAI,2011,12.76


In [18]:
# Order rows by year, then county, and renumber the index from zero
ed_visits_df = (
    ed_visits_df
    .sort_values(by=['year', 'county'])
    .reset_index(drop=True)
)

In [19]:
# Persist the cleaned ED visits table as CSV, leaving the row index out of the file
ed_visits_df.to_csv(
    "data/cleaned-AZ-EDVisits.csv",
    index=False,
    encoding="utf8",
)

#### Hospitalization Data

In [22]:
# Load and clean the hospitalization data in one pipeline:
#   1. read the raw CSV
#   2. remove exact duplicate rows (first occurrence wins)
#   3. drop every row whose GeogID is 'ALL'
#   4. keep the three columns of interest under their cleaned names
#   5. order by year then county and renumber the index from zero
hosp_df = (
    pd.read_csv('data/AZ_Hospitalizations.csv')
    .drop_duplicates(keep='first')
    .loc[lambda frame: frame['GeogID'] != 'ALL']
    [['Name', 'Year', 'Value']]
    .rename(columns={'Name': 'county', 'Year': 'year', 'Value': 'hosp_rate'})
    .sort_values(by=['year', 'county'])
    .reset_index(drop=True)
)

hosp_df.head()

Unnamed: 0,county,year,hosp_rate
0,GILA,2011,0.0
1,GREENLEE,2011,0.0
2,MARICOPA,2011,8.73
3,MOHAVE,2011,11.43
4,PIMA,2011,4.79


In [23]:
# Persist the cleaned hospitalization table as CSV, leaving the row index out of the file
hosp_df.to_csv(
    "data/cleaned_AZ_Hospitalizations.csv",
    index=False,
    encoding="utf8",
)

#### Vulnerability Data

In [33]:
# Load the raw vulnerability CSV
vulnerability_raw = pd.read_csv('data/AZ_Vulnerability.csv')

# Remove exact duplicate rows (first occurrence wins), then the rows whose GeogID is 'ALL'
vulnerability_unique = vulnerability_raw.drop_duplicates(keep='first')
vulnerability_rows = vulnerability_unique[vulnerability_unique['GeogID'] != 'ALL']

# Keep the three columns of interest under their cleaned names
svi_columns = {'Name': 'county', 'Year': 'year', 'Value': 'SVI'}
vulnerability_df = vulnerability_rows[list(svi_columns)].rename(columns=svi_columns)

# Order by year then county and renumber the index from zero
vulnerability_df = vulnerability_df.sort_values(by=['year', 'county']).reset_index(drop=True)
vulnerability_df.head()

Unnamed: 0,county,year,SVI
0,APACHE,2010,0.993
1,COCHISE,2010,0.864
2,COCONINO,2010,0.819
3,GILA,2010,0.801
4,GRAHAM,2010,0.984


In [34]:
# Persist the cleaned vulnerability table as CSV, leaving the row index out of the file
vulnerability_df.to_csv(
    "data/cleaned_AZ_Vulnerability.csv",
    index=False,
    encoding="utf8",
)

## Merge Data into CSV

In [35]:
# All three tables share the same (county, year) key
merge_keys = ['county', 'year']

# Join ED visit rates with hospitalization rates; the default inner join
# keeps only county/year pairs present in both tables
rates_df = ed_visits_df.merge(hosp_df, on=merge_keys)

# Attach the SVI values the same way to form the combined dataset
df2 = rates_df.merge(vulnerability_df, on=merge_keys)
df2.head()


Unnamed: 0,county,year,ed_rate,hosp_rate,SVI
0,COCHISE,2014,18.16,4.81,0.906
1,GILA,2014,21.05,0.0,0.882
2,MARICOPA,2014,24.46,6.04,0.669
3,MOHAVE,2014,75.08,8.25,0.877
4,PIMA,2014,18.8,5.25,0.84


In [36]:
# Persist the merged dataset as CSV, leaving the row index out of the file
df2.to_csv(
    "data/HRI.csv",
    index=False,
    encoding="utf8",
)