In [5]:
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt
from pprint import pprint

In [2]:
# Dependencies
# import numpy as np
import pandas as pd
import datetime as dt
import requests
import json

# Extract & Clean Data

### API Tracking data

#### Max temp days

In [3]:

# Path parameters for the CDC Tracking API getCoreHolder request
version = "v1"
measureId = 423 # Requires a valid measureID
stratificationLevelId = 82 # Requires a valid stratificationLevelId.
geographicTypeIdFilter = 1 # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = 4 # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1 # Requires a valid temporal type id matching temporal items (e.g., 1)
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011" # Requires valid temporal items entries (e.g., 2020) separated by comma.
isSmoothed = 0 # 0 not smoothed- most data isn't
getFullCoreHolder = 0 # Requires either a one or zero (true or false) for fetching the full core holder.


# Build the endpoint URL; the query string adds the TemperatureHeatIndexId
# and RelativeThresholdId filters on top of the path parameters.
MaxTempDays_url = f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder/{measureId}/{stratificationLevelId}/{geographicTypeIdFilter}/{geographicItemsFilter}/{temporalTypeIdFilter}/{temporalItemsFilter}/{isSmoothed}/{getFullCoreHolder}?TemperatureHeatIndexId=1&RelativeThresholdId=1"

# Fetch the data. The timeout stops a stalled connection from hanging the
# kernel, and raise_for_status() reports an HTTP error here instead of letting
# it surface later as a JSON-decoding or KeyError failure.
http_response = requests.get(MaxTempDays_url, timeout=60)
http_response.raise_for_status()
response = http_response.json()

# Isolate tableResult for easy reading
data = response["tableResult"]

# Print every record with its position so the payload structure can be inspected
for index, element in enumerate(data):
    print(index, ":", element)

0 : {'id': '6634743535', 'geographicTypeId': 2, 'geo': 'Apache', 'geoId': '04001', 'geoAbbreviation': 'CountyAbbreviation', 'parentGeographicTypeId': 1, 'parentGeo': 'Arizona', 'parentGeoId': '04', 'parentGeoAbbreviation': 'AZ', 'calculationType': 'Counts', 'temporalTypeId': 1, 'temporal': '2011', 'temporalDescription': 'Single Year', 'temporalColumnName': 'ReportYear', 'temporalRollingColumnName': 'RollingYearCount', 'temporalId': 2011, 'minimumTemporal': None, 'minimumTemporalId': None, 'parentTemporalTypeId': None, 'parentTemporalType': None, 'parentTemporal': None, 'parentTemporalId': None, 'year': '2011', 'dataValue': '19', 'displayValue': '19', 'groupById': '1', 'noDataId': -1, 'hatchingId': -1, 'hatching': None, 'suppressionFlag': '0', 'noDataBreakGroup': 0, 'confidenceIntervalLow': None, 'confidenceIntervalHigh': None, 'confidenceIntervalName': None, 'standardError': None, 'standardErrorName': None, 'secondaryValue': None, 'secondaryValueName': None, 'descriptiveValue': None, '

In [6]:
# Reduce each API record to the three fields kept for the heat-days table:
# county name ("geo"), year ("temporal") and the reported value ("dataValue").
summary_list = [
    {
        "County": record["geo"],
        "Year": record["temporal"],
        "Extreme Heat Days": record["dataValue"],
    }
    for record in data
]

# One row per record, columns in the order of the dictionary keys above
EHD = pd.DataFrame(summary_list)
EHD

Unnamed: 0,County,Year,Extreme Heat Days
0,Apache,2011,19
1,Apache,2012,19
2,Apache,2013,18
3,Apache,2014,5
4,Apache,2015,18
...,...,...,...
160,Yuma,2017,21
161,Yuma,2018,19
162,Yuma,2019,22
163,Yuma,2020,36


In [7]:
# Write the extreme-heat-days table to disk without the DataFrame index column
EHD.to_csv(
    "../data/ExtremeHeatDays.csv",
    index=False,
    encoding='utf8',
)

#### ED visits for over 35

In [8]:

# Path parameters for the CDC Tracking API getCoreHolder request
version = "v1"
measureId = 108 # Requires a valid measureID
stratificationLevelId = 2 # Requires a valid stratificationLevelId.
geographicTypeIdFilter = "all" # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = "all" # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1 # Requires a valid temporal type id matching temporal items (e.g., 1)
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011" # Requires valid temporal items entries (e.g., 2020) separated by comma.
isSmoothed = 0 # 0 not smoothed- most data isn't
getFullCoreHolder = 0 # Requires either a one or zero (true or false) for fetching the full core holder.


# Build the endpoint URL
Hosp35_url = f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder/{measureId}/{stratificationLevelId}/{geographicTypeIdFilter}/{geographicItemsFilter}/{temporalTypeIdFilter}/{temporalItemsFilter}/{isSmoothed}/{getFullCoreHolder}"

# Resolved URL, for reference:
#https://ephtracking.cdc.gov/apigateway/api/v1/getCoreHolder/108/2/all/all/1/2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011/0/0

# Fetch the data. The timeout stops a stalled connection from hanging the
# kernel, and raise_for_status() reports an HTTP error here instead of letting
# it surface later as a JSON-decoding or KeyError failure.
http_response = requests.get(Hosp35_url, timeout=60)
http_response.raise_for_status()
response = http_response.json()

# Isolate tableResult for easy reading
data = response["tableResult"]

In [16]:
# Reduce each API record to the fields kept for the ED-rate table. The parent
# geography id ("parentGeoId") is retained so rows can be filtered by state later.
summary_list = [
    {
        "State": record["parentGeoId"],
        "County": record["geo"],
        "Year": record["temporal"],
        "Age Adjusted Rate ED": record["dataValue"],
    }
    for record in data
]

# The request used "all" for both geography filters, so this frame spans every state
ED35 = pd.DataFrame(summary_list)

In [17]:
# Keep only the Arizona rows, i.e. those whose State (parentGeoId) is "04"
is_arizona = ED35['State'] == "04"
ED35 = ED35.loc[is_arizona]

In [18]:
# Write the Arizona ED-rate table to disk without the DataFrame index column
ED35.to_csv(
    "../data/EDRates35.csv",
    index=False,
    encoding='utf8',
)

### HRI Data

In [7]:
# Files to load: read each source CSV under ../data into its own pandas DataFrame.
# One frame per file: ED visits, hospitalizations, vulnerability, and heat days.
ed_visits_df = pd.read_csv('../data/AZ-EDVisits.csv')
Hosp_df = pd.read_csv('../data/AZ_Hospitalizations.csv')
SVI_df = pd.read_csv('../data/AZ_Vulnerability.csv')
HeatDays_df = pd.read_csv('../data/AZ-NoHeatDays.csv')

In [8]:
# Keep only the four columns used downstream and rename them in one step;
# the mapping's keys double as the column selection (same order as before).
ed_column_names = {'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'ED_rates'}
ed_visits_df = ed_visits_df[list(ed_column_names)].rename(columns=ed_column_names)
ed_visits_df.head()

Unnamed: 0,county,year,county_id,ED_rates
0,NAVAJO,2011,4017,17.72
1,NAVAJO,2011,4017,17.72
2,PIMA,2011,4019,20.12
3,PIMA,2011,4019,20.12
4,PINAL,2011,4021,37.73


In [9]:
# The preview above shows rows repeated back-to-back; keep the first copy of
# each fully identical row (drop_duplicates keeps the first occurrence by default).
ed_visits_df = ed_visits_df.drop_duplicates()
ed_visits_df.head()

Unnamed: 0,county,year,county_id,ED_rates
0,NAVAJO,2011,4017,17.72
2,PIMA,2011,4019,20.12
4,PINAL,2011,4021,37.73
6,SANTA CRUZ,2011,4023,17.72
8,YAVAPAI,2011,4025,12.76


In [10]:
# Keep only the four columns used downstream and rename them in one step;
# the mapping's keys double as the column selection (same order as before).
hosp_column_names = {'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'Hosp_rates'}
Hosp_df = Hosp_df[list(hosp_column_names)].rename(columns=hosp_column_names)
Hosp_df.head()

Unnamed: 0,county,year,county_id,Hosp_rates
0,GREENLEE,2011,4011,0.0
1,GREENLEE,2011,4011,0.0
2,MARICOPA,2011,4013,8.73
3,MARICOPA,2011,4013,8.73
4,MOHAVE,2011,4015,11.43


In [11]:
# The preview above shows rows repeated back-to-back; keep the first copy of
# each fully identical row (drop_duplicates keeps the first occurrence by default).
Hosp_df = Hosp_df.drop_duplicates()
Hosp_df.head()

Unnamed: 0,county,year,county_id,Hosp_rates
0,GREENLEE,2011,4011,0.0
2,MARICOPA,2011,4013,8.73
4,MOHAVE,2011,4015,11.43
6,PIMA,2011,4019,4.79
8,PINAL,2012,4021,8.48


In [12]:
# Keep only the four columns used downstream and rename them in one step;
# the mapping's keys double as the column selection (same order as before).
svi_column_names = {'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'SVI'}
SVI_df = SVI_df[list(svi_column_names)].rename(columns=svi_column_names)

SVI_df.head()

Unnamed: 0,county,year,county_id,SVI
0,APACHE,2010,4001,0.993
1,COCHISE,2010,4003,0.864
2,COCONINO,2010,4005,0.819
3,GILA,2010,4007,0.801
4,GRAHAM,2010,4009,0.984


In [13]:
# Keep only the four columns used downstream and rename them in one step;
# the mapping's keys double as the column selection (same order as before).
heat_column_names = {'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'HeatDays'}
HeatDays_df = HeatDays_df[list(heat_column_names)].rename(columns=heat_column_names)

HeatDays_df.head()

Unnamed: 0,county,year,county_id,HeatDays
0,APACHE,2011,4001,7.0
1,COCHISE,2011,4003,89.0
2,COCONINO,2011,4005,2.0
3,GILA,2011,4007,81.0
4,GRAHAM,2011,4009,121.0


### Merge data into one CSV

In [14]:
# Join ED-visit and hospitalization rates on county and year (default inner
# join, so only county/year pairs present in both tables survive), then drop
# the two suffixed county_id columns the merge produces.
rates_df = (
    ed_visits_df
    .merge(Hosp_df, on=['county', 'year'])
    .drop(columns=["county_id_x", "county_id_y"])
)
rates_df

Unnamed: 0,county,year,ED_rates,Hosp_rates
0,PIMA,2011,20.12,4.79
1,PINAL,2011,37.73,13.18
2,YUMA,2011,81.43,5.31
3,ARIZONA,2011,30.36,7.36
4,GREENLEE,2013,0.00,0.00
...,...,...,...,...
82,PINAL,2021,42.39,9.93
83,SANTA CRUZ,2021,40.66,0.00
84,YAVAPAI,2021,28.35,2.81
85,YUMA,2021,82.70,21.39


In [15]:
# Left-join the SVI values onto the combined rates by county and year, then
# discard the county_id column that SVI_df contributes.
# NOTE(review): the saved df3.info() output reports SVI as non-null for only
# 17 of 87 rows, and SVI_df.head() shows year 2010 — confirm that matching on
# year is what is intended here.
df2 = (
    rates_df
    .merge(SVI_df, how="left", on=['county', 'year'])
    .drop(columns=["county_id"])
)

In [18]:
# Left-join the heat-day counts onto df2 by county and year, then discard the
# county_id column that HeatDays_df contributes.
df3 = (
    df2
    .merge(HeatDays_df, how="left", on=['county', 'year'])
    .drop(columns=["county_id"])
)

# Show the fully merged table
df3

Unnamed: 0,county,year,ED_rates,Hosp_rates,SVI,HeatDays
0,PIMA,2011,20.12,4.79,,127.0
1,PINAL,2011,37.73,13.18,,139.0
2,YUMA,2011,81.43,5.31,,140.0
3,ARIZONA,2011,30.36,7.36,,92.0
4,GREENLEE,2013,0.00,0.00,,77.0
...,...,...,...,...,...,...
82,PINAL,2021,42.39,9.93,,
83,SANTA CRUZ,2021,40.66,0.00,,
84,YAVAPAI,2021,28.35,2.81,,
85,YUMA,2021,82.70,21.39,,


In [20]:
# Summarize dtypes and non-null counts to see how sparse the left-joined columns are
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 87 entries, 0 to 86
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   county      87 non-null     object 
 1   year        87 non-null     int64  
 2   ED_rates    87 non-null     float64
 3   Hosp_rates  87 non-null     float64
 4   SVI         17 non-null     float64
 5   HeatDays    48 non-null     float64
dtypes: float64(4), int64(1), object(1)
memory usage: 4.8+ KB


In [27]:
# Write the merged table to disk without the DataFrame index column
df3.to_csv(
    "../data/HRI.csv",
    index=False,
    encoding='utf8',
)

# Load SQLite data with SQLAlchemy

In [3]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func, distinct

In [None]:
# Create the engine for the HRI SQLite database.
# NOTE(review): this path is relative ("data/..."), whereas the CSV cells in
# this notebook use "../data/..." — confirm which location is intended.
engine = create_engine("sqlite:///data/HRI.sqlite")

In [None]:
# Create the automap base that the mapped classes will be generated from
Base = automap_base() 
# Reflect the tables of the database behind `engine` into that base
Base.prepare(autoload_with=engine) 

In [None]:
# View the names of all the classes that automap found; these are the
# attribute names available on Base.classes in the next cell.
Base.classes.keys() 

In [None]:
# Save references to each table
ED = Base.classes.ED
Hospitalizations = Base.classes.hospitalizations
# FIXME(review): HeatDays is bound to the same mapped class as Hospitalizations
# (Base.classes.hospitalizations), which looks like a copy-paste slip. Confirm
# the heat-days table name via Base.classes.keys() and point this at it.
HeatDays = Base.classes.hospitalizations
SVI = Base.classes.SVI

In [None]:
# Create our session (link) from Python to the DB, bound to the engine created above
session = Session(bind=engine)

In [None]:
# Close the session once querying is finished
session.close()