In [1]:
# Dependencies
# import numpy as np
import pandas as pd
import datetime as dt
import requests
import json

# Extract: Data Pull

## Extract Data & Clean

### API Tracking data from CDC

#### Max temp days

In [2]:
# URL for GET requests to retrieve data
version = "v1"
measureId = 423 # Requires a valid measureID
stratificationLevelId = 82 # Requires a valid stratificationLevelId.
geographicTypeIdFilter = 1 # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = 4 # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1 # Requires a valid temporal type id matching temporal items (e.g., 1)
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011" # Requires valid temporal items entries (e.g., 2020) separated by comma.
isSmoothed = 0 # 0 not smoothed- most data isn't
getFullCoreHolder = 0 # Requires either a one or zero (true or false) for fetching the full core holder. 


# Build the endpoint URL
MaxTempDays_url = f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder/{measureId}/{stratificationLevelId}/{geographicTypeIdFilter}/{geographicItemsFilter}/{temporalTypeIdFilter}/{temporalItemsFilter}/{isSmoothed}/{getFullCoreHolder}?TemperatureHeatIndexId=1&RelativeThresholdId=1" #{getFullCoreHolder}

response = requests.get(MaxTempDays_url).json()

# Isolate tableResult for easy reading
data = response["tableResult"]

#for index, element in enumerate(data):
#  print(index, ":", element)

In [3]:
# Create an empty list to hold summaries
summary_list = []

# loop through the data
for bob in data:
    county = bob["geo"]
    year = bob["temporal"]
    days = bob["dataValue"]
    
    # create summary dictionary
    summary_dict = {
        "county": county,
        "year": year,
        "ext_heat_days" : days
    }
    
    # Append the sumary dict to the list
    summary_list.append(summary_dict)

EHD = pd.DataFrame(summary_list) 
EHD

Unnamed: 0,county,year,ext_heat_days
0,Apache,2011,19
1,Apache,2012,19
2,Apache,2013,18
3,Apache,2014,5
4,Apache,2015,18
...,...,...,...
160,Yuma,2017,21
161,Yuma,2018,19
162,Yuma,2019,22
163,Yuma,2020,36


In [4]:
EHD = EHD.reset_index()
EHD.head()

Unnamed: 0,index,county,year,ext_heat_days
0,0,Apache,2011,19
1,1,Apache,2012,19
2,2,Apache,2013,18
3,3,Apache,2014,5
4,4,Apache,2015,18


In [5]:
EHD.rename(columns={'index':'key'}, inplace=True)
EHD.head()

Unnamed: 0,key,county,year,ext_heat_days
0,0,Apache,2011,19
1,1,Apache,2012,19
2,2,Apache,2013,18
3,3,Apache,2014,5
4,4,Apache,2015,18


In [6]:
# Export the cleaned DataFrame as a CSV file. 
EHD.to_csv("data/ExtremeHeatDays.csv", encoding='utf8', index=False)

#### ED visits for over 35

In [7]:
# URL for GET requests to retrieve data
version = "v1"
measureId = 108 # Requires a valid measureID
stratificationLevelId = 2 # Requires a valid stratificationLevelId.
geographicTypeIdFilter = "all" # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = "all" # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1 # Requires a valid temporal type id matching temporal items (e.g., 1)
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011" # Requires valid temporal items entries (e.g., 2020) separated by comma.
isSmoothed = 0 # 0 not smoothed- most data isn't
getFullCoreHolder = 0 # Requires either a one or zero (true or false) for fetching the full core holder. 


# Build the endpoint URL
Hosp35_url = f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder/{measureId}/{stratificationLevelId}/{geographicTypeIdFilter}/{geographicItemsFilter}/{temporalTypeIdFilter}/{temporalItemsFilter}/{isSmoothed}/{getFullCoreHolder}"

#https://ephtracking.cdc.gov/apigateway/api/v1/getCoreHolder/108/2/all/all/1/2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011/0/0

response = requests.get(Hosp35_url).json()

# Isolate tableResult for easy reading
data = response["tableResult"]

In [8]:
# Create an empty list to hold summaries
summary_list = []

# loop through the data
for bob in data:
    state = bob["parentGeoId"]
    county = bob["geo"]
    year = bob["temporal"]
    rate = bob["dataValue"]
    
    # create summary dictionary
    summary_dict = {
        "State" : state,
        "County": county,
        "Year": year,
        "Age Adjusted Rate ED" : rate
    }
    
    # Append the sumary dict to the list
    summary_list.append(summary_dict)

ED35 = pd.DataFrame(summary_list)  
ED35 #results all states

Unnamed: 0,State,County,Year,Age Adjusted Rate ED
0,04,Apache,2011,22.2
1,04,Apache,2012,25.9
2,04,Apache,2013,21.7
3,04,Apache,2014,17.5
4,04,Apache,2015,23.1
...,...,...,...,...
16308,55,Wood,2017,42.3
16309,55,Wood,2018,41.0
16310,55,Wood,2019,41.0
16311,55,Wood,2020,33.0


In [9]:
#select rows where 'state' is AZ, 04
ED35 = ED35.loc[ED35['State'] == "04"]
ED35 = ED35.drop(columns=["State"])

In [10]:
ED35.rename(columns={'County': 'county', 'Year': 'year', 'Age Adjusted Rate ED': 'ed_rate_35'}, inplace=True)
ED35.head()

Unnamed: 0,county,year,ed_rate_35
0,Apache,2011,22.2
1,Apache,2012,25.9
2,Apache,2013,21.7
3,Apache,2014,17.5
4,Apache,2015,23.1


In [11]:
ED35 = ED35.reset_index()
ED35.head()

Unnamed: 0,index,county,year,ed_rate_35
0,0,Apache,2011,22.2
1,1,Apache,2012,25.9
2,2,Apache,2013,21.7
3,3,Apache,2014,17.5
4,4,Apache,2015,23.1


In [12]:
ED35.rename(columns={'index':'key'}, inplace=True)
ED35.head()

Unnamed: 0,key,county,year,ed_rate_35
0,0,Apache,2011,22.2
1,1,Apache,2012,25.9
2,2,Apache,2013,21.7
3,3,Apache,2014,17.5
4,4,Apache,2015,23.1


In [13]:
ED35.to_csv("data/EDRates35.csv", encoding='utf8', index=False)

## Putting dataframes to wide

In [14]:
# EHD is long
EHD_long = EHD

# Make wide data
EHD_wide = EHD_long.pivot(index= 'year', columns= 'county', values = 'ext_heat_days')

In [15]:
# EHD is long
ED35_long = ED35

# Make wide data
ED35_wide = ED35_long.pivot(index= 'year', columns= 'county', values = 'ed_rate_35')

In [18]:
ED35_wide.to_csv("data/EDRates35_wide.csv", encoding='utf8', index='year')
EHD_wide.to_csv("data/ExtremeHeatDays_wide.csv", encoding='utf8', index='year')

In [19]:
test = pd.read_csv('data/ExtremeHeatDays_wide.csv')
test.head()

Unnamed: 0,year,Apache,Cochise,Coconino,Gila,Graham,Greenlee,La Paz,Maricopa,Mohave,Navajo,Pima,Pinal,Santa Cruz,Yavapai,Yuma
0,2011,19,34,17,30,28,25,22,28,18,21,32,31,37,24,25
1,2012,19,25,20,27,22,22,15,21,14,20,27,24,29,27,17
2,2013,18,19,16,16,14,17,13,13,18,16,14,13,17,19,12
3,2014,5,4,4,3,3,4,3,2,4,3,4,2,6,3,6
4,2015,18,14,19,15,13,14,20,16,15,19,13,15,12,17,22


# Load: SQLite database

In [12]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func, distinct

# Import modules to declare columns and column data types
from sqlalchemy import Column, Integer, String, Float, Boolean

from pathlib import Path

In [13]:
Path('data/hri.sqlite').touch()

In [14]:
# create engine to HRI.sqlite
engine = create_engine("sqlite:///data/hri.sqlite")

In [20]:

engine.execute('''CREATE TABLE extreme_heat_days (key int PRIMARY KEY, county text, year int, ext_heat_days float)''')
engine.execute('''CREATE TABLE ed_visit_35 (key int PRIMARY KEY, county text, year int, ed_rate_35 float)''')


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7faf315571c0>

In [21]:
# load the data into a Pandas DataFrame and write to sqlite table

extreme_heat_days = pd.read_csv('data/ExtremeHeatDays.csv')
extreme_heat_days.to_sql('extreme_heat_days', engine, if_exists='append', index = False)

ed_visit_35 = pd.read_csv('data/EDRates35.csv')
ed_visit_35.to_sql('ed_visit_35', engine, if_exists='append', index = False)

165

In [22]:
# Reflect Database into ORM classes
Base = automap_base()
Base.prepare(autoload_with=engine)
Base.classes.keys() #['ed_visit_35', 'extreme_heat_days']

['ed_visit_35', 'extreme_heat_days']