In [1]:
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt
from pprint import pprint

In [2]:
# Dependencies
# import numpy as np
import pandas as pd
import datetime as dt
import requests
import json

# Extract & Clean Data

### API Tracking data

#### Max temp days

In [3]:
# URL for GET requests to retrieve data
version = "v1"
measureId = 423 # Requires a valid measureID
stratificationLevelId = 82 # Requires a valid stratificationLevelId.
geographicTypeIdFilter = 1 # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = 4 # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1 # Requires a valid temporal type id matching temporal items (e.g., 1)
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011" # Requires valid temporal items entries (e.g., 2020) separated by comma.
isSmoothed = 0 # 0 not smoothed- most data isn't
getFullCoreHolder = 0 # Requires either a one or zero (true or false) for fetching the full core holder. 


# Build the endpoint URL
MaxTempDays_url = f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder/{measureId}/{stratificationLevelId}/{geographicTypeIdFilter}/{geographicItemsFilter}/{temporalTypeIdFilter}/{temporalItemsFilter}/{isSmoothed}/{getFullCoreHolder}?TemperatureHeatIndexId=1&RelativeThresholdId=1" #{getFullCoreHolder}

response = requests.get(MaxTempDays_url).json()

# Isolate tableResult for easy reading
data = response["tableResult"]

#for index, element in enumerate(data):
#  print(index, ":", element)

In [4]:
# Create an empty list to hold summaries
summary_list = []

# loop through the data
for bob in data:
    county = bob["geo"]
    year = bob["temporal"]
    days = bob["dataValue"]
    
    # create summary dictionary
    summary_dict = {
        "County": county,
        "Year": year,
        "Extreme Heat Days" : days
    }
    
    # Append the sumary dict to the list
    summary_list.append(summary_dict)

EHD = pd.DataFrame(summary_list) 
EHD

Unnamed: 0,County,Year,Extreme Heat Days
0,Apache,2011,19
1,Apache,2012,19
2,Apache,2013,18
3,Apache,2014,5
4,Apache,2015,18
...,...,...,...
160,Yuma,2017,21
161,Yuma,2018,19
162,Yuma,2019,22
163,Yuma,2020,36


In [5]:
# Export the cleaned DataFrame as a CSV file. 
EHD.to_csv("../data/ExtremeHeatDays.csv", encoding='utf8', index=False)

#### ED visits for over 35

In [6]:

# URL for GET requests to retrieve data
version = "v1"
measureId = 108 # Requires a valid measureID
stratificationLevelId = 2 # Requires a valid stratificationLevelId.
geographicTypeIdFilter = "all" # Requires a valid geographicTypeID or ALL.
geographicItemsFilter = "all" # Filter to retrieve only certain geographicItems.
temporalTypeIdFilter = 1 # Requires a valid temporal type id matching temporal items (e.g., 1)
temporalItemsFilter = "2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011" # Requires valid temporal items entries (e.g., 2020) separated by comma.
isSmoothed = 0 # 0 not smoothed- most data isn't
getFullCoreHolder = 0 # Requires either a one or zero (true or false) for fetching the full core holder. 


# Build the endpoint URL
Hosp35_url = f"https://ephtracking.cdc.gov/apigateway/api/{version}/getCoreHolder/{measureId}/{stratificationLevelId}/{geographicTypeIdFilter}/{geographicItemsFilter}/{temporalTypeIdFilter}/{temporalItemsFilter}/{isSmoothed}/{getFullCoreHolder}"

#https://ephtracking.cdc.gov/apigateway/api/v1/getCoreHolder/108/2/all/all/1/2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011/0/0

response = requests.get(Hosp35_url).json()

# Isolate tableResult for easy reading
data = response["tableResult"]

In [7]:
# Create an empty list to hold summaries
summary_list = []

# loop through the data
for bob in data:
    state = bob["parentGeoId"]
    county = bob["geo"]
    year = bob["temporal"]
    rate = bob["dataValue"]
    
    # create summary dictionary
    summary_dict = {
        "State" : state,
        "County": county,
        "Year": year,
        "Age Adjusted Rate ED" : rate
    }
    
    # Append the sumary dict to the list
    summary_list.append(summary_dict)

ED35 = pd.DataFrame(summary_list)  
ED35 #results all states

Unnamed: 0,State,County,Year,Age Adjusted Rate ED
0,04,Apache,2011,22.2
1,04,Apache,2012,25.9
2,04,Apache,2013,21.7
3,04,Apache,2014,17.5
4,04,Apache,2015,23.1
...,...,...,...,...
16308,55,Wood,2017,42.3
16309,55,Wood,2018,41.0
16310,55,Wood,2019,41.0
16311,55,Wood,2020,33.0


In [8]:
#select rows where 'state' is AZ, 04
ED35 = ED35.loc[ED35['State'] == "04"]

ED35 = ED35.drop(columns=["State"])

In [9]:
ED35.to_csv("../data/EDRates35.csv", encoding='utf8', index=False)

### HRI Data from Arizona Department of Health Services (AZDHS)

In [10]:
# File to Load
    # Read the ED Visits data into a Pandas DataFrame
ed_visits_df = pd.read_csv('../data/AZ-EDVisits.csv')
Hosp_df = pd.read_csv('../data/AZ_Hospitalizations.csv')
SVI_df = pd.read_csv('../data/AZ_Vulnerability.csv')
# HeatDays_df = pd.read_csv('../data/AZ-NoHeatDays.csv') #removed

In [11]:
#Pull Necessary columns and rename
ed_visits_df = ed_visits_df[['Name', 'Year', 'GeogID', 'Value']]
ed_visits_df.rename(columns={'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'ED_rates'}, inplace=True)
ed_visits_df.head()

Unnamed: 0,county,year,county_id,ED_rates
0,NAVAJO,2011,4017,17.72
1,NAVAJO,2011,4017,17.72
2,PIMA,2011,4019,20.12
3,PIMA,2011,4019,20.12
4,PINAL,2011,4021,37.73


In [12]:
ed_visits_df = ed_visits_df.drop_duplicates(keep='first')
ed_visits_df.head()

Unnamed: 0,county,year,county_id,ED_rates
0,NAVAJO,2011,4017,17.72
2,PIMA,2011,4019,20.12
4,PINAL,2011,4021,37.73
6,SANTA CRUZ,2011,4023,17.72
8,YAVAPAI,2011,4025,12.76


In [13]:
Hosp_df = Hosp_df[['Name', 'Year', 'GeogID', 'Value']]
Hosp_df.rename(columns={'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'Hosp_rates'}, inplace=True)
Hosp_df.head()

Unnamed: 0,county,year,county_id,Hosp_rates
0,GREENLEE,2011,4011,0.0
1,GREENLEE,2011,4011,0.0
2,MARICOPA,2011,4013,8.73
3,MARICOPA,2011,4013,8.73
4,MOHAVE,2011,4015,11.43


In [14]:
Hosp_df = Hosp_df.drop_duplicates(keep='first')
Hosp_df.head()

Unnamed: 0,county,year,county_id,Hosp_rates
0,GREENLEE,2011,4011,0.0
2,MARICOPA,2011,4013,8.73
4,MOHAVE,2011,4015,11.43
6,PIMA,2011,4019,4.79
8,PINAL,2012,4021,8.48


In [15]:
SVI_df = SVI_df[['Name', 'Year', 'GeogID', 'Value']]
SVI_df.rename(columns={'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'SVI'}, inplace=True)

SVI_df.head()

Unnamed: 0,county,year,county_id,SVI
0,APACHE,2010,4001,0.993
1,COCHISE,2010,4003,0.864
2,COCONINO,2010,4005,0.819
3,GILA,2010,4007,0.801
4,GRAHAM,2010,4009,0.984


In [16]:
#HeatDays_df = HeatDays_df[['Name', 'Year', 'GeogID', 'Value']]
#HeatDays_df.rename(columns={'Name': 'county', 'Year': 'year', 'GeogID': 'county_id', 'Value': 'HeatDays'}, inplace=True)

#HeatDays_df.head()

### Merge data to one csv

In [17]:
# Combine the data into a single dataset.  
rates_df = pd.merge(ed_visits_df, Hosp_df, left_on=['county','year'], right_on = ['county','year'])

# Remove county IDs
rates_df = rates_df.drop(columns=["county_id_x", "county_id_y"])
rates_df

Unnamed: 0,county,year,ED_rates,Hosp_rates
0,PIMA,2011,20.12,4.79
1,PINAL,2011,37.73,13.18
2,YUMA,2011,81.43,5.31
3,ARIZONA,2011,30.36,7.36
4,GREENLEE,2013,0.00,0.00
...,...,...,...,...
82,PINAL,2021,42.39,9.93
83,SANTA CRUZ,2021,40.66,0.00
84,YAVAPAI,2021,28.35,2.81
85,YUMA,2021,82.70,21.39


In [18]:
# Combine the data into a single dataset.  
df2 = pd.merge(rates_df, SVI_df, how= "left",  on=['county','year'])
df2


# Remove county IDs
df2 = df2.drop(columns=["county_id"])

In [19]:
df2

Unnamed: 0,county,year,ED_rates,Hosp_rates,SVI
0,PIMA,2011,20.12,4.79,
1,PINAL,2011,37.73,13.18,
2,YUMA,2011,81.43,5.31,
3,ARIZONA,2011,30.36,7.36,
4,GREENLEE,2013,0.00,0.00,
...,...,...,...,...,...
82,PINAL,2021,42.39,9.93,
83,SANTA CRUZ,2021,40.66,0.00,
84,YAVAPAI,2021,28.35,2.81,
85,YUMA,2021,82.70,21.39,


In [20]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 87 entries, 0 to 86
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   county      87 non-null     object 
 1   year        87 non-null     int64  
 2   ED_rates    87 non-null     float64
 3   Hosp_rates  87 non-null     float64
 4   SVI         17 non-null     float64
dtypes: float64(3), int64(1), object(1)
memory usage: 4.1+ KB


In [21]:
# Export the cleaned DataFrame as a CSV file. 
df2.to_csv("../data/HRI.csv", encoding='utf8', index=False)

## Geojson counties

In [22]:
import json


In [23]:
with open('AZ-04-arizona-counties (1).json') as f:
  response = json.load(f)
  
response

{'type': 'Topology',
 'transform': {'scale': [0.0017670553225065367, 0.0012461030136843383],
  'translate': [-114.81337059380196, 31.33168393952881]},
 'arcs': [[[837, 941], [433, 0], [207, 2]],
  [[1477, 943], [233, 0], [249, -5], [110, 8], [169, 0], [10, 2], [220, 0]],
  [[2468, 948], [0, -69]],
  [[2468, 879], [0, -558]],
  [[2468, 321], [-343, -4], [-60, 0], [1, -164], [-115, -1], [0, -77]],
  [[1951, 75], [-566, 254], [-548, 240]],
  [[837, 569], [0, 372]],
  [[2468, 321], [-5, -94], [0, -226]],
  [[2463, 1], [-347, -1], [-165, 75]],
  [[1844, 2520],
   [30, 21],
   [17, -29],
   [-26, -26],
   [54, 6],
   [2, -48],
   [12, 39],
   [84, 22],
   [58, -63],
   [29, 17],
   [78, -66],
   [62, -38],
   [37, 27],
   [19, -30]],
  [[2300, 2352], [0, -210], [423, 0], [0, -340]],
  [[2723, 1802],
   [0, -89],
   [-95, -12],
   [-97, -19],
   [-57, -37],
   [-9, -69],
   [32, -78],
   [-28, -3]],
  [[2469, 1495],
   [-80, -49],
   [-23, 7],
   [-52, -58],
   [-31, -69],
   [-99, 237],
   [

In [24]:
for key in response:
    print(key, '->', response[key])

type -> Topology
transform -> {'scale': [0.0017670553225065367, 0.0012461030136843383], 'translate': [-114.81337059380196, 31.33168393952881]}
arcs -> [[[837, 941], [433, 0], [207, 2]], [[1477, 943], [233, 0], [249, -5], [110, 8], [169, 0], [10, 2], [220, 0]], [[2468, 948], [0, -69]], [[2468, 879], [0, -558]], [[2468, 321], [-343, -4], [-60, 0], [1, -164], [-115, -1], [0, -77]], [[1951, 75], [-566, 254], [-548, 240]], [[837, 569], [0, 372]], [[2468, 321], [-5, -94], [0, -226]], [[2463, 1], [-347, -1], [-165, 75]], [[1844, 2520], [30, 21], [17, -29], [-26, -26], [54, 6], [2, -48], [12, 39], [84, 22], [58, -63], [29, 17], [78, -66], [62, -38], [37, 27], [19, -30]], [[2300, 2352], [0, -210], [423, 0], [0, -340]], [[2723, 1802], [0, -89], [-95, -12], [-97, -19], [-57, -37], [-9, -69], [32, -78], [-28, -3]], [[2469, 1495], [-80, -49], [-23, 7], [-52, -58], [-31, -69], [-99, 237], [-11, 50], [-37, 100]], [[2136, 1713], [-64, 170], [-39, -61], [-37, 45], [-58, 95], [10, 38], [-45, 77], [3, 66

In [25]:
#only want objects down to geometries
data = response["objects"]['cb_2015_arizona_county_20m']['geometries']

# drill further down
#data

for index, element in enumerate(data):
  print(index, ":", element)



0 : {'arcs': [[0, 1, 2, 3, 4, 5, 6]], 'type': 'Polygon', 'properties': {'STATEFP': '04', 'COUNTYFP': '019', 'COUNTYNS': '00025446', 'AFFGEOID': '0500000US04019', 'GEOID': '04019', 'NAME': 'Pima', 'LSAD': '06', 'ALAND': 23794325411, 'AWATER': 5273054}}
1 : {'arcs': [[7, 8, -5]], 'type': 'Polygon', 'properties': {'STATEFP': '04', 'COUNTYFP': '023', 'COUNTYNS': '00040472', 'AFFGEOID': '0500000US04023', 'GEOID': '04023', 'NAME': 'Santa Cruz', 'LSAD': '06', 'ALAND': 3203593349, 'AWATER': 3088047}}
2 : {'arcs': [[9, 10, 11, 12, 13, 14]], 'type': 'Polygon', 'properties': {'STATEFP': '04', 'COUNTYFP': '007', 'COUNTYNS': '00040471', 'AFFGEOID': '0500000US04007', 'GEOID': '04007', 'NAME': 'Gila', 'LSAD': '06', 'ALAND': 12323041468, 'AWATER': 97111975}}
3 : {'arcs': [[15, 16, 17, -7, 18]], 'type': 'Polygon', 'properties': {'STATEFP': '04', 'COUNTYFP': '027', 'COUNTYNS': '00023901', 'AFFGEOID': '0500000US04027', 'GEOID': '04027', 'NAME': 'Yuma', 'LSAD': '06', 'ALAND': 14281191792, 'AWATER': 131713

In [26]:
# Create an empty list to hold summaries
summary_list = []

# loop through the data
for bob in data:
    county = bob["properties"]['NAME']
    arcs = bob["arcs"]
    
    # create summary dictionary
    summary_dict = {
        "County": county,
        "Polygons": arcs,
    }
    
    # Append the sumary dict to the list
    summary_list.append(summary_dict)

In [27]:
summary_list

[{'County': 'Pima', 'Polygons': [[0, 1, 2, 3, 4, 5, 6]]},
 {'County': 'Santa Cruz', 'Polygons': [[7, 8, -5]]},
 {'County': 'Gila', 'Polygons': [[9, 10, 11, 12, 13, 14]]},
 {'County': 'Yuma', 'Polygons': [[15, 16, 17, -7, 18]]},
 {'County': 'Cochise', 'Polygons': [[-4, 19, 20, 21, -8]]},
 {'County': 'Yavapai', 'Polygons': [[22, -15, 23, 24, 25]]},
 {'County': 'La Paz', 'Polygons': [[26, 27, -25, 28, -17, 29]]},
 {'County': 'Mohave', 'Polygons': [[30, 31, -26, -28, 32]]},
 {'County': 'Coconino', 'Polygons': [[33, 34, -10, -23, -32]]},
 {'County': 'Greenlee', 'Polygons': [[35, 36, -21, 37]]},
 {'County': 'Graham', 'Polygons': [[38, 39, -38, -20, -3, 40, -12]]},
 {'County': 'Pinal', 'Polygons': [[-13, -41, -2, 41]]},
 {'County': 'Maricopa', 'Polygons': [[-29, -24, -14, -42, -1, -18]]},
 {'County': 'Navajo', 'Polygons': [[42, 43, -39, -11, -35]]},
 {'County': 'Apache', 'Polygons': [[44, -36, -40, -44, 45]]}]

In [28]:
df = pd.DataFrame(summary_list) 
df

Unnamed: 0,County,Polygons
0,Pima,"[[0, 1, 2, 3, 4, 5, 6]]"
1,Santa Cruz,"[[7, 8, -5]]"
2,Gila,"[[9, 10, 11, 12, 13, 14]]"
3,Yuma,"[[15, 16, 17, -7, 18]]"
4,Cochise,"[[-4, 19, 20, 21, -8]]"
5,Yavapai,"[[22, -15, 23, 24, 25]]"
6,La Paz,"[[26, 27, -25, 28, -17, 29]]"
7,Mohave,"[[30, 31, -26, -28, 32]]"
8,Coconino,"[[33, 34, -10, -23, -32]]"
9,Greenlee,"[[35, 36, -21, 37]]"


# SQLite data to SQLAlchemy

In [37]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func, distinct

# Import modules to declare columns and column data types
from sqlalchemy import Column, Integer, String, Float, Boolean

from pathlib import Path
import pandas as pd

In [38]:
Path('hri.sqlite').touch()


In [39]:
# create engine to hawaii.sqlite
engine = create_engine("sqlite:///HRI.sqlite", echo=True)

In [40]:
engine.execute('''CREATE TABLE hri (county text, year int, ED_rates float, Hosp_rates float, SVI float, HeatDays float)''')
engine.execute('''CREATE TABLE ed_visit (county text, year int, ed_rate float)''')
engine.execute('''CREATE TABLE hospitalization (county text, year int, hosp_rate float)''')
engine.execute('''CREATE TABLE vulnerability (county text, year int, vul_rate float)''')
engine.execute('''CREATE TABLE extreme_heat_days (county text, year int, ext_heat_days float)''')
engine.execute('''CREATE TABLE ed_visit_35 (county text, year int, ed_rate_35 float)''')


2023-06-11 16:46:25,685 INFO sqlalchemy.engine.Engine CREATE TABLE hri (county text, year int, ED_rates float, Hosp_rates float, SVI float, HeatDays float)
2023-06-11 16:46:25,685 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-06-11 16:46:25,692 INFO sqlalchemy.engine.Engine COMMIT
2023-06-11 16:46:25,694 INFO sqlalchemy.engine.Engine CREATE TABLE ed_visit (county text, year int, ed_rate float)
2023-06-11 16:46:25,695 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-06-11 16:46:25,700 INFO sqlalchemy.engine.Engine COMMIT
2023-06-11 16:46:25,703 INFO sqlalchemy.engine.Engine CREATE TABLE hospitalization (county text, year int, hosp_rate float)
2023-06-11 16:46:25,704 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-06-11 16:46:25,709 INFO sqlalchemy.engine.Engine COMMIT
2023-06-11 16:46:25,714 INFO sqlalchemy.engine.Engine CREATE TABLE vulnerability (county text, year int, vul_rate float)
2023-06-11 16:46:25,714 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-06-11 16:46:25,719 INFO sqla

  engine.execute('''CREATE TABLE hri (county text, year int, ED_rates float, Hosp_rates float, SVI float, HeatDays float)''')


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1c697e062e0>

In [45]:
# load the data into a Pandas DataFrame and write to sqlite table
hri = pd.read_csv('../data/HRI.csv')
hri.to_sql('hri', engine, if_exists='append', index = False)

ed_visit = pd.read_csv('../data/cleaned-AZ-EDVisits.csv')
ed_visit.to_sql('ed_rate', engine, if_exists='append', index = False)

hospital = pd.read_csv('../data/cleaned_AZ_Hospitalizations.csv')
hospital.to_sql('hospitalization', engine, if_exists='append', index = False)

vulnerability = pd.read_csv('../data/cleaned_AZ_Vulnerability.csv')
vulnerability.to_sql('svi', engine, if_exists='append', index = False)

extreme_heat_days = pd.read_csv('../data/ExtremeHeatDays.csv')
extreme_heat_days.to_sql('extreme_heat_days', engine, if_exists='append', index = False)

ed_visit_35 = pd.read_csv('../data/EDRates35.csv')
ed_visit_35.to_sql('ed_visit_35', engine, if_exists='append', index = False)

2023-06-11 16:55:27,799 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("hri")
2023-06-11 16:55:27,800 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-06-11 16:55:27,802 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-06-11 16:55:27,805 INFO sqlalchemy.engine.Engine INSERT INTO hri (county, year, "ED_rates", "Hosp_rates", "SVI") VALUES (?, ?, ?, ?, ?)
2023-06-11 16:55:27,806 INFO sqlalchemy.engine.Engine [generated in 0.00181s] (('PIMA', 2011, 20.12, 4.79, None), ('PINAL', 2011, 37.73, 13.18, None), ('YUMA', 2011, 81.43, 5.31, None), ('ARIZONA', 2011, 30.36, 7.36, None), ('GREENLEE', 2013, 0.0, 0.0, None), ('GILA', 2011, 29.87, 0.0, None), ('MARICOPA', 2011, 31.31, 8.73, None), ('MOHAVE', 2011, 49.41, 11.43, None)  ... displaying 10 of 87 total bound parameter sets ...  ('YUMA', 2021, 82.7, 21.39, None), ('ARIZONA', 2021, 33.88, 9.87, None))
2023-06-11 16:55:27,809 INFO sqlalchemy.engine.Engine COMMIT
2023-06-11 16:55:27,822 INFO sqlalchemy.engine.Engine PRAGMA main.table_info

OperationalError: (sqlite3.OperationalError) table extreme_heat_days has no column named extreme_heat_days
[SQL: INSERT INTO extreme_heat_days ("County", "Year", extreme_heat_days) VALUES (?, ?, ?)]
[parameters: (('Apache', 2011, 19), ('Apache', 2012, 19), ('Apache', 2013, 18), ('Apache', 2014, 5), ('Apache', 2015, 18), ('Apache', 2016, 16), ('Apache', 2017, 16), ('Apache', 2018, 31)  ... displaying 10 of 165 total bound parameter sets ...  ('Yuma', 2020, 36), ('Yuma', 2021, 24))]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [34]:
Base = automap_base() 
# reflect an existing database into a new model
Base.prepare(autoload_with=engine) 
# reflect the tables

In [35]:
# View all of the classes that automap found
Base.classes.keys() 

[]

In [None]:
# Save references to each table
ED = Base.classes.ED
Hospitalizations = Base.classes.hospitalizations
SVI = Base.classes.SVI

In [None]:
# Create our session (link) from Python to the DB
session = Session(bind=engine)

In [None]:
session.close()

# Analysis & Visualizations