In [1]:
# Administrative stuff:
import os

# For Data manipulation:
import pandas as pd
import numpy as np
import random
import re

# Nice to haves
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
# import googlemaps
import plotly.graph_objects as go

## dynamic text updating
from IPython.display import Markdown as md

## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Getting Data from the Cleaning File

In [2]:
%store -r air_data
%store -r air_ken
%store -r air_ken_2019
%store -r health_county_names
%store -r health_data
%store -r health_ken
%store -r health_ken_2019
%store -r health_ken_2019_clean
%store -r health_ken_2019_fm
%store -r so2
%store -r so2_county_names
%store -r so2_ken
%store -r airq_0

In [3]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


# Merging air and Health Data First

In [4]:
air_ken_2019.head(2)
health_ken_2019_fm.head(2)

Unnamed: 0,index,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0
1,327,Kentucky,Boone,2019,239,222,17,0,0,0,0,80,49,39,0,0,239,0,0


Unnamed: 0,index,Year,CountyName,CountyFIPS,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,TotalPopulation,Geolocation,LocationID,MeasureId
0,3596,2019,Adair,21001,Health Risk Behaviors,No leisure-time physical activity among adults...,%,Crude prevalence,44.8,3016,POINT (-85.15821669 37.13121962),21001970300,LPA
2,3598,2019,Anderson,21005,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,26.6,5372,POINT (-84.92306901 38.02049683),21005950201,DEPRESSION


In [5]:
# Initial merge: 4282432 rows × 23 columns
# Next: 6768 rows 

# Merging:

merged_ah = pd.merge(air_ken_2019, health_ken_2019_fm, how = "left", left_on = "County", right_on = "CountyName",suffixes=('_air', '_health'))
merged_ah.head(2)

Unnamed: 0,index_air,State,County,Year_air,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10,index_health,Year_health,CountyName,CountyFIPS,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,TotalPopulation,Geolocation,LocationID,MeasureId
0,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,774350,2019,Bell,21013,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,30.6,1896,POINT (-83.53196181 36.73527758),21013960400,DEPRESSION
1,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,809745,2019,Bell,21013,Health Status,Physical health not good for >=14 days among a...,%,Crude prevalence,25.1,1896,POINT (-83.53196181 36.73527758),21013960400,PHLTH


In [34]:
air_ken_2019.shape
health_ken_2019_fm.shape
merged_ah.shape

(27, 19)

(13272, 13)

(6900, 32)

In [6]:
merged_ah["County"].value_counts()

Jefferson     2280
Fayette        984
Campbell       300
Warren         288
Daviess        276
Hardin         264
Boone          252
Pike           228
Christian      216
McCracken      204
Bullitt        204
Pulaski        168
Oldham         168
Boyd           156
Henderson      132
Bell           108
Jessamine      108
Greenup        108
Perry           96
Carter          84
Morgan          60
Simpson         48
Hancock         36
Edmonson        36
Trigg           36
Washington      36
Livingston      24
Name: County, dtype: int64

In [7]:
merged_ah[merged_ah["County"] == "Jefferson"]

Unnamed: 0,index_air,State,County,Year_air,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10,index_health,Year_health,CountyName,CountyFIPS,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,TotalPopulation,Geolocation,LocationID,MeasureId
3156,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,3649,2019,Jefferson,21111,Health Outcomes,Cancer (excluding skin cancer) among adults ag...,%,Crude prevalence,6.1,2924,POINT (-85.78202288 38.27661851),21111000200,CANCER
3157,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,3651,2019,Jefferson,21111,Prevention,Current lack of health insurance among adults ...,%,Crude prevalence,17.2,2447,POINT (-85.78253076 38.2614617),21111002300,ACCESS2
3158,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,3654,2019,Jefferson,21111,Health Risk Behaviors,Current smoking among adults aged >=18 years,%,Crude prevalence,15.9,6253,POINT (-85.52422538 38.234524),21111010402,CSMOKING
3159,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,3655,2019,Jefferson,21111,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,21.3,3078,POINT (-85.64078951 38.24198879),21111010601,DEPRESSION
3160,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,825740,2019,Jefferson,21111,Prevention,Current lack of health insurance among adults ...,%,Crude prevalence,7.7,5222,POINT (-85.56243511 38.15331331),21111011519,ACCESS2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5431,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,841817,2019,Jefferson,21111,Health Risk Behaviors,No leisure-time physical activity among adults...,%,Crude prevalence,51.4,2540,POINT (-85.80521343 38.24337925),21111001000,LPA
5432,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,841822,2019,Jefferson,21111,Health Status,Mental health not good for >=14 days among adu...,%,Crude prevalence,10.9,4078,POINT (-85.57559354 38.25625142),21111010102,MHLTH
5433,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,841823,2019,Jefferson,21111,Prevention,Visits to doctor for routine checkup within th...,%,Crude prevalence,81.3,6220,POINT (-85.58511499 38.17750746),21111011110,CHECKUP
5434,340,Kentucky,Jefferson,2019,365,200,163,2,0,0,0,136,70,49,0,10,164,191,0,841829,2019,Jefferson,21111,Health Status,Physical health not good for >=14 days among a...,%,Crude prevalence,10.2,1949,POINT (-85.69251985 38.2569818),21111007601,PHLTH


In [8]:
air_ken_2019[air_ken_2019["County"] == "Bell"]
health_ken_2019_clean[health_ken_2019_clean["CountyName"] == "Bell"]

Unnamed: 0,index,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0


Unnamed: 0,index,Year,CountyName,CountyFIPS,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,TotalPopulation,Geolocation,LocationID,MeasureId
3,3599,2019,Bell,21013,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,5.8,5150,POINT (-83.73909778 36.76433928),21013960200,STROKE
4,3600,2019,Bell,21013,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,5.5,2381,POINT (-83.61861461 36.71356868),21013960300,STROKE
5,3601,2019,Bell,21013,Health Outcomes,Stroke among adults aged >=18 years,%,Crude prevalence,5.3,2849,POINT (-83.72538629 36.61758576),21013960700,STROKE
191,774350,2019,Bell,21013,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,30.6,1896,POINT (-83.53196181 36.73527758),21013960400,DEPRESSION
255,809745,2019,Bell,21013,Health Status,Physical health not good for >=14 days among a...,%,Crude prevalence,25.1,1896,POINT (-83.53196181 36.73527758),21013960400,PHLTH
265,809759,2019,Bell,21013,Health Outcomes,Coronary heart disease among adults aged >=18 ...,%,Crude prevalence,11.9,1896,POINT (-83.53196181 36.73527758),21013960400,CHD
284,809781,2019,Bell,21013,Health Outcomes,Coronary heart disease among adults aged >=18 ...,%,Crude prevalence,10.6,3139,POINT (-83.81829927 36.66805338),21013961100,CHD
311,809818,2019,Bell,21013,Health Risk Behaviors,Current smoking among adults aged >=18 years,%,Crude prevalence,33.3,3139,POINT (-83.81829927 36.66805338),21013961100,CSMOKING
319,809827,2019,Bell,21013,Prevention,Visits to doctor for routine checkup within th...,%,Crude prevalence,80.7,4747,POINT (-83.76654247 36.59758829),21013960600,CHECKUP
366,809884,2019,Bell,21013,Health Outcomes,Current asthma among adults aged >=18 years,%,Crude prevalence,11.8,3771,POINT (-83.76484451 36.64167333),21013960800,CASTHMA


In [9]:
merged_ah.head()

Unnamed: 0,index_air,State,County,Year_air,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10,index_health,Year_health,CountyName,CountyFIPS,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,TotalPopulation,Geolocation,LocationID,MeasureId
0,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,774350,2019,Bell,21013,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,30.6,1896,POINT (-83.53196181 36.73527758),21013960400,DEPRESSION
1,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,809745,2019,Bell,21013,Health Status,Physical health not good for >=14 days among a...,%,Crude prevalence,25.1,1896,POINT (-83.53196181 36.73527758),21013960400,PHLTH
2,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,809818,2019,Bell,21013,Health Risk Behaviors,Current smoking among adults aged >=18 years,%,Crude prevalence,33.3,3139,POINT (-83.81829927 36.66805338),21013961100,CSMOKING
3,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,809827,2019,Bell,21013,Prevention,Visits to doctor for routine checkup within th...,%,Crude prevalence,80.7,4747,POINT (-83.76654247 36.59758829),21013960600,CHECKUP
4,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,809884,2019,Bell,21013,Health Outcomes,Current asthma among adults aged >=18 years,%,Crude prevalence,11.8,3771,POINT (-83.76484451 36.64167333),21013960800,CASTHMA


In [10]:
so2_ken.head(2)

Unnamed: 0,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Sample Duration,Pollutant Standard,Date Local,Units of Measure,Event Type,Observation Count,Observation Percent,Arithmetic Mean,1st Max Value,1st Max Hour,AQI,Method Code,Method Name,Local Site Name,Address,State Name,County Name,City Name,CBSA Name,Date of Last Change
97694,21,19,17,42401,1,38.45934,-82.64041,WGS84,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-01-01,Parts per billion,,23,96.0,0.0,0.0,0,0.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,ASHLAND PRIMARY (FIVCO),"FIVCO HEALTH DEPARTMENT, 2924 HOLT STREET",Kentucky,Boyd,Ashland,"Huntington-Ashland, WV-KY-OH",2021-11-01
97695,21,19,17,42401,1,38.45934,-82.64041,WGS84,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-01-02,Parts per billion,,23,96.0,0.0,0.0,0,0.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,ASHLAND PRIMARY (FIVCO),"FIVCO HEALTH DEPARTMENT, 2924 HOLT STREET",Kentucky,Boyd,Ashland,"Huntington-Ashland, WV-KY-OH",2021-11-01


In [11]:
so2_ken["Parameter Name"].value_counts()

Sulfur dioxide    8845
Name: Parameter Name, dtype: int64

In [12]:
so2_ken["Units of Measure"].value_counts()

Parts per billion    8845
Name: Units of Measure, dtype: int64

In [13]:
so2_ken["Sample Duration"].value_counts()

1 HOUR          4423
3-HR BLK AVG    4422
Name: Sample Duration, dtype: int64

In [14]:
so2_ken["Pollutant Standard"].value_counts()

SO2 1-hour 2010    4423
SO2 3-hour 1971    4422
Name: Pollutant Standard, dtype: int64

In [15]:
so2_ken["AQI"].value_counts()

0.0      1970
1.0      1227
3.0       550
4.0       198
6.0       128
7.0        64
9.0        53
10.0       37
11.0       21
13.0       17
14.0       16
16.0       14
17.0        9
19.0        8
20.0        7
21.0        6
24.0        6
23.0        5
33.0        4
30.0        4
36.0        4
37.0        4
26.0        4
54.0        4
29.0        4
44.0        3
27.0        3
52.0        3
31.0        3
49.0        3
64.0        2
46.0        2
50.0        2
106.0       2
60.0        2
69.0        2
39.0        2
40.0        2
105.0       2
34.0        2
61.0        2
51.0        2
95.0        1
114.0       1
84.0        1
41.0        1
76.0        1
111.0       1
86.0        1
70.0        1
59.0        1
103.0       1
101.0       1
47.0        1
89.0        1
56.0        1
74.0        1
94.0        1
57.0        1
82.0        1
120.0       1
116.0       1
Name: AQI, dtype: int64

# pg 10 of AQI report (https://www.airnow.gov/sites/default/files/2018-04/aqi_brochure_02_14_0.pdf) 


We are using this for defining categories for sulfur aqi in categories

In [16]:
so2_ken_f=so2_ken.dropna(subset=["AQI"],how='all',inplace=False)

In [17]:
so2_ken.shape


(8845, 29)

In [19]:
so2_ken_f.shape


(4423, 29)

In [20]:
filter_method = lambda x: 'Good' if x < 50 else 'Moderate' if (x > 50 and x <= 100) else 'Unhealthy for Sensitive Groups' if (x > 100 and x <= 150) else "NA" if (x == "NaN") else "Unhealthy"



so2_ken_f["AQI_category"] = so2_ken_f["AQI"].apply(filter_method)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  so2_ken_f["AQI_category"] = so2_ken_f["AQI"].apply(filter_method)


In [21]:
so2_ken_f[so2_ken_f["AQI_category"]=="Unhealthy"].tail(12)

Unnamed: 0,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Sample Duration,Pollutant Standard,Date Local,Units of Measure,Event Type,Observation Count,Observation Percent,Arithmetic Mean,1st Max Value,1st Max Hour,AQI,Method Code,Method Name,Local Site Name,Address,State Name,County Name,City Name,CBSA Name,Date of Last Change,AQI_category
102373,21,101,1011,42401,1,37.654381,-87.511427,NAD83,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-07-13,Parts per billion,,23,96.0,7.347826,35.0,12,50.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,Sebree SO2 DRR Site,"Alcan Aluminum Road, 1.0 Miles South of Moss &...",Kentucky,Henderson,Not in a city,"Evansville, IN-KY",2021-11-01,Unhealthy
102375,21,101,1011,42401,1,37.654381,-87.511427,NAD83,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-07-15,Parts per billion,,23,96.0,4.521739,35.0,13,50.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,Sebree SO2 DRR Site,"Alcan Aluminum Road, 1.0 Miles South of Moss &...",Kentucky,Henderson,Not in a city,"Evansville, IN-KY",2021-11-01,Unhealthy


#Merge with ah dataset

In [22]:
merged_ahs = pd.merge(merged_ah, so2_ken_f, how = "left", left_on = "County", right_on = "County Name",suffixes=('_ah', '_Sulfur'))
merged_ahs.head(2)

Unnamed: 0,index_air,State,County,Year_air,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10,index_health,Year_health,CountyName,CountyFIPS,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,TotalPopulation,Geolocation,LocationID,MeasureId,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Sample Duration,Pollutant Standard,Date Local,Units of Measure,Event Type,Observation Count,Observation Percent,Arithmetic Mean,1st Max Value,1st Max Hour,AQI,Method Code,Method Name,Local Site Name,Address,State Name,County Name,City Name,CBSA Name,Date of Last Change,AQI_category
0,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,774350,2019,Bell,21013,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,30.6,1896,POINT (-83.53196181 36.73527758),21013960400,DEPRESSION,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0,809745,2019,Bell,21013,Health Status,Physical health not good for >=14 days among a...,%,Crude prevalence,25.1,1896,POINT (-83.53196181 36.73527758),21013960400,PHLTH,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [23]:
merged_ah.shape
merged_ahs.shape

(6900, 32)

(3334068, 62)

In [28]:
so2_ken_f.head(3)

Unnamed: 0,State Code,County Code,Site Num,Parameter Code,POC,Latitude,Longitude,Datum,Parameter Name,Sample Duration,Pollutant Standard,Date Local,Units of Measure,Event Type,Observation Count,Observation Percent,Arithmetic Mean,1st Max Value,1st Max Hour,AQI,Method Code,Method Name,Local Site Name,Address,State Name,County Name,City Name,CBSA Name,Date of Last Change,AQI_category
97694,21,19,17,42401,1,38.45934,-82.64041,WGS84,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-01-01,Parts per billion,,23,96.0,0.0,0.0,0,0.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,ASHLAND PRIMARY (FIVCO),"FIVCO HEALTH DEPARTMENT, 2924 HOLT STREET",Kentucky,Boyd,Ashland,"Huntington-Ashland, WV-KY-OH",2021-11-01,Good
97695,21,19,17,42401,1,38.45934,-82.64041,WGS84,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-01-02,Parts per billion,,23,96.0,0.0,0.0,0,0.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,ASHLAND PRIMARY (FIVCO),"FIVCO HEALTH DEPARTMENT, 2924 HOLT STREET",Kentucky,Boyd,Ashland,"Huntington-Ashland, WV-KY-OH",2021-11-01,Good
97696,21,19,17,42401,1,38.45934,-82.64041,WGS84,Sulfur dioxide,1 HOUR,SO2 1-hour 2010,2019-01-03,Parts per billion,,23,96.0,0.0,0.0,0,0.0,100.0,INSTRUMENTAL - ULTRAVIOLET FLUORESCENCE,ASHLAND PRIMARY (FIVCO),"FIVCO HEALTH DEPARTMENT, 2924 HOLT STREET",Kentucky,Boyd,Ashland,"Huntington-Ashland, WV-KY-OH",2021-11-01,Good


In [29]:
so2_ken_f.groupby(["AQI_category"]).agg('County Name')

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7fd38929f370>

In [32]:
so2_ken_f.groupby(["AQI_category"])["County Name"].count()

AQI_category
Good                              4382
Moderate                            29
Unhealthy                            2
Unhealthy for Sensitive Groups      10
Name: County Name, dtype: int64