# Cal-CRAI Metric Calculation for: Natural Systems / Ecosystem Condition
This notebook calculates one metric sourced from the Environmental Protection Agency (EPA):

* Percentage of days with unhealthy or worse air quality (including "Unhealthy for Sensitive Groups" days) per county between 1980 and 2022

In [54]:
import pandas as pd
import os
import sys

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

# add the directory two levels up to the import path (once) so that the
# `scripts` package imported below can be resolved
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import (
    pull_csv_from_directory, upload_csv_aws
)

In [44]:
# fetch the EPA air quality csv from aws into the working directory
aws_dir = '1_pull_data/natural_systems/ecosystem_condition/epa/'
bucket_name = 'ca-climate-index'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'natural_epa_air_quality.csv'


In [46]:
# read in air quality data (already for state of CA)
air_quality = pd.read_csv('natural_epa_air_quality.csv')
# the local copy is only needed to build the DataFrame, so delete it right away
os.remove('natural_epa_air_quality.csv')
print(len(air_quality))
# keep the preview as the cell's final expression so the notebook actually renders it
air_quality.head(5)


2206


In [47]:
# the original dataset repeats a year, so keep a single row per (Year, County) pair
duplicate_keys = ['Year', 'County']
filtered_air_quality = air_quality.drop_duplicates(subset=duplicate_keys)
filtered_air_quality.head(5)

Unnamed: 0.1,Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,0,California,Alameda,1997,365,324,35,6,0,0,0,147,53,38,3,160,195,0,7
1,1,California,Amador,1997,365,270,70,22,3,0,0,197,90,40,18,0,346,0,1
2,2,California,Butte,1997,365,322,42,1,0,0,0,105,51,34,24,125,203,0,13
3,3,California,Calaveras,1997,365,238,95,29,2,1,0,207,100,43,5,0,355,0,5
4,4,California,Colusa,1997,365,287,66,12,0,0,0,136,71,39,0,0,334,0,31


In [48]:
# Columns to aggregate: the count of days with an AQI reading plus each
# unhealthy-or-worse category
columns_to_sum = [
    'Days with AQI',
    'Unhealthy for Sensitive Groups Days',
    'Unhealthy Days',
    'Very Unhealthy Days',
    'Hazardous Days',
]

# Collapse the per-year records into one row per county by summing the columns
# above over the temporal range of the dataset (1980-2022)
county_groups = filtered_air_quality.groupby('County')
ca_county_unhealthy_days = county_groups[columns_to_sum].sum().reset_index()

print(len(ca_county_unhealthy_days))
ca_county_unhealthy_days.head(5)

58


Unnamed: 0,County,Days with AQI,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days
0,Alameda,15585,529,168,31,0
1,Alpine,168,14,8,2,0
2,Amador,10942,614,125,10,0
3,Butte,15510,915,144,10,5
4,Calaveras,10242,720,205,13,1


In [49]:
# Add a column with the total number of unhealthy air quality days, i.e. the
# row-wise sum of the four unhealthy-or-worse categories
unhealthy_categories = [
    'Unhealthy for Sensitive Groups Days',
    'Unhealthy Days',
    'Very Unhealthy Days',
    'Hazardous Days',
]
# skipna=False keeps the NaN-propagating behaviour of element-wise addition
ca_county_unhealthy_days['Total Unhealthy AQI Days'] = (
    ca_county_unhealthy_days[unhealthy_categories].sum(axis=1, skipna=False)
)
ca_county_unhealthy_days.head(5)

Unnamed: 0,County,Days with AQI,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Total Unhealthy AQI Days
0,Alameda,15585,529,168,31,0,728
1,Alpine,168,14,8,2,0,24
2,Amador,10942,614,125,10,0,749
3,Butte,15510,915,144,10,5,1074
4,Calaveras,10242,720,205,13,1,939


In [50]:
# Calculate CRI metric: total unhealthy AQI days divided by days with an AQI
# reading, per county
unhealthy_share = ca_county_unhealthy_days['Total Unhealthy AQI Days'].div(
    ca_county_unhealthy_days['Days with AQI']
)
ca_county_unhealthy_days['CRI Metric'] = unhealthy_share
# the same ratio expressed as a percentage
ca_county_unhealthy_days['CRI Metric Percentage'] = ca_county_unhealthy_days['CRI Metric'].mul(100)
ca_county_unhealthy_days.head(5)

Unnamed: 0,County,Days with AQI,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Total Unhealthy AQI Days,CRI Metric,CRI Metric Percentage
0,Alameda,15585,529,168,31,0,728,0.046712,4.671158
1,Alpine,168,14,8,2,0,24,0.142857,14.285714
2,Amador,10942,614,125,10,0,749,0.068452,6.845184
3,Butte,15510,915,144,10,5,1074,0.069246,6.924565
4,Calaveras,10242,720,205,13,1,939,0.091681,9.168131


In [51]:
# Write the county-level metric table to a local .csv file
# (the row index is written as well, which is the pandas default)
metric_csv = 'natural_air_quality_metric.csv'
ca_county_unhealthy_days.to_csv(metric_csv)

In [53]:
# upload the final csv file to aws, then delete the local copy
directory = '3_fair_data/index_data'
bucket_name = 'ca-climate-index'
file_name = ['natural_air_quality_metric.csv']

#@append_metadata
upload_csv_aws(file_name, bucket_name, directory)
for local_csv in file_name:
    os.remove(local_csv)

natural_air_quality_metric.csv uploaded to AWS
