## Cal-CRAI Metric Calculation for: Built Environment / PSPS event frequency
* Public Safety Power Shutoff (PSPS) event frequency

In [None]:
import pandas as pd
import os
import sys
import numpy as np
import boto3
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull the raw PSPS csv from the project's AWS S3 bucket
# (presumably saved into the current working directory, since the next cell
#  reads the csv by bare filename -- confirm against pull_csv_from_directory)
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/built_environment/utilities/pse_health_energy/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

In [None]:
# read in the PSPS event frequency data (already subsetted for CA)
power_shutoff_data = pd.read_csv('public_safety_power_shutoff_frequency.csv')
print(len(power_shutoff_data))
# rename the tract identifier column so it matches the 'GEOID' key used when merging with the census tract file
power_shutoff_data = power_shutoff_data.rename(columns={'Fips':'GEOID'})
# os.remove('public_safety_power_shutoff_frequency.csv')

In [None]:
# preview the raw PSPS table
power_shutoff_data

### The PSPS data uses older census tract boundaries, so we first join it with the 2017 TIGER tract geometries

In [None]:
# load the 2017 California TIGER tract file, keeping only the tract id and geometry
old_census_path = "s3://ca-climate-index/0_map_data/tl_2017_06_tract/"
ca_old = (
    gpd.read_file(old_census_path)[["GEOID", "geometry"]]
    # GEOID is converted to a numeric dtype before it is used as the merge key
    # (presumably because the PSPS csv stores the tract id as a number -- confirm)
    .assign(GEOID=lambda tracts: pd.to_numeric(tracts["GEOID"]))
)

In [None]:
# attach the 2017 tract geometries to the PSPS records; the default inner join
# keeps only GEOIDs present in both tables
old_tract_power_shutoff_data = gpd.GeoDataFrame(
    ca_old.merge(power_shutoff_data, on="GEOID"),
    geometry="geometry",
)

In [None]:
# read in the 2021 CA census tract TIGER file
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"

ca_boundaries = gpd.read_file(census_shp_dir)
# need to rename columns so we don't have any duplicates in the final geodatabase:
# every non-geometry column gets a "USCB_" prefix. The mapping is keyed by the
# original column name, so it stays correct wherever the geometry column sits
# (zipping the full column list against the filtered list only lines up when
# geometry happens to be the last column).
column_names = ca_boundaries.columns
rename_map = {column: "USCB_" + column for column in column_names if column != "geometry"}
new_column_names = list(rename_map.values())
ca_boundaries = ca_boundaries.rename(columns=rename_map)
# drop unnecessary columns
ca_boundaries = ca_boundaries[["geometry","USCB_GEOID"]]
ca_boundaries

In [None]:
# reproject both layers to the same projected CRS (EPSG:3857, Web Mercator) so the
# nearest-neighbour join below measures distances in projected units rather than degrees.
# NOTE(review): EPSG:3857 is neither area- nor distance-preserving, so the max_distance
# used in the spatial join is in Web Mercator units, not true ground metres.
old_tract_power_shutoff_data = old_tract_power_shutoff_data.to_crs(crs=3857) 
ca_boundaries = ca_boundaries.to_crs(crs=3857) 

In [None]:
# first find the tracts whose GEOID exists in both the 2017 (source data) and 2021 tract files
# boolean mask over the new (2021) boundaries: True where the GEOID also appears in the original data
# (a tract counts as "unchanged" by GEOID match only; geometries are not compared)
unchanged_tracts_ca = pd.to_numeric(ca_boundaries['USCB_GEOID']).isin(pd.to_numeric(old_tract_power_shutoff_data['GEOID']))
ca_boundaries[unchanged_tracts_ca]

In [None]:
# now find the rows of the original data whose GEOID also exists in the 2021 tract file
unchanged_tracts_old = pd.to_numeric(old_tract_power_shutoff_data['GEOID']).isin(pd.to_numeric(ca_boundaries['USCB_GEOID']))
# take an explicit copy so the column assignment below writes to an independent frame
# rather than to a slice of old_tract_power_shutoff_data (avoids SettingWithCopyWarning)
original_df = old_tract_power_shutoff_data[unchanged_tracts_old].copy()
# rebuild the tract id as a string left-padded with zeros to 11 characters
# (presumably the format of ca_boundaries['USCB_GEOID'] -- confirm)
original_df["USCB_GEOID"] = original_df["GEOID"].apply(lambda x: '{0:0>11}'.format(x))
original_df

In [None]:
# the remaining 2021 tracts (no GEOID match) are matched to the nearest
# remaining source tract; with how="inner", a 2021 tract with no source tract
# within max_distance (in CRS units) is left out of the result
mapped_df = gpd.sjoin_nearest(
    left_df=ca_boundaries.loc[~unchanged_tracts_ca],
    right_df=old_tract_power_shutoff_data.loc[~unchanged_tracts_old],
    how="inner",
    max_distance=5000,
    distance_col="distances",
)
mapped_df

In [None]:
# then concatenate the sjoined tracts with the unchanged ones
# (row indices are kept as-is from each input; they are not reset here)
joined_df = pd.concat([original_df,mapped_df])
joined_df

In [None]:
data_vars = ['out_freq_s']
# parse each data column once with a single vectorized call; values that cannot
# be parsed (and values that are already missing) become NaN
coerced = {col: pd.to_numeric(joined_df[col], errors='coerce') for col in data_vars}

# report the rows whose value is NaN after coercion, before anything is overwritten
for col, numeric_values in coerced.items():
    non_numeric = joined_df[numeric_values.isna()]
    if not non_numeric.empty:
        print(f"Non-numeric values found in column '{col}':")
        display(non_numeric)

# then store the numeric version of each column
for col, numeric_values in coerced.items():
    joined_df[col] = numeric_values

In [None]:
data_vars = ['out_freq_s']
# average the PSPS values of all source rows that ended up in the same 2021 tract
group_keys = ['USCB_GEOID', 'geometry']
joined_avg_df = joined_df.groupby(group_keys)[data_vars].mean().reset_index()
# the geometry is only needed as a grouping key; the exported metric keeps the tract id and value
power_shutoff_new_tracts = gpd.GeoDataFrame(
    joined_avg_df, geometry='geometry'
).drop(columns=['geometry'])
power_shutoff_new_tracts

In [None]:
# boolean mask of tracts whose averaged PSPS frequency is missing
nan_checking = power_shutoff_new_tracts['out_freq_s'].isna()

In [None]:
# list the tracts with a missing PSPS frequency for inspection
nan_rows = power_shutoff_new_tracts.loc[nan_checking]
print(nan_rows)

In [None]:
# write the per-tract metric to a local csv (without the index) for the upload step
power_shutoff_new_tracts.to_csv('built_power_shutoffs_metric.csv', index=False)

In [None]:
#@append_metadata
def power_shutoff_upload(input_csv, export=False, varname=''):
    '''
    Uploads the calculated Public Safety Power Shutoff (PSPS) metric to S3 bucket. The metric is:
    Frequency of PSPS events per California census tract.

    Data for this metric was sourced from PSE Healthy Energy at:
    https://www.psehealthyenergy.org/work/california-public-safety-power-shutoff-interactive-map/ from the 
    PSPS Duration by Census Tract section

    Methods
    -------
    The data was from older census tracts, so we merged it with 2017 California Tiger shape files first.
    The source data and the 2021 census tracts were then both reprojected to CRS 3857.
    Tracts whose GEOID exists in both the source data and the 2021 tracts kept their original values.
    The remaining 2021 tracts were spatially joined to the nearest source tract(s).
    Data were then grouped per 2021 census tract and had the PSPS frequency data averaged.
    
    Parameters
    ----------
    input_csv: string
        csv PSPS data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI PSPS metric to AWS
        True = will upload resulting df containing CAL CRAI PSPS metric to AWS
    varname: string
        Name of the metric. Not used inside this function; presumably consumed by the
        append_metadata decorator when it is enabled (confirm against write_metadata).

    Returns
    -------
    None. The local input_csv file is deleted at the end of the call if it exists,
    whether or not it was uploaded.

    Script
    ------
    built_power_shutoff.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: source data and destination tracts both reprojected to CRS 3857.')
    print('Data transformation: unchanged tracts isolated to preserve original data.')
    print('Data transformation: new tracts filled by averaging the adjacent original tracts.')
    print('Data transformation: original data merged with spatially averaged ("new") data.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # report success only after the upload call has actually run
        print(f'{input_csv} uploaded to AWS.')
    else:
        print(f'{input_csv} not uploaded to AWS (export=False).')

    # clean up the local copy of the metric file
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [None]:
# upload the metric csv written above
input_csv = 'built_power_shutoffs_metric.csv'
varname = 'built_pse_power_shutoff'

# NOTE(review): `varname` is assigned above but the literal 'test' is passed instead;
# presumably a placeholder while the append_metadata decorator is commented out -- confirm
power_shutoff_upload(input_csv, export=True, varname='test')