## This notebook creates the following metrics within the Society & Economy domain sourced from CalEnviroScreen:
* Age-adjusted emergency department visits for asthma per 10,000 people
* Age-adjusted emergency department visits for myocardial infarction per 10,000 people
* % of live, singleton births < 5.5 pounds (non-twin, including premature)
* % of population 25 and older with less than a high school education
* % of households where all members 14 and older have some difficult speaking English
* % of population living below 2x federal poverty level
* % of population > 16 years old unemployed and eligible for the workforce
* % of households which are low-income and housing-burdened
* number of impaired waterbodies 

In [1]:
import pandas as pd
import os
import sys
import math
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [2]:
# pull .xlsx from aws
enviroscreen_excel = 's3://ca-climate-index/1_pull_data/society_economy/vulnerable_populations/ca_enviro_screen/calenviroscreen.xlsx'
enviroscreen_data = pd.read_excel(enviroscreen_excel)

In [3]:
enviroscreen_data

Unnamed: 0,Census Tract,Total Population,California County,ZIP,Approximate Location,Longitude,Latitude,CES 4.0 Score,CES 4.0 Percentile,CES 4.0 Percentile Range,...,Linguistic Isolation Pctl,Poverty,Poverty Pctl,Unemployment,Unemployment Pctl,Housing Burden,Housing Burden Pctl,Pop. Char.,Pop. Char. Score,Pop. Char. Pctl
0,6019001100,2780,Fresno,93706,Fresno,-119.781696,36.709695,93.183570,100.000000,95-100% (highest scores),...,79.374746,76.0,98.919598,12.8,93.831338,30.3,91.039290,93.155109,9.663213,99.722642
1,6077000700,4680,San Joaquin,95206,Stockton,-121.287873,37.943173,86.653790,99.987393,95-100% (highest scores),...,95.533902,73.2,98.391960,19.8,99.206143,31.2,92.281369,93.165408,9.664281,99.735250
2,6037204920,2751,Los Angeles,90023,Los Angeles,-118.197497,34.017500,82.393909,99.974786,95-100% (highest scores),...,81.553661,62.6,93.391960,6.4,61.530453,20.3,63.967047,83.751814,8.687785,95.789208
3,6019000700,3664,Fresno,93706,Fresno,-119.827707,36.734535,81.327940,99.962179,95-100% (highest scores),...,78.711598,65.7,95.351759,15.7,97.345133,35.4,96.413181,94.641227,9.817371,99.886536
4,6019000200,2689,Fresno,93706,Fresno,-119.805504,36.735491,80.745476,99.949571,95-100% (highest scores),...,86.561104,72.7,98.304020,13.7,95.288912,32.7,94.157161,95.398873,9.895964,99.949571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8030,6107004000,582,Tulare,93257,Porterville,-118.983849,36.038061,,,,...,,79.6,99.422111,,,,,,,
8031,6109985202,2509,Tuolumne,95327,Unincorporated Tuolumne County area,-120.537071,37.891939,,,,...,,,,,,,,,,
8032,6111001206,778,Ventura,93001,Unincorporated Ventura County area,-119.371944,34.343903,,,,...,,17.1,27.349246,,,24.4,78.466413,,,
8033,6111003012,675,Ventura,93036,Oxnard,-119.180105,34.235076,,,,...,99.553390,96.7,100.000000,,,,,,,


In [4]:
enviroscreen_data.columns

Index(['Census Tract', 'Total Population', 'California County', 'ZIP',
       'Approximate Location', 'Longitude', 'Latitude', 'CES 4.0 Score',
       'CES 4.0 Percentile', 'CES 4.0 Percentile Range', 'Ozone', 'Ozone Pctl',
       'PM2.5', 'PM2.5 Pctl', 'Diesel PM', 'Diesel PM Pctl', 'Drinking Water',
       'Drinking Water Pctl', 'Lead', 'Lead Pctl', 'Pesticides',
       'Pesticides Pctl', 'Tox. Release', 'Tox. Release Pctl', 'Traffic',
       'Traffic Pctl', 'Cleanup Sites', 'Cleanup Sites Pctl',
       'Groundwater Threats', 'Groundwater Threats Pctl', 'Haz. Waste',
       'Haz. Waste Pctl', 'Imp. Water Bodies', 'Imp. Water Bodies Pctl',
       'Solid Waste', 'Solid Waste Pctl', 'Pollution Burden',
       'Pollution Burden Score', 'Pollution Burden Pctl', 'Asthma',
       'Asthma Pctl', 'Low Birth Weight', 'Low Birth Weight Pctl',
       'Cardiovascular Disease', 'Cardiovascular Disease Pctl', 'Education',
       'Education Pctl', 'Linguistic Isolation', 'Linguistic Isolation Pctl',

In [5]:
metric_enviroscreen_data = enviroscreen_data[['Census Tract', 
                                              'California County', 
                                              'Total Population', 
                                                'Asthma',
                                                'Low Birth Weight', 
                                                'Cardiovascular Disease', 
                                                'Education', 
                                                'Linguistic Isolation',
                                                'Poverty',
                                                'Unemployment', 
                                                'Housing Burden', 
                                                'Imp. Water Bodies'
                                                ]]

### Pulling in 2021 census population data, can only use for one of our metrics (if desired) as the others are already in percentages from 2019 data

In [6]:
county_tract_pop = "s3://ca-climate-index/0_map_data/ca_tract_county_population.csv"
county_tract_pop = pd.read_csv(county_tract_pop)
county_tract_pop = county_tract_pop.rename(columns={'TRACT': 'Census Tract'})
county_tract_pop = county_tract_pop.drop('Unnamed: 0', axis=1)
county_tract_pop

Unnamed: 0,Census Tract,COUNTYFP,County,Total Population 2021
0,6085504321,85,Santa Clara,5412
1,6085504410,85,Santa Clara,4124
2,6085507003,85,Santa Clara,3074
3,6085507004,85,Santa Clara,3926
4,6085502204,85,Santa Clara,3242
...,...,...,...,...
9124,6059001303,59,Orange,6515
9125,6059001304,59,Orange,3565
9126,6059001401,59,Orange,4756
9127,6013367200,13,Contra Costa,5869


In [7]:
# Adding 2021 population column to our enviroscreen data merged based on census tract
merged_df = pd.merge(county_tract_pop, metric_enviroscreen_data, on='Census Tract', how='left')
merged_df

Unnamed: 0,Census Tract,COUNTYFP,County,Total Population 2021,California County,Total Population,Asthma,Low Birth Weight,Cardiovascular Disease,Education,Linguistic Isolation,Poverty,Unemployment,Housing Burden,Imp. Water Bodies
0,6085504321,85,Santa Clara,5412,Santa Clara,5574.0,25.79,6.01,9.05,12.2,21.1,17.5,5.0,11.7,0.0
1,6085504410,85,Santa Clara,4124,Santa Clara,4724.0,24.24,5.03,8.70,22.0,21.1,23.1,8.3,17.8,0.0
2,6085507003,85,Santa Clara,3074,,,,,,,,,,,
3,6085507004,85,Santa Clara,3926,,,,,,,,,,,
4,6085502204,85,Santa Clara,3242,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9124,6059001303,59,Orange,6515,Orange,5884.0,47.28,4.17,15.01,20.6,10.2,28.3,6.6,7.6,6.0
9125,6059001304,59,Orange,3565,Orange,3982.0,46.82,4.96,14.86,26.9,15.8,46.9,14.7,17.7,6.0
9126,6059001401,59,Orange,4756,Orange,4495.0,47.28,4.91,15.01,19.8,13.3,37.6,8.6,25.6,0.0
9127,6013367200,13,Contra Costa,5869,Contra Costa,6042.0,107.76,6.39,18.28,24.6,10.5,37.1,6.0,20.7,2.0


In [8]:
# Adding 2021 population column to our enviroscreen data merged based on census tract
merged_df = pd.merge(county_tract_pop, metric_enviroscreen_data, on='Census Tract', how='left')

# Columns to fill NaN values
columns_to_fill = [
                    'Asthma',
                    'Low Birth Weight', 
                    'Cardiovascular Disease', 
                    'Education', 
                    'Linguistic Isolation',
                    'Poverty',
                    'Unemployment', 
                    'Housing Burden', 
                    'Imp. Water Bodies',
                    ]

# Add a new column indicating whether a value was originally NaN
original_na_flag_column = 'Original_NA_Flag'
merged_df[original_na_flag_column] = np.where(merged_df[columns_to_fill].isna().any(axis=1), 1, 0)

# Compute average values for each column grouped by 'County'
average_values_by_county = merged_df.groupby('County')[columns_to_fill].transform('mean')

# Fill NaN values in each column with the corresponding average value of that column for the respective 'County'
for column in columns_to_fill:
    na_mask = merged_df[column].isna()
    merged_df.loc[na_mask, column] = average_values_by_county.loc[na_mask, column]

print(len(merged_df))
merged_df.head(5)

9129


Unnamed: 0,Census Tract,COUNTYFP,County,Total Population 2021,California County,Total Population,Asthma,Low Birth Weight,Cardiovascular Disease,Education,Linguistic Isolation,Poverty,Unemployment,Housing Burden,Imp. Water Bodies,Original_NA_Flag
0,6085504321,85,Santa Clara,5412,Santa Clara,5574.0,25.79,6.01,9.05,12.2,21.1,17.5,5.0,11.7,0.0,0
1,6085504410,85,Santa Clara,4124,Santa Clara,4724.0,24.24,5.03,8.7,22.0,21.1,23.1,8.3,17.8,0.0,0
2,6085507003,85,Santa Clara,3074,,,33.249401,5.023595,9.67988,12.325826,11.064134,17.793413,4.223708,14.783133,2.356287,1
3,6085507004,85,Santa Clara,3926,,,33.249401,5.023595,9.67988,12.325826,11.064134,17.793413,4.223708,14.783133,2.356287,1
4,6085502204,85,Santa Clara,3242,,,33.249401,5.023595,9.67988,12.325826,11.064134,17.793413,4.223708,14.783133,2.356287,1


## Code below to check the averages per county

In [9]:
# Prompt the user to input the county name
county_name = input("Enter the name of the county: ")

# Filter the dataframe for the specified county
county_data = merged_df[merged_df['County'] == county_name]

# Print out the average values for the specified county
print(f"Average values for {county_name}:")
for column in columns_to_fill:
    avg_value = county_data[column].mean()
    print(f"{column}: {avg_value}")


Average values for Fresno:
Asthma: 81.77613095238097
Low Birth Weight: 5.9941818181818185
Cardiovascular Disease: 13.860714285714288
Education: 24.634131736526946
Linguistic Isolation: 10.581987577639751
Poverty: 46.05357142857143
Unemployment: 9.042073170731708
Housing Burden: 18.93855421686747
Imp. Water Bodies: 0.5178571428571428


In [10]:
missing_count = merged_df['County'].isna().sum()
print("Number of missing entries in the California County column:", missing_count)


Number of missing entries in the California County column: 0


### Function Call
The function below creates new df's for each metric listed below. Some metrics are already in percent from the 2019 data, so those columns are renamed and retained for Cal-CRAI metric. df's are saved as csv's named off of their metric column:

ones that are already in percent from 2019 data
* % of live, singleton births < 5.5 pounds (non-twin, including premature)
* % of population 25 and older with less than a high school education
* % of households where all members 14 and older have some difficult speaking English
* % of population living below 2x federal poverty level
* % of population > 16 years old unemployed and eligible for the workforce
* % of households which are low-income and housing-burdened

ones that have a sum we do not want as a percentage
* number of impaired waterbodies

The function can also calculate metric per 10,000 people for metrics that have a 'sum of' column rather than pre-baked in percentages:

metrics that have been calculated for metrics per 10,000 have columns for 2019 and 2021 populations
* Age-adjusted emergency department visits for asthma per 10,000 people
* Age-adjusted emergency department visits for myocardial infarction per 10,000 people

Asthma and cardiovascular percentage can be calculated with 2019 and 2021 as the CalEnviroscreen values are 'Age-adjusted rate of emergency department visits for asthma/cardiovascular disease'

In [9]:
import pandas as pd
import numpy as np
import boto3

def upload_csv_aws(file_names, bucket_name, directory):
    s3_client = boto3.client('s3')
    for file_name in file_names:
        s3_client.upload_file(file_name, bucket_name, f"{directory}/{file_name}")
        print(f"Uploaded {file_name} to s3://{bucket_name}/{directory}/{file_name}")

def calenviroscreen_metric_calc(columns_to_process, calculate_per_10000=False, varname=""):

    '''
    Calculates the following metrics sourced from CalEnviroScreen:
    * % of live, singleton births < 5.5 pounds (non-twin, including premature)
    * % of population 25 and older with less than a high school education
    * % of households where all members 14 and older have some difficult speaking English
    * % of population living below 2x federal poverty level
    * % of population > 16 years old unemployed and eligible for the workforce
    * % of households which are low-income and housing-burdened
    * Age-adjusted emergency department visits for asthma per 10,000 people
    * Age-adjusted emergency department visits for myocardial infarction per 10,000 people
    * Number of impaired waterbodies

    Note
    --------
    Each of the above metrics is calculated separately; please see the corresponding 
    variable name (the same as the filename for this document) to know which one this 
    particular metadata document describes. 
  
    Methods
    --------
    Relevant data columns were isolated and renamed to align with Cal-CRAI metrics.
    2021 American Community Survey population data was merged into the
    data so metrics could be calculated with updated population (where applicable).
    Extra tracts that were merged in were given the average value for each metric based on 
    the county they reside in.
    This averaging was also done for missing data in otherwise populated tracts.
    Metrics with % calculations were largely untouched as CalEnviroScreen data had
    those metrics calculated for 2019.
    Metrics with emergency department visits had their values adjusted to reflect
    number of visits per 10,000 people per tract with 2019 and 2021 population data.

    Parameters
    ------------
    columns_to_process: list
        list of columns that contain desired metric data
    calculate_per_10000: boolean
        if true, adds columns with calculations for # of visits per 10,000 people
        if false, retains the column but renames to 2019
    varname: string
        Final metric name.

    Script
    ------
    cal_enviroscreen_metrics.ipynb

    Note
    ------
    This function assumes users have configured the AWS CLI such that their access key / 
    secret key pair are stored in ~/.aws/credentials. 
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''

    # pull .xlsx from aws
    enviroscreen_excel = 's3://ca-climate-index/1_pull_data/society_economy/vulnerable_populations/ca_enviro_screen/calenviroscreen.xlsx'
    enviroscreen_data = pd.read_excel(enviroscreen_excel)
    
    print('Data transformation: isolating columns relevant to Cal-CRAI metrics.')

    metric_enviroscreen_data = enviroscreen_data[['Census Tract', 
                                                'California County', 
                                                'Total Population', 
                                                'Asthma',
                                                'Low Birth Weight', 
                                                'Cardiovascular Disease', 
                                                'Education', 
                                                'Linguistic Isolation',
                                                'Poverty',
                                                'Unemployment', 
                                                'Housing Burden', 
                                                'Imp. Water Bodies'
                                                ]]

    county_tract_pop = "s3://ca-climate-index/0_map_data/ca_tract_county_population.csv"
    county_tract_pop = pd.read_csv(county_tract_pop)
    county_tract_pop = county_tract_pop.rename(columns={'TRACT': 'Census Tract'})
    county_tract_pop = county_tract_pop.drop('Unnamed: 0', axis=1)
    county_tract_pop

    print('Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.')
    print('Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.')

    # Adding 2021 population column to our enviroscreen data merged based on census tract
    merged_df = pd.merge(county_tract_pop, metric_enviroscreen_data, on='Census Tract', how='left')

    # Columns to fill NaN values
    columns_to_fill = [
                        'Asthma',
                        'Low Birth Weight', 
                        'Cardiovascular Disease', 
                        'Education', 
                        'Linguistic Isolation',
                        'Poverty',
                        'Unemployment', 
                        'Housing Burden', 
                        'Imp. Water Bodies'
                        ]

    # Add a new column indicating whether a value was originally NaN
    original_na_flag_column = 'Original_NA_Flag'
    merged_df[original_na_flag_column] = np.where(merged_df[columns_to_fill].isna().any(axis=1), 1, 0)

    # Compute average values for each column grouped by 'County'
    average_values_by_county = merged_df.groupby('County')[columns_to_fill].transform('mean')

    # Fill NaN values in each column with the corresponding average value of that column for the respective 'County'
    for column in columns_to_fill:
        na_mask = merged_df[column].isna()
        merged_df.loc[na_mask, column] = average_values_by_county.loc[na_mask, column]
        
    # Move our merged 2019 pop column towards the front
    merged_df = merged_df.rename(columns={'Total Population': 'Total Population 2019'})
    column_to_move = 'Total Population 2019'
    col = merged_df.pop(column_to_move)
    merged_df.insert(3, column_to_move, col)

    print('Data transformation: renaming columns to reflect calculation year.')
    print('Data transformation: adding calculation columns for metrics with emergency department visits.')
        
    # List to store generated CSV file names
    csv_file_names = []
        
    for column in columns_to_process:
        # Create new DataFrame
        new_df = merged_df[['Census Tract', 'County', 'Total Population 2019']].copy()
            
        # Create new column name
        if column == 'Imp. Water Bodies':
            new_column_name = 'sum_' + column.replace(' ', '_').replace('.', '')
        else:
            new_column_name = column.replace(' ', '_')
            if calculate_per_10000:
                new_column_name += '_related_ED_visits_2019'
                new_column_name_per_10000_people_2019 = new_column_name.replace('_2019', '_per_10000_people_2019')
                new_column_name_per_10000_people_2021 = new_column_name.replace('_2019', '_per_10000_people_2021')
            else:
                new_column_name += '_percent_2019'

        # Lowercase the column name
        new_column_name = new_column_name.lower()
    
        # Add new column with the calculated name
        if not calculate_per_10000:
            new_df[new_column_name] = merged_df[column]
        else:
            new_df['Total Population 2021'] = merged_df['Total Population 2021']  # Only add this column if calculating percentage
            new_df[column] = merged_df[column]
            new_df[new_column_name_per_10000_people_2019] = (merged_df[column] / merged_df['Total Population 2019']) * 10000
            new_df[new_column_name_per_10000_people_2021] = (merged_df[column] / merged_df['Total Population 2021']) * 10000
        
        if not calculate_per_10000:
            # Define CSV file name based on the new column name
            csv_filename = 'society_vulnerable_' + column.replace(' ', '_').replace('.','').lower() + '_metric.csv'
        else:
            # Define CSV file name based on the new column name
            csv_filename = 'society_vulnerable_' + column.replace(' ', '_').replace('.','').lower() + '_metric.csv'

        # Save the DataFrame to CSV
        new_df.columns = new_df.columns.str.lower()
        new_df.to_csv(csv_filename, index=False)
        
        print(f"Saved DataFrame to: {csv_filename}")

        # Append CSV filename to the list
        csv_file_names.append(csv_filename)
        # Output or further process new DataFrame
        display(new_df)

        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        upload_csv_aws([csv_filename], bucket_name, directory)
        print('')

# Calling function for both metric calc types

In [12]:
# Columns to loop through that dont need percentages calculated
columns_to_process_no_10000 = [
    'Low Birth Weight',
    'Education',
    'Linguistic Isolation',
    'Poverty',
    'Unemployment',
    'Housing Burden',
    'Imp. Water Bodies'
]
varnames = [
            'society_calenviroscreen_birth_rate' # society_calenviroscreen_birth_weight
            'society_calenviroscreen_low_education', # society_calenviroscreen_education_below_HS
            'society_calenviroscreen_nonenglish_speakers',
            'society_calenviroscreen_below_poverty_level',
            'society_calenviroscreen_unemployment',
            'society_calenviroscreen_housing_burdened',
            'society_calenviroscreen_impaired_waterbodies'
            ]
# Calculate metric without percentages
for col, var in list(zip(columns_to_process_no_10000, varnames)):
    calenviroscreen_metric_calc([col], calculate_per_10000=False, varname='test')

varnames = ['society_calenviroscreen_emergency_dept_visits',
            'society_calenviroscreen_emergency_dept_myocardial_visits'
]

# Columns to loop through that include calculating percentages
columns_to_process_per_10000 = [
    'Asthma',
    'Cardiovascular Disease'
]
# Calculate percentages
for col, var in list(zip(columns_to_process_per_10000, varnames)):
    calenviroscreen_metric_calc([col], calculate_per_10000=True, varname='test')

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_low_birth_weight_metric.csv


Unnamed: 0,census tract,county,total population 2019,low_birth_weight_percent_2019
0,6085504321,Santa Clara,5574.0,6.010000
1,6085504410,Santa Clara,4724.0,5.030000
2,6085507003,Santa Clara,,5.023595
3,6085507004,Santa Clara,,5.023595
4,6085502204,Santa Clara,,5.023595
...,...,...,...,...
9124,6059001303,Orange,5884.0,4.170000
9125,6059001304,Orange,3982.0,4.960000
9126,6059001401,Orange,4495.0,4.910000
9127,6013367200,Contra Costa,6042.0,6.390000


Uploaded society_vulnerable_low_birth_weight_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_low_birth_weight_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_education_metric.csv


Unnamed: 0,census tract,county,total population 2019,education_percent_2019
0,6085504321,Santa Clara,5574.0,12.200000
1,6085504410,Santa Clara,4724.0,22.000000
2,6085507003,Santa Clara,,12.325826
3,6085507004,Santa Clara,,12.325826
4,6085502204,Santa Clara,,12.325826
...,...,...,...,...
9124,6059001303,Orange,5884.0,20.600000
9125,6059001304,Orange,3982.0,26.900000
9126,6059001401,Orange,4495.0,19.800000
9127,6013367200,Contra Costa,6042.0,24.600000


Uploaded society_vulnerable_education_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_education_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_linguistic_isolation_metric.csv


Unnamed: 0,census tract,county,total population 2019,linguistic_isolation_percent_2019
0,6085504321,Santa Clara,5574.0,21.100000
1,6085504410,Santa Clara,4724.0,21.100000
2,6085507003,Santa Clara,,11.064134
3,6085507004,Santa Clara,,11.064134
4,6085502204,Santa Clara,,11.064134
...,...,...,...,...
9124,6059001303,Orange,5884.0,10.200000
9125,6059001304,Orange,3982.0,15.800000
9126,6059001401,Orange,4495.0,13.300000
9127,6013367200,Contra Costa,6042.0,10.500000


Uploaded society_vulnerable_linguistic_isolation_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_linguistic_isolation_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_poverty_metric.csv


Unnamed: 0,census tract,county,total population 2019,poverty_percent_2019
0,6085504321,Santa Clara,5574.0,17.500000
1,6085504410,Santa Clara,4724.0,23.100000
2,6085507003,Santa Clara,,17.793413
3,6085507004,Santa Clara,,17.793413
4,6085502204,Santa Clara,,17.793413
...,...,...,...,...
9124,6059001303,Orange,5884.0,28.300000
9125,6059001304,Orange,3982.0,46.900000
9126,6059001401,Orange,4495.0,37.600000
9127,6013367200,Contra Costa,6042.0,37.100000


Uploaded society_vulnerable_poverty_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_poverty_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_unemployment_metric.csv


Unnamed: 0,census tract,county,total population 2019,unemployment_percent_2019
0,6085504321,Santa Clara,5574.0,5.000000
1,6085504410,Santa Clara,4724.0,8.300000
2,6085507003,Santa Clara,,4.223708
3,6085507004,Santa Clara,,4.223708
4,6085502204,Santa Clara,,4.223708
...,...,...,...,...
9124,6059001303,Orange,5884.0,6.600000
9125,6059001304,Orange,3982.0,14.700000
9126,6059001401,Orange,4495.0,8.600000
9127,6013367200,Contra Costa,6042.0,6.000000


Uploaded society_vulnerable_unemployment_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_unemployment_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_housing_burden_metric.csv


Unnamed: 0,census tract,county,total population 2019,housing_burden_percent_2019
0,6085504321,Santa Clara,5574.0,11.700000
1,6085504410,Santa Clara,4724.0,17.800000
2,6085507003,Santa Clara,,14.783133
3,6085507004,Santa Clara,,14.783133
4,6085502204,Santa Clara,,14.783133
...,...,...,...,...
9124,6059001303,Orange,5884.0,7.600000
9125,6059001304,Orange,3982.0,17.700000
9126,6059001401,Orange,4495.0,25.600000
9127,6013367200,Contra Costa,6042.0,20.700000


Uploaded society_vulnerable_housing_burden_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_housing_burden_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_asthma_metric.csv


Unnamed: 0,census tract,county,total population 2019,total population 2021,asthma,asthma_related_ed_visits_per_10000_people_2019,asthma_related_ed_visits_per_10000_people_2021
0,6085504321,Santa Clara,5574.0,5412,25.790000,46.268389,47.653363
1,6085504410,Santa Clara,4724.0,4124,24.240000,51.312447,58.777886
2,6085507003,Santa Clara,,3074,33.249401,,108.163309
3,6085507004,Santa Clara,,3926,33.249401,,84.690273
4,6085502204,Santa Clara,,3242,33.249401,,102.558301
...,...,...,...,...,...,...,...
9124,6059001303,Orange,5884.0,6515,47.280000,80.353501,72.570990
9125,6059001304,Orange,3982.0,3565,46.820000,117.579106,131.332398
9126,6059001401,Orange,4495.0,4756,47.280000,105.183537,99.411270
9127,6013367200,Contra Costa,6042.0,5869,107.760000,178.351539,183.608792


Uploaded society_vulnerable_asthma_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_asthma_metric.csv

Data transformation: isolating columns relevant to Cal-CRAI metrics.
Data transformation: merging CalEnviroScreen data with 2021 ACS population data based on census tract.
Data transformation: extra tracts merged in were given a value based on the average metric value for the county that tract resides within.
Data transformation: renaming columns to reflect calculation year.
Data transformation: adding calculation columns for metrics with emergency department visits.
Saved DataFrame to: society_vulnerable_cardiovascular_disease_metric.csv


Unnamed: 0,census tract,county,total population 2019,total population 2021,cardiovascular disease,cardiovascular_disease_related_ed_visits_per_10000_people_2019,cardiovascular_disease_related_ed_visits_per_10000_people_2021
0,6085504321,Santa Clara,5574.0,5412,9.050000,16.236096,16.722099
1,6085504410,Santa Clara,4724.0,4124,8.700000,18.416596,21.096023
2,6085507003,Santa Clara,,3074,9.679880,,31.489526
3,6085507004,Santa Clara,,3926,9.679880,,24.655834
4,6085502204,Santa Clara,,3242,9.679880,,29.857743
...,...,...,...,...,...,...,...
9124,6059001303,Orange,5884.0,6515,15.010000,25.509857,23.039140
9125,6059001304,Orange,3982.0,3565,14.860000,37.317931,41.683029
9126,6059001401,Orange,4495.0,4756,15.010000,33.392659,31.560135
9127,6013367200,Contra Costa,6042.0,5869,18.280000,30.254882,31.146703


Uploaded society_vulnerable_cardiovascular_disease_metric.csv to s3://ca-climate-index/3_fair_data/index_data/society_vulnerable_cardiovascular_disease_metric.csv

