### Cal-CRAI Metric Calculation for: Economic Health
* Gini Index
* Median Income

In [1]:
import pandas as pd
import os
import sys
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [2]:
# Define a function to display data for a specific county
def display_tract_data(df, tract_num):
    tract_data = df[df['GEO_ID'] == tract_num]
    if tract_data.empty:
        print(f"No data found for {tract_num}")
    else:
        print(f"Data for {tract_num}:")
        display(tract_data)

In [3]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/society_economy/economic_health/bureau_labor_statistics/employment_hachman_index/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

aws_dir = '1_pull_data/society_economy/economic_health/'
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'employment_data_hachman_subset.csv'
Saved DataFrame as 'ACSDT5Y2022.B19083-Column-Metadata.csv'
Saved DataFrame as 'ACSDT5Y2022.B19083-Data.csv'
Saved DataFrame as 'ACSST5Y2022.S1901-Column-Metadata.csv'


  df = pd.read_csv(csv_data)


Saved DataFrame as 'ACSST5Y2022.S1901-Data.csv'


  df = pd.read_csv(csv_data)


Saved DataFrame as 'allhlcn22.csv'
Saved DataFrame as 'data_layout.csv'


In [4]:
gini_index_data = pd.read_csv('ACSDT5Y2022.B19083-Data.csv')
median_income_data = pd.read_csv('ACSST5Y2022.S1901-Data.csv')

  median_income_data = pd.read_csv('ACSST5Y2022.S1901-Data.csv')


In [5]:
county_tract = "s3://ca-climate-index/0_map_data/ca_tract_county_population_2022.csv"
ca_county_tract = pd.read_csv(county_tract)
ca_county_tract = ca_county_tract.rename(columns={'Census Tract': 'GEO_ID'})
ca_county_tract = ca_county_tract.drop(columns={'Unnamed: 0', 'COUNTYFP', 'County', 'Total Population 2021'})
ca_county_tract

Unnamed: 0,GEO_ID
0,6085504321
1,6085504410
2,6085507003
3,6085507004
4,6085502204
...,...
9124,6059001303
9125,6059001304
9126,6059001401
9127,6013367200


### Gini Index Metric

In [6]:
gini_index_data

Unnamed: 0,GEO_ID,NAME,B19083_001E,B19083_001M,Unnamed: 4
0,Geography,Geographic Area Name,Estimate!!Gini Index,Margin of Error!!Gini Index,
1,1400000US06001400100,Census Tract 4001; Alameda County; California,0.4228,0.0672,
2,1400000US06001400200,Census Tract 4002; Alameda County; California,0.4084,0.0432,
3,1400000US06001400300,Census Tract 4003; Alameda County; California,0.4615,0.0513,
4,1400000US06001400400,Census Tract 4004; Alameda County; California,0.5063,0.0557,
...,...,...,...,...,...
9125,1400000US06115040902,Census Tract 409.02; Yuba County; California,0.4252,0.0592,
9126,1400000US06115041001,Census Tract 410.01; Yuba County; California,0.5024,0.0806,
9127,1400000US06115041002,Census Tract 410.02; Yuba County; California,0.4875,0.1175,
9128,1400000US06115041101,Census Tract 411.01; Yuba County; California,0.4072,0.0543,


In [7]:
gini_index_data_cleaned = gini_index_data.drop(columns={'Unnamed: 4', 'B19083_001M', 'NAME'})
gini_index_data_cleaned = gini_index_data_cleaned.rename(columns={'B19083_001E':'est_gini_index'})
gini_index_data_cleaned = gini_index_data_cleaned[1:]
gini_index_data_cleaned['GEO_ID'] = gini_index_data_cleaned['GEO_ID'].str[10:]

gini_index_data_cleaned

Unnamed: 0,GEO_ID,est_gini_index
1,6001400100,0.4228
2,6001400200,0.4084
3,6001400300,0.4615
4,6001400400,0.5063
5,6001400500,0.4571
...,...,...
9125,6115040902,0.4252
9126,6115041001,0.5024
9127,6115041002,0.4875
9128,6115041101,0.4072


In [8]:
# Convert the 'GEO_ID' column in ca_county_tract DataFrame to string data type
ca_county_tract['GEO_ID'] = ca_county_tract['GEO_ID'].astype(str)

gini_merged_data = pd.merge(gini_index_data_cleaned, ca_county_tract, on='GEO_ID', how='right')
gini_merged_data

Unnamed: 0,GEO_ID,est_gini_index
0,6085504321,0.4197
1,6085504410,0.4309
2,6085507003,0.5357
3,6085507004,0.4793
4,6085502204,0.4010
...,...,...
9124,6059001303,0.3590
9125,6059001304,0.3582
9126,6059001401,0.4594
9127,6013367200,0.4130


In [9]:
gini_merged_data.to_csv('society_economy_gini_metric.csv')

### Median Income Metric

In [10]:
columns_to_keep = ['GEO_ID', 'S1901_C01_012E']

# Drop all columns except the specified ones
median_income_data_cleaned = median_income_data.drop(columns=[col for col in median_income_data.columns if col not in columns_to_keep])

median_income_data_cleaned = median_income_data_cleaned.rename(columns={'S1901_C01_012E':'est_median_income_dollars'})
median_income_data_cleaned = median_income_data_cleaned[1:]
median_income_data_cleaned['GEO_ID'] = median_income_data_cleaned['GEO_ID'].str[10:]
median_income_data_cleaned

Unnamed: 0,GEO_ID,est_median_income_dollars
1,6001400100,234236
2,6001400200,225500
3,6001400300,164000
4,6001400400,158836
5,6001400500,95078
...,...,...
9125,6115040902,54265
9126,6115041001,67321
9127,6115041002,102534
9128,6115041101,37018


In [11]:
# Convert the 'GEO_ID' column in ca_county_tract DataFrame to string data type
ca_county_tract['GEO_ID'] = ca_county_tract['GEO_ID'].astype(str)

# Now, you can proceed with merging
median_merged_data = pd.merge(median_income_data_cleaned, ca_county_tract, on='GEO_ID', how='right')

# View the merged DataFrame
median_merged_data

Unnamed: 0,GEO_ID,est_median_income_dollars
0,6085504321,172857
1,6085504410,143173
2,6085507003,240179
3,6085507004,198306
4,6085502204,67552
...,...,...
9124,6059001303,93363
9125,6059001304,68984
9126,6059001401,76319
9127,6013367200,90941


In [12]:
median_merged_data.to_csv('society_economy_median_income_metric.csv')

### Function call for both metrics

In [13]:
@append_metadata
def economy_metric(input_csv, export=False, varname=''):
    '''
    Calculates the Median income and Gini index data metrics and uploads to AWS.
    Data is sourced from United States Census Bureau: American Community Survey (ACS).
    Data codes: S1901 and B19083.

    Methods
    -------
    Rows without data were omitted, relevant columns were isolated and renamed.
    California census tract data was merged with the dataset based on tract.
    
    Parameters
    ----------
    input_csv: string
        csv economic data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI economy metrics to AWS
        True = will upload resulting df containing CAL CRAI economy metrics to AWS

    Script
    ------
    society_economic_metrics.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: non-data rows were removed.')
    print('Data transformation: data columns were renamed and isolated for metric relevancy.')
    print('Data transformation: data was merged with CA census tract data.')

    bucket_name = 'ca-climate-index'
    directory = '3_fair_data/index_data'
    export_filename = [input_csv]
    
    if export == True:
        upload_csv_aws(export_filename, bucket_name, directory)

    if export == False:
        print(f'{export_filename} uploaded to AWS.')

    if os.path.exists(input_csv):
        os.remove(input_csv)

In [32]:
input_csv = ['society_economy_gini_metric.csv',
            'society_economy_median_income_metric.csv'
            ]

varnames = [
    'society_acs_gini',
    'society_acs_income'
]

for csv, var in zip(input_csv, varnames):
    economy_metric(csv, export=True, varname=var)