## Cal-CRAI Metric Calculation for: Vulnerable Communities
This notebook calculates 6 metrics, all sourced from the American Community Survey.

- Ambulatory Difficulty: % of population living with an ambulatory disability
- Cognitive Difficulty: % of population living with a cognitive disability
- Financial Assistance: % of population living in a household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the last 12 months
- Health Insurance: % of population without health insurance

- Demography: 
    - % of population aged 65 years or older
    - % of population under 5 years old
    - % of population American Indian and Alaska Native

In [1]:
import os
import sys
import pandas as pd
import io
import numpy as np
import boto3
import zipfile
import shutil
# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)

# Adjust display options, helpful for long descriptions within ACS data
pd.set_option('display.max_colwidth', None)
# IMPORT WHEN PR #42 IS MERGED
'''
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import (
    pull_zipped_csv, upload_csv_aws
)
'''

"\nsys.path.append(os.path.expanduser('../../'))\nfrom scripts.utils.file_helpers import (\n    pull_zipped_csv, upload_csv_aws\n)\n"

In [2]:

# Allow to copy dataframe when making a new column
# pd.options.mode.copy_on_write = True


In [3]:
def pull_csv_from_directory(bucket_name, directory, search_zipped=True):
    """
    Pulls CSV files from a specified directory in an S3 bucket.
    
    Parameters:
    - bucket_name (str): The name of the S3 bucket.
    - directory (str): The directory within the bucket to search for CSV files.
    - search_zipped (bool): If True, search for CSV files within zip files. If False, search for CSV files directly.
    """
    # Create an S3 client
    s3 = boto3.client('s3')

    # List objects in the specified directory
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=directory)

    # Check if objects were found
    if 'Contents' in response:
        # Iterate through each object found
        for obj in response['Contents']:
            # Get the key (filename) of the object
            key = obj['Key']
            
            # Check if the object is a .zip file
            if search_zipped and key.endswith('.zip'):
                # Download the zip file into memory
                zip_object = s3.get_object(Bucket=bucket_name, Key=key)
                zip_data = io.BytesIO(zip_object['Body'].read())
                
                # Open the zip file
                with zipfile.ZipFile(zip_data, 'r') as zip_ref:
                    # Iterate through each file in the zip
                    for file_name in zip_ref.namelist():
                        # Check if the file is a .csv file
                        if file_name.endswith('.csv'):
                            # Read the .csv file
                            with zip_ref.open(file_name) as csv_file:
                                # Convert the csv content to pandas DataFrame
                                # we do not need the second row, as it just has some header information
                                df = pd.read_csv(csv_file, header=[0,1])
                                # Save the DataFrame with a similar name as the .csv file
                                df_name = file_name[:-4]  # Remove .csv extension
                                df.to_csv(f"{df_name}.csv", index=False)
                                print(f"Saved DataFrame as '{df_name}.csv'")
                                # You can now manipulate df as needed
            elif not search_zipped and key.endswith('.csv'):
                # Directly download the CSV file
                csv_object = s3.get_object(Bucket=bucket_name, Key=key)
                csv_data = io.BytesIO(csv_object['Body'].read())
                # Convert the csv content to pandas DataFrame
                df = pd.read_csv(csv_data, header=[0,1])
                # Save the DataFrame with a similar name as the .csv file
                df_name = key.split('/')[-1][:-4]  # Extract filename from key
                df.to_csv(f"{df_name}.csv", index=False)
                print(f"Saved DataFrame as '{df_name}.csv'")
                # You can now manipulate df as needed

    else:
        print("No objects found in the specified directory.")

def upload_csv_aws(file_name, bucket_name, directory):
    # Create an S3 client
    s3 = boto3.client('s3')
     # Save the file to AWS S3 using the client
    with open(file_name, 'rb') as data:
        s3.upload_fileobj(data, bucket_name, f"{directory}/{file_name}")
    print(f"{file_name} uploaded to AWS")

## Pulling all zipped folders within the vulnerable populations folder from AWS

In [4]:
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/society_economy/vulnerable_populations/american_community_survey/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=True)

Saved DataFrame as 'ACSDT5Y2022.B18105-Column-Metadata.csv'
Saved DataFrame as 'ambulatory_difficulty_B18105.csv'
Saved DataFrame as 'ACSDT5Y2022.B18104-Column-Metadata.csv'
Saved DataFrame as 'cognitive_difficulty_B18104.csv'
Saved DataFrame as 'ACSDP5Y2022.DP05-Column-Metadata.csv'
Saved DataFrame as 'demographic_DP05.csv'
Saved DataFrame as 'ACSDT5Y2022.B09010-Column-Metadata.csv'
Saved DataFrame as 'financial_support_B09010.csv'
Saved DataFrame as 'ACSDT5Y2022.B27010-Column-Metadata.csv'
Saved DataFrame as 'health_insurance_B27010.csv'


## Metrics 1-3: % of population aged 65 years or older, under 5 years old, American Indian and Alaska Native

In [5]:
# Read in demographic data
# we do not need the second row since it only describes the data
demographic_data = pd.read_csv('demographic_DP05.csv', skiprows=[1])
demographic_data.head()

Unnamed: 0,GEO_ID,NAME,DP05_0001E,DP05_0001M,DP05_0002E,DP05_0002M,DP05_0003E,DP05_0003M,DP05_0004E,DP05_0004M,...,DP05_0087PM,DP05_0088PE,DP05_0088PM,DP05_0089PE,DP05_0089PM,DP05_0090PE,DP05_0090PM,DP05_0091PE,DP05_0091PM,Unnamed: 366_level_0
0,1400000US06001400100,Census Tract 4001; Alameda County; California,3269,452,1621,339,1648,205,98.4,19.9,...,2.5,(X),(X),2351,(X),47.4,4.4,52.6,4.4,
1,1400000US06001400200,Census Tract 4002; Alameda County; California,2147,201,1075,138,1072,129,100.3,16.3,...,3.4,(X),(X),1679,(X),49.4,4.5,50.6,4.5,
2,1400000US06001400300,Census Tract 4003; Alameda County; California,5619,571,2801,504,2818,332,99.4,22.5,...,4.3,(X),(X),4414,(X),47.6,5.6,52.4,5.6,
3,1400000US06001400400,Census Tract 4004; Alameda County; California,4278,598,1926,327,2352,363,81.9,13.5,...,1.8,(X),(X),3180,(X),46.9,4.0,53.1,4.0,
4,1400000US06001400500,Census Tract 4005; Alameda County; California,3949,737,1870,291,2079,565,89.9,23.0,...,4.9,(X),(X),3169,(X),44.3,7.0,55.7,7.0,


In [6]:
# Making a Census tract column using the GEO_ID column
demographic_data['Census_Tract'] = demographic_data.copy()['GEO_ID'].str[10:]
demographic_data.head()

Unnamed: 0,GEO_ID,NAME,DP05_0001E,DP05_0001M,DP05_0002E,DP05_0002M,DP05_0003E,DP05_0003M,DP05_0004E,DP05_0004M,...,DP05_0088PE,DP05_0088PM,DP05_0089PE,DP05_0089PM,DP05_0090PE,DP05_0090PM,DP05_0091PE,DP05_0091PM,Unnamed: 366_level_0,Census_Tract
0,1400000US06001400100,Census Tract 4001; Alameda County; California,3269,452,1621,339,1648,205,98.4,19.9,...,(X),(X),2351,(X),47.4,4.4,52.6,4.4,,6001400100
1,1400000US06001400200,Census Tract 4002; Alameda County; California,2147,201,1075,138,1072,129,100.3,16.3,...,(X),(X),1679,(X),49.4,4.5,50.6,4.5,,6001400200
2,1400000US06001400300,Census Tract 4003; Alameda County; California,5619,571,2801,504,2818,332,99.4,22.5,...,(X),(X),4414,(X),47.6,5.6,52.4,5.6,,6001400300
3,1400000US06001400400,Census Tract 4004; Alameda County; California,4278,598,1926,327,2352,363,81.9,13.5,...,(X),(X),3180,(X),46.9,4.0,53.1,4.0,,6001400400
4,1400000US06001400500,Census Tract 4005; Alameda County; California,3949,737,1870,291,2079,565,89.9,23.0,...,(X),(X),3169,(X),44.3,7.0,55.7,7.0,,6001400500


## Renaming demographic data columns from their code to our desired metrics
* dataset contains percent of population for each of the demographic metrics

In [7]:
# Renaming columns from dictionary code to definition
demographic_data = demographic_data.rename(columns={'DP05_0005PE': 'percent_total_pop_under_5'})
demographic_data = demographic_data.rename(columns={'DP05_0029PE': 'percent_total_pop_over_65'})
demographic_data = demographic_data.rename(columns={'DP05_0039PE': 'percent_total_pop_american_indian_alaska_native'})
demographic_data = demographic_data.rename(columns={'DP05_0001E': 'est_total_pop'})
demographic_data = demographic_data.rename(columns={'DP05_0024E': 'est_total_pop_over_65'})
# Adding in estimates under age 18 as it is used in another metric below
demographic_data = demographic_data.rename(columns={'DP05_0019E': 'est_under_18'})


* have to calculate percent of the population over 65, the values in their 'percent_pop_over_65' are not percentages
* save df as a csv

In [8]:
# Isolating relevant columns to our data metrics
# Ommitting 'percent_total_pop_over_65' as the column is incorrect
cri_demographic_data = demographic_data[['GEO_ID', 'Census_Tract', 'percent_total_pop_under_5', 'percent_total_pop_american_indian_alaska_native', 'est_total_pop', 'est_total_pop_over_65', 'est_under_18']]

# Create a new column for % of population over 65 years using estimated population values 
cri_demographic_data = cri_demographic_data.assign(
    real_percent_total_pop_over_65=
    lambda x: 100*(x.est_total_pop_over_65 / x.est_total_pop)
)

# Saving metric df to .csv file
print('Saving demographic metric data to a .csv')
cri_demographic_data.to_csv('society_age_race_metric.csv')
print('Saved')

cri_demographic_data

Saving demographic metric data to a .csv
Saved


Unnamed: 0,GEO_ID,Census_Tract,percent_total_pop_under_5,percent_total_pop_american_indian_alaska_native,est_total_pop,est_total_pop_over_65,est_under_18,real_percent_total_pop_over_65
0,1400000US06001400100,6001400100,4.1,0.0,3269,884,661,27.041909
1,1400000US06001400200,6001400200,7.9,0.4,2147,553,350,25.756870
2,1400000US06001400300,6001400300,2.3,0.5,5619,916,942,16.301833
3,1400000US06001400400,6001400400,7.5,0.5,4278,550,941,12.856475
4,1400000US06001400500,6001400500,4.0,0.1,3949,649,496,16.434540
...,...,...,...,...,...,...,...,...
9124,1400000US06115040902,6115040902,12.3,0.2,1868,0,432,0.000000
9125,1400000US06115041001,6115041001,6.9,0.3,3672,1234,514,33.605664
9126,1400000US06115041002,6115041002,2.6,2.6,3417,1025,552,29.997073
9127,1400000US06115041101,6115041101,2.7,1.1,2288,478,494,20.891608


### Separating the three metrics for individual csv creation

In [9]:
cri_under_5_metric = cri_demographic_data[['GEO_ID', 'Census_Tract', 'percent_total_pop_under_5']]

cri_american_indian_alaska_native_metric = cri_demographic_data[['GEO_ID', 'Census_Tract', 'percent_total_pop_american_indian_alaska_native']]

cri_over_65_metric = cri_demographic_data[['GEO_ID', 'Census_Tract', 'real_percent_total_pop_over_65']]


In [11]:
# Saving metric df to .csv file
print('Saving under 5 years old metric data to a .csv')
cri_under_5_metric.to_csv('society_under_5yo_metric.csv')
print('Saved')

# Saving American Indian and Alaska Native population metrics to .csv file
print('Saving demographic metric data to a .csv')
cri_american_indian_alaska_native_metric.to_csv('society_american_indian_alaska_native_metric.csv')
print('Saved')

# Saving over 65 years old metric data to .csv file
print('Saving demographic metric data to a .csv')
cri_over_65_metric.to_csv('society_over_65yo_metric.csv')
print('Saved')

Saving under 5 years old metric data to a .csv
Saved
Saving demographic metric data to a .csv
Saved
Saving demographic metric data to a .csv
Saved


## We have decided to use the ACS demographic data estimated population values for all other population percent calculations, we should create a separate csv file with just the population estimates per census tract

In [12]:
cri_demographic_estimated_population = cri_demographic_data[['est_total_pop']]
# Saving metric df to .csv file
print('Saving demographic metric data to a .csv')
cri_demographic_estimated_population.to_csv('cri_acs_demographic_estimated_population.csv')
print('Saved')
cri_demographic_estimated_population

Saving demographic metric data to a .csv
Saved


Unnamed: 0,est_total_pop
0,3269
1,2147
2,5619
3,4278
4,3949
...,...
9124,1868
9125,3672
9126,3417
9127,2288


## Upload the newly made demographic estimated population data to AWS so we can call it for other metrics

In [13]:
bucket_name = 'ca-climate-index'
file_name = 'cri_acs_demographic_estimated_population.csv'
directory = '0_map_data'

upload_csv_aws(file_name, bucket_name, directory)
# Remove final csv files from local directory
os.remove(file_name)

cri_acs_demographic_estimated_population.csv uploaded to AWS


## Metrics 4-5
* will be using total population from demographic data (originally column DP05_0001E) to calculate percentages
    - so csv files resulting from these metrics will be run through a final function at the end to calculate percent of population metric

### Ambulatory Disability

In [14]:
# Read in ambulatory data
ambulatory_data = pd.read_csv('ambulatory_difficulty_B18105.csv', header=[0,1])
ambulatory_data.head(5)

Unnamed: 0_level_0,GEO_ID,NAME,B18105_001E,B18105_001M,B18105_002E,B18105_002M,B18105_003E,B18105_003M,B18105_004E,B18105_004M,...,B18105_029M,B18105_030E,B18105_030M,B18105_031E,B18105_031M,B18105_032E,B18105_032M,B18105_033E,B18105_033M,Unnamed: 68_level_0
Unnamed: 0_level_1,Geography,Geographic Area Name,Estimate!!Total:,Margin of Error!!Total:,Estimate!!Total:!!Male:,Margin of Error!!Total:!!Male:,Estimate!!Total:!!Male:!!5 to 17 years:,Margin of Error!!Total:!!Male:!!5 to 17 years:,Estimate!!Total:!!Male:!!5 to 17 years:!!With an ambulatory difficulty,Margin of Error!!Total:!!Male:!!5 to 17 years:!!With an ambulatory difficulty,...,Margin of Error!!Total:!!Female:!!65 to 74 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!65 to 74 years:!!No ambulatory difficulty,Margin of Error!!Total:!!Female:!!65 to 74 years:!!No ambulatory difficulty,Estimate!!Total:!!Female:!!75 years and over:,Margin of Error!!Total:!!Female:!!75 years and over:,Estimate!!Total:!!Female:!!75 years and over:!!With an ambulatory difficulty,Margin of Error!!Total:!!Female:!!75 years and over:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!75 years and over:!!No ambulatory difficulty,Margin of Error!!Total:!!Female:!!75 years and over:!!No ambulatory difficulty,Unnamed: 68_level_1
0,1400000US06001400100,Census Tract 4001; Alameda County; California,3136,448,1549,341,300,216,0,13,...,47,206,71,279,113,47,38,232,114,
1,1400000US06001400200,Census Tract 4002; Alameda County; California,1978,199,992,130,76,37,0,13,...,13,136,33,174,60,64,45,110,39,
2,1400000US06001400300,Census Tract 4003; Alameda County; California,5492,574,2767,502,516,199,0,19,...,25,307,144,279,87,0,19,279,87,
3,1400000US06001400400,Census Tract 4004; Alameda County; California,3937,475,1791,277,261,118,0,13,...,27,190,60,119,50,33,28,86,43,
4,1400000US06001400500,Census Tract 4005; Alameda County; California,3791,737,1748,287,224,72,0,13,...,21,137,68,325,280,18,29,307,277,


In [15]:
# Making a Census tract column using the GEO_ID column
ambulatory_data['Census_Tract'] = ambulatory_data.copy()['GEO_ID', 'Geography'].str[10:]
filtered_ambulatory_disability = ambulatory_data[['GEO_ID', 'Census_Tract']]
# filter data to only include estimated population living with an ambulatory disability
filtered_ambulatory_disability = pd.concat(
    [filtered_ambulatory_disability,
    ambulatory_data.filter(regex=r'Estimate').filter(
    regex=r'With an ambulatory difficulty')], axis=1)

# Display the resulting DataFrame
display(filtered_ambulatory_disability)

Unnamed: 0_level_0,GEO_ID,Census_Tract,B18105_004E,B18105_007E,B18105_010E,B18105_013E,B18105_016E,B18105_020E,B18105_023E,B18105_026E,B18105_029E,B18105_032E
Unnamed: 0_level_1,Geography,Unnamed: 2_level_1,Estimate!!Total:!!Male:!!5 to 17 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!18 to 34 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!35 to 64 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!65 to 74 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!75 years and over:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!5 to 17 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!18 to 34 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!35 to 64 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!65 to 74 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!75 years and over:!!With an ambulatory difficulty
0,1400000US06001400100,6001400100,0,0,19,29,22,0,0,0,36,47
1,1400000US06001400200,6001400200,0,0,0,10,4,0,0,8,0,64
2,1400000US06001400300,6001400300,0,0,2,67,43,0,0,0,17,0
3,1400000US06001400400,6001400400,0,0,48,7,22,0,0,11,35,33
4,1400000US06001400500,6001400500,0,6,71,21,0,0,0,0,17,18
...,...,...,...,...,...,...,...,...,...,...,...,...
9124,1400000US06115040902,6115040902,0,0,0,0,0,10,0,0,0,0
9125,1400000US06115041001,6115041001,0,0,30,72,30,0,16,94,11,55
9126,1400000US06115041002,6115041002,0,0,19,23,111,0,0,50,16,94
9127,1400000US06115041101,6115041101,0,0,72,100,0,0,0,153,0,60


In [16]:
# Create a new column by summing all columns with data (ie not tract info)
filtered_ambulatory_disability['sum_ambulatory_disabilities'] = filtered_ambulatory_disability.iloc[:, 2:].sum(axis=1).astype(int)

# Display the DataFrame with the new column
display(filtered_ambulatory_disability)

Unnamed: 0_level_0,GEO_ID,Census_Tract,B18105_004E,B18105_007E,B18105_010E,B18105_013E,B18105_016E,B18105_020E,B18105_023E,B18105_026E,B18105_029E,B18105_032E,sum_ambulatory_disabilities
Unnamed: 0_level_1,Geography,Unnamed: 2_level_1,Estimate!!Total:!!Male:!!5 to 17 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!18 to 34 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!35 to 64 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!65 to 74 years:!!With an ambulatory difficulty,Estimate!!Total:!!Male:!!75 years and over:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!5 to 17 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!18 to 34 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!35 to 64 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!65 to 74 years:!!With an ambulatory difficulty,Estimate!!Total:!!Female:!!75 years and over:!!With an ambulatory difficulty,Unnamed: 13_level_1
0,1400000US06001400100,6001400100,0,0,19,29,22,0,0,0,36,47,153
1,1400000US06001400200,6001400200,0,0,0,10,4,0,0,8,0,64,86
2,1400000US06001400300,6001400300,0,0,2,67,43,0,0,0,17,0,129
3,1400000US06001400400,6001400400,0,0,48,7,22,0,0,11,35,33,156
4,1400000US06001400500,6001400500,0,6,71,21,0,0,0,0,17,18,133
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9124,1400000US06115040902,6115040902,0,0,0,0,0,10,0,0,0,0,10
9125,1400000US06115041001,6115041001,0,0,30,72,30,0,16,94,11,55,308
9126,1400000US06115041002,6115041002,0,0,19,23,111,0,0,50,16,94,313
9127,1400000US06115041101,6115041101,0,0,72,100,0,0,0,153,0,60,385


## Subset necessary columns and clean up header row

In [17]:
ambulatory_disability_sum = filtered_ambulatory_disability.loc[:,['GEO_ID', 'Census_Tract', 'sum_ambulatory_disabilities']]
ambulatory_disability_sum.columns = ambulatory_disability_sum.columns.droplevel(-1)

# Saving metric df to .csv file
print('Saving demographic metric data to a .csv')
ambulatory_disability_sum.to_csv('ambulatory_disability_sum.csv')
print('Saved')

ambulatory_disability_sum

Saving demographic metric data to a .csv
Saved


Unnamed: 0,GEO_ID,Census_Tract,sum_ambulatory_disabilities
0,1400000US06001400100,6001400100,153
1,1400000US06001400200,6001400200,86
2,1400000US06001400300,6001400300,129
3,1400000US06001400400,6001400400,156
4,1400000US06001400500,6001400500,133
...,...,...,...
9124,1400000US06115040902,6115040902,10
9125,1400000US06115041001,6115041001,308
9126,1400000US06115041002,6115041002,313
9127,1400000US06115041101,6115041101,385


# This is as far as Beth has gotten so far

### Cognitive Disability

In [214]:
# Read in cognitive data
cognitive_data = pd.read_csv('cognitive_difficulty_B18104.csv')
cognitive_data.head(5)

  cognitive_data = pd.read_csv('cognitive_difficulty_B18104.csv')


Unnamed: 0,GEO_ID,NAME,B18104_001E,B18104_001M,B18104_002E,B18104_002M,B18104_003E,B18104_003M,B18104_004E,B18104_004M,B18104_005E,B18104_005M,B18104_006E,B18104_006M,B18104_007E,B18104_007M,B18104_008E,B18104_008M,B18104_009E,B18104_009M,B18104_010E,B18104_010M,B18104_011E,B18104_011M,B18104_012E,B18104_012M,B18104_013E,B18104_013M,B18104_014E,B18104_014M,B18104_015E,B18104_015M,B18104_016E,B18104_016M,B18104_017E,B18104_017M,B18104_018E,B18104_018M,B18104_019E,B18104_019M,B18104_020E,B18104_020M,B18104_021E,B18104_021M,B18104_022E,B18104_022M,B18104_023E,B18104_023M,B18104_024E,B18104_024M,B18104_025E,B18104_025M,B18104_026E,B18104_026M,B18104_027E,B18104_027M,B18104_028E,B18104_028M,B18104_029E,B18104_029M,B18104_030E,B18104_030M,B18104_031E,B18104_031M,B18104_032E,B18104_032M,B18104_033E,B18104_033M,Unnamed: 68
0,Geography,Geographic Area Name,Estimate!!Total:,Margin of Error!!Total:,Estimate!!Total:!!Male:,Margin of Error!!Total:!!Male:,Estimate!!Total:!!Male:!!5 to 17 years:,Margin of Error!!Total:!!Male:!!5 to 17 years:,Estimate!!Total:!!Male:!!5 to 17 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Male:!!5 to 17 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!5 to 17 years:!!No cognitive difficulty,Margin of Error!!Total:!!Male:!!5 to 17 years:!!No cognitive difficulty,Estimate!!Total:!!Male:!!18 to 34 years:,Margin of Error!!Total:!!Male:!!18 to 34 years:,Estimate!!Total:!!Male:!!18 to 34 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Male:!!18 to 34 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!18 to 34 years:!!No cognitive difficulty,Margin of Error!!Total:!!Male:!!18 to 34 years:!!No cognitive difficulty,Estimate!!Total:!!Male:!!35 to 64 years:,Margin of Error!!Total:!!Male:!!35 to 64 years:,Estimate!!Total:!!Male:!!35 to 64 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Male:!!35 to 64 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!35 to 64 years:!!No cognitive difficulty,Margin of Error!!Total:!!Male:!!35 to 64 years:!!No cognitive difficulty,Estimate!!Total:!!Male:!!65 to 74 years:,Margin of Error!!Total:!!Male:!!65 to 74 years:,Estimate!!Total:!!Male:!!65 to 74 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Male:!!65 to 74 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!65 to 74 years:!!No cognitive difficulty,Margin of Error!!Total:!!Male:!!65 to 74 years:!!No cognitive difficulty,Estimate!!Total:!!Male:!!75 years and over:,Margin of Error!!Total:!!Male:!!75 years and over:,Estimate!!Total:!!Male:!!75 years and over:!!With a cognitive difficulty,Margin of Error!!Total:!!Male:!!75 years and over:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!75 years and over:!!No cognitive difficulty,Margin of Error!!Total:!!Male:!!75 years and over:!!No cognitive difficulty,Estimate!!Total:!!Female:,Margin of Error!!Total:!!Female:,Estimate!!Total:!!Female:!!5 to 17 years:,Margin of Error!!Total:!!Female:!!5 to 17 years:,Estimate!!Total:!!Female:!!5 to 17 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Female:!!5 to 17 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!5 to 17 years:!!No cognitive difficulty,Margin of Error!!Total:!!Female:!!5 to 17 years:!!No cognitive difficulty,Estimate!!Total:!!Female:!!18 to 34 years:,Margin of Error!!Total:!!Female:!!18 to 34 years:,Estimate!!Total:!!Female:!!18 to 34 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Female:!!18 to 34 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!18 to 34 years:!!No cognitive difficulty,Margin of Error!!Total:!!Female:!!18 to 34 years:!!No cognitive difficulty,Estimate!!Total:!!Female:!!35 to 64 years:,Margin of Error!!Total:!!Female:!!35 to 64 years:,Estimate!!Total:!!Female:!!35 to 64 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Female:!!35 to 64 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!35 to 64 years:!!No cognitive difficulty,Margin of Error!!Total:!!Female:!!35 to 64 years:!!No cognitive difficulty,Estimate!!Total:!!Female:!!65 to 74 years:,Margin of Error!!Total:!!Female:!!65 to 74 years:,Estimate!!Total:!!Female:!!65 to 74 years:!!With a cognitive difficulty,Margin of Error!!Total:!!Female:!!65 to 74 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!65 to 74 years:!!No cognitive difficulty,Margin of Error!!Total:!!Female:!!65 to 74 years:!!No cognitive difficulty,Estimate!!Total:!!Female:!!75 years and over:,Margin of Error!!Total:!!Female:!!75 years and over:,Estimate!!Total:!!Female:!!75 years and over:!!With a cognitive difficulty,Margin of Error!!Total:!!Female:!!75 years and over:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!75 years and over:!!No cognitive difficulty,Margin of Error!!Total:!!Female:!!75 years and over:!!No cognitive difficulty,
1,1400000US06001400100,Census Tract 4001; Alameda County; California,3136,448,1549,341,300,216,11,18,289,216,187,89,13,19,174,93,699,149,19,22,680,152,209,67,10,17,199,64,154,58,0,13,154,58,1587,192,228,110,20,16,208,111,167,94,11,17,156,81,671,165,28,40,643,171,242,81,0,13,242,81,279,113,10,15,269,115,
2,1400000US06001400200,Census Tract 4002; Alameda County; California,1978,199,992,130,76,37,0,13,76,37,245,86,0,13,245,86,428,90,33,25,395,91,149,35,5,8,144,36,94,34,11,11,83,31,986,122,105,51,0,13,105,51,203,98,0,13,203,98,368,56,0,13,368,56,136,33,0,13,136,33,174,60,58,44,116,40,
3,1400000US06001400300,Census Tract 4003; Alameda County; California,5492,574,2767,502,516,199,22,37,494,198,836,302,20,24,816,299,1102,209,42,48,1060,207,151,96,0,19,151,96,162,79,18,29,144,74,2725,324,299,137,15,24,284,123,749,251,19,32,730,251,1074,166,25,36,1049,159,324,144,0,19,324,144,279,87,12,20,267,87,
4,1400000US06001400400,Census Tract 4004; Alameda County; California,3937,475,1791,277,261,118,0,13,261,118,340,132,0,13,340,132,984,189,98,89,886,204,141,53,0,13,141,53,65,37,10,17,55,30,2146,287,358,131,0,13,358,131,563,190,0,13,563,190,881,169,17,27,864,170,225,63,0,13,225,63,119,50,0,13,119,50,


In [216]:
# Making a Census tract column using the GEO_ID column
cognitive_data['Census_Tract'] = cognitive_data['GEO_ID'].str[10:]

# Filter columns based on criteria
columns_to_keep = ['GEO_ID', 'Census_Tract']  # Always keep these columns
for column in cognitive_data.columns:
    # Check if the column value is not NaN and if both strings are present in the column description
    if not pd.isna(cognitive_data.iloc[0][column]) and 'Estimate' in cognitive_data.iloc[0][column] and 'With a cognitive difficulty' in cognitive_data.iloc[0][column]:
        columns_to_keep.append(column)

# Create a new DataFrame with the filtered columns
filtered_cognitive_disability = cognitive_data[columns_to_keep].copy()

# Reset index
filtered_cognitive_disability = filtered_cognitive_disability.reset_index(drop=True)

# Display the resulting DataFrame
display(filtered_cognitive_disability)


Unnamed: 0,GEO_ID,Census_Tract,B18104_004E,B18104_007E,B18104_010E,B18104_013E,B18104_016E,B18104_020E,B18104_023E,B18104_026E,B18104_029E,B18104_032E
0,Geography,,Estimate!!Total:!!Male:!!5 to 17 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!18 to 34 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!35 to 64 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!65 to 74 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!75 years and over:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!5 to 17 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!18 to 34 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!35 to 64 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!65 to 74 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!75 years and over:!!With a cognitive difficulty
1,1400000US06001400100,6001400100,11,13,19,10,0,20,11,28,0,10
2,1400000US06001400200,6001400200,0,0,33,5,11,0,0,0,0,58
3,1400000US06001400300,6001400300,22,20,42,0,18,15,19,25,0,12
4,1400000US06001400400,6001400400,0,0,98,0,10,0,0,17,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9125,1400000US06115040902,6115040902,0,0,0,0,0,20,0,0,0,0
9126,1400000US06115041001,6115041001,0,0,27,14,11,0,4,6,23,43
9127,1400000US06115041002,6115041002,0,0,0,45,71,49,0,0,16,41
9128,1400000US06115041101,6115041101,86,0,32,10,0,0,0,53,0,0


In [217]:
# Convert columns to numeric
filtered_cognitive_disability.iloc[1:, 2:] = filtered_cognitive_disability.iloc[1:, 2:].apply(pd.to_numeric, errors='coerce')

# Create a new column by summing all columns starting from the fourth column
filtered_cognitive_disability['sum_cognitive_disabilities'] = filtered_cognitive_disability.iloc[1:, 2:].sum(axis=1)

# Display the DataFrame with the new column
display(filtered_cognitive_disability)

Unnamed: 0,GEO_ID,Census_Tract,B18104_004E,B18104_007E,B18104_010E,B18104_013E,B18104_016E,B18104_020E,B18104_023E,B18104_026E,B18104_029E,B18104_032E,sum_cognitive_disabilities
0,Geography,,Estimate!!Total:!!Male:!!5 to 17 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!18 to 34 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!35 to 64 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!65 to 74 years:!!With a cognitive difficulty,Estimate!!Total:!!Male:!!75 years and over:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!5 to 17 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!18 to 34 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!35 to 64 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!65 to 74 years:!!With a cognitive difficulty,Estimate!!Total:!!Female:!!75 years and over:!!With a cognitive difficulty,
1,1400000US06001400100,6001400100,11,13,19,10,0,20,11,28,0,10,122
2,1400000US06001400200,6001400200,0,0,33,5,11,0,0,0,0,58,107
3,1400000US06001400300,6001400300,22,20,42,0,18,15,19,25,0,12,173
4,1400000US06001400400,6001400400,0,0,98,0,10,0,0,17,0,0,125
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9125,1400000US06115040902,6115040902,0,0,0,0,0,20,0,0,0,0,20
9126,1400000US06115041001,6115041001,0,0,27,14,11,0,4,6,23,43,128
9127,1400000US06115041002,6115041002,0,0,0,45,71,49,0,0,16,41,222
9128,1400000US06115041101,6115041101,86,0,32,10,0,0,0,53,0,0,181


In [218]:
cognitive_disability_sum = filtered_cognitive_disability.drop(0)
cognitive_disability_sum = cognitive_disability_sum.loc[:,['GEO_ID', 'Census_Tract', 'sum_cognitive_disabilities']]

# Saving metric df to .csv file
print('Saving demographic metric data to a .csv')
cognitive_disability_sum.to_csv('cognitive_disability_sum.csv')
print('Saved')

cognitive_disability_sum

Saving demographic metric data to a .csv
Saved


Unnamed: 0,GEO_ID,Census_Tract,sum_cognitive_disabilities
1,1400000US06001400100,6001400100,122
2,1400000US06001400200,6001400200,107
3,1400000US06001400300,6001400300,173
4,1400000US06001400400,6001400400,125
5,1400000US06001400500,6001400500,128
...,...,...,...
9125,1400000US06115040902,6115040902,20
9126,1400000US06115041001,6115041001,128
9127,1400000US06115041002,6115041002,222
9128,1400000US06115041101,6115041101,181


## Metric 6: Financial Assistance
* ACS data is for children under 18 years in households
* number of children per tract in financial support data matches number\
of children in demographic data, so no conversion necessary

In [219]:
# Read in cognitive data
financial_assistance_data = pd.read_csv('financial_support_B09010.csv')
# Making a Census tract column using the GEO_ID column
financial_assistance_data['Census_Tract'] = financial_assistance_data['GEO_ID'].str[10:]
financial_assistance_data.head(5)

Unnamed: 0,GEO_ID,NAME,B09010_001E,B09010_001M,B09010_002E,B09010_002M,B09010_003E,B09010_003M,B09010_004E,B09010_004M,B09010_005E,B09010_005M,B09010_006E,B09010_006M,B09010_007E,B09010_007M,B09010_008E,B09010_008M,B09010_009E,B09010_009M,B09010_010E,B09010_010M,B09010_011E,B09010_011M,B09010_012E,B09010_012M,B09010_013E,B09010_013M,Unnamed: 28,Census_Tract
0,Geography,Geographic Area Name,Estimate!!Total:,Margin of Error!!Total:,"Estimate!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:","Margin of Error!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:","Estimate!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:","Margin of Error!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:","Estimate!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In married-couple family","Margin of Error!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In married-couple family","Estimate!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In male householder, no spouse present, family","Margin of Error!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In male householder, no spouse present, family","Estimate!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In female householder, no spouse present, family","Margin of Error!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In female householder, no spouse present, family","Estimate!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In nonfamily households","Margin of Error!!Total:!!Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In nonfamily households","Estimate!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:","Margin of Error!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:","Estimate!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:","Margin of Error!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:","Estimate!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In married-couple family","Margin of Error!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In married-couple family","Estimate!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In male householder, no spouse present, family","Margin of Error!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In male householder, no spouse present, family","Estimate!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In female householder, no spouse present, family","Margin of Error!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In family households:!!In female householder, no spouse present, family","Estimate!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In nonfamily households","Margin of Error!!Total:!!Living in household with no Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months:!!In nonfamily households",,
1,1400000US06001400100,Census Tract 4001; Alameda County; California,661,237,105,68,85,63,85,63,0,13,0,13,20,31,556,238,556,238,382,222,96,100,78,53,0,13,,6001400100.0
2,1400000US06001400200,Census Tract 4002; Alameda County; California,350,44,0,13,0,13,0,13,0,13,0,13,0,13,350,44,350,44,309,47,9,16,32,31,0,13,,6001400200.0
3,1400000US06001400300,Census Tract 4003; Alameda County; California,942,258,30,49,30,49,30,49,0,19,0,19,0,19,912,233,912,233,609,158,132,173,171,90,0,19,,6001400300.0
4,1400000US06001400400,Census Tract 4004; Alameda County; California,941,308,134,163,134,163,8,13,0,13,126,162,0,13,807,312,769,303,666,298,31,30,72,65,38,60,,6001400400.0


In [220]:
# Renaming columns from dictionary code to definition
financial_assistance_data = financial_assistance_data.rename(columns={'B09010_001E': 'total_children_under_18'})
financial_assistance_data = financial_assistance_data.rename(columns={'B09010_002E': 'estimated_total_children_household_ssi_cash_assistance_or_SNAP_12_months'})

# Drop the first row that contains additional info about columns
financial_assistance_data = financial_assistance_data.iloc[1:]

# Subset for desired columns
filtered_financial_assistance_data = financial_assistance_data.loc[:,['GEO_ID', 'Census_Tract', 'total_children_under_18', 'estimated_total_children_household_ssi_cash_assistance_or_SNAP_12_months']]

# Create a new column for % of children living in household with financial assistance 
filtered_financial_assistance_data.loc[:,'percent_children_household_financial_assistance'] = pd.to_numeric(filtered_financial_assistance_data['estimated_total_children_household_ssi_cash_assistance_or_SNAP_12_months']) / pd.to_numeric(filtered_financial_assistance_data['total_children_under_18'])

# Convert to percentage
filtered_financial_assistance_data.loc[:,'percent_children_household_financial_assistance'] *= 100
# filtered_financial_assistance_data

In [221]:
# Saving metric df to .csv file
print('Saving demographic metric data to a .csv')
filtered_financial_assistance_data.to_csv('society_financial_assistance_metric.csv')
print('Saved')

filtered_financial_assistance_data

Saving demographic metric data to a .csv
Saved


Unnamed: 0,GEO_ID,Census_Tract,total_children_under_18,estimated_total_children_household_ssi_cash_assistance_or_SNAP_12_months,percent_children_household_financial_assistance
1,1400000US06001400100,6001400100,661,105,15.885023
2,1400000US06001400200,6001400200,350,0,0.000000
3,1400000US06001400300,6001400300,942,30,3.184713
4,1400000US06001400400,6001400400,941,134,14.240170
5,1400000US06001400500,6001400500,496,22,4.435484
...,...,...,...,...,...
9125,1400000US06115040902,6115040902,432,57,13.194444
9126,1400000US06115041001,6115041001,514,0,0.000000
9127,1400000US06115041002,6115041002,552,0,0.000000
9128,1400000US06115041101,6115041101,494,219,44.331984


## Metric 7: Health Insurance
* though the estimated total code (_001E) is the same as cognitive and ambulatory disability datasets,\
SOME of the values are the same as the demographic data values. Strange, first three match, fourth one does not, so to be safe,
I will impliment the resulting csv in the function below to calculate percent based on the demographic data total population

In [222]:
# Read in cognitive data
health_insurance_data = pd.read_csv('health_insurance_B27010.csv')

# Making a Census tract column using the GEO_ID column
health_insurance_data['Census_Tract'] = health_insurance_data['GEO_ID'].str[10:]
health_insurance_data.head(5)

  health_insurance_data = pd.read_csv('health_insurance_B27010.csv')


Unnamed: 0,GEO_ID,NAME,B27010_001E,B27010_001M,B27010_002E,B27010_002M,B27010_003E,B27010_003M,B27010_004E,B27010_004M,B27010_005E,B27010_005M,B27010_006E,B27010_006M,B27010_007E,B27010_007M,B27010_008E,B27010_008M,B27010_009E,B27010_009M,B27010_010E,B27010_010M,B27010_011E,B27010_011M,B27010_012E,B27010_012M,B27010_013E,B27010_013M,B27010_014E,B27010_014M,B27010_015E,B27010_015M,B27010_016E,B27010_016M,B27010_017E,B27010_017M,B27010_018E,B27010_018M,B27010_019E,B27010_019M,B27010_020E,B27010_020M,B27010_021E,B27010_021M,B27010_022E,B27010_022M,B27010_023E,B27010_023M,B27010_024E,B27010_024M,B27010_025E,B27010_025M,B27010_026E,B27010_026M,B27010_027E,B27010_027M,B27010_028E,B27010_028M,B27010_029E,B27010_029M,B27010_030E,B27010_030M,B27010_031E,B27010_031M,B27010_032E,B27010_032M,B27010_033E,B27010_033M,B27010_034E,B27010_034M,B27010_035E,B27010_035M,B27010_036E,B27010_036M,B27010_037E,B27010_037M,B27010_038E,B27010_038M,B27010_039E,B27010_039M,B27010_040E,B27010_040M,B27010_041E,B27010_041M,B27010_042E,B27010_042M,B27010_043E,B27010_043M,B27010_044E,B27010_044M,B27010_045E,B27010_045M,B27010_046E,B27010_046M,B27010_047E,B27010_047M,B27010_048E,B27010_048M,B27010_049E,B27010_049M,B27010_050E,B27010_050M,B27010_051E,B27010_051M,B27010_052E,B27010_052M,B27010_053E,B27010_053M,B27010_054E,B27010_054M,B27010_055E,B27010_055M,B27010_056E,B27010_056M,B27010_057E,B27010_057M,B27010_058E,B27010_058M,B27010_059E,B27010_059M,B27010_060E,B27010_060M,B27010_061E,B27010_061M,B27010_062E,B27010_062M,B27010_063E,B27010_063M,B27010_064E,B27010_064M,B27010_065E,B27010_065M,B27010_066E,B27010_066M,Unnamed: 134,Census_Tract
0,Geography,Geographic Area Name,Estimate!!Total:,Margin of Error!!Total:,Estimate!!Total:!!Under 19 years:,Margin of Error!!Total:!!Under 19 years:,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With employer-based health insurance only,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With employer-based health insurance only,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With Medicare coverage only,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With Medicare coverage only,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With Medicaid/means-tested public coverage only,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With Medicaid/means-tested public coverage only,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Estimate!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With VA Health Care only,Margin of Error!!Total:!!Under 19 years:!!With one type of health insurance coverage:!!With VA Health Care only,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!Other private only combinations,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!Other private only combinations,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!Other public only combinations,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!Other public only combinations,Estimate!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!Other coverage combinations,Margin of Error!!Total:!!Under 19 years:!!With two or more types of health insurance coverage:!!Other coverage combinations,Estimate!!Total:!!Under 19 years:!!No health insurance coverage,Margin of Error!!Total:!!Under 19 years:!!No health insurance coverage,Estimate!!Total:!!19 to 34 years:,Margin of Error!!Total:!!19 to 34 years:,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With employer-based health insurance only,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With employer-based health insurance only,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With Medicare coverage only,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With Medicare coverage only,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With Medicaid/means-tested public coverage only,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With Medicaid/means-tested public coverage only,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Estimate!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With VA Health Care only,Margin of Error!!Total:!!19 to 34 years:!!With one type of health insurance coverage:!!With VA Health Care only,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!Other private only combinations,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!Other private only combinations,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!Other public only combinations,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!Other public only combinations,Estimate!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!Other coverage combinations,Margin of Error!!Total:!!19 to 34 years:!!With two or more types of health insurance coverage:!!Other coverage combinations,Estimate!!Total:!!19 to 34 years:!!No health insurance coverage,Margin of Error!!Total:!!19 to 34 years:!!No health insurance coverage,Estimate!!Total:!!35 to 64 years:,Margin of Error!!Total:!!35 to 64 years:,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With employer-based health insurance only,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With employer-based health insurance only,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With Medicare coverage only,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With Medicare coverage only,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With Medicaid/means-tested public coverage only,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With Medicaid/means-tested public coverage only,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Estimate!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With VA Health Care only,Margin of Error!!Total:!!35 to 64 years:!!With one type of health insurance coverage:!!With VA Health Care only,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With direct-purchase and Medicare coverage,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With direct-purchase and Medicare coverage,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!Other private only combinations,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!Other private only combinations,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!Other public only combinations,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!Other public only combinations,Estimate!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!Other coverage combinations,Margin of Error!!Total:!!35 to 64 years:!!With two or more types of health insurance coverage:!!Other coverage combinations,Estimate!!Total:!!35 to 64 years:!!No health insurance coverage,Margin of Error!!Total:!!35 to 64 years:!!No health insurance coverage,Estimate!!Total:!!65 years and over:,Margin of Error!!Total:!!65 years and over:,Estimate!!Total:!!65 years and over:!!With one type of health insurance coverage:,Margin of Error!!Total:!!65 years and over:!!With one type of health insurance coverage:,Estimate!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With employer-based health insurance only,Margin of Error!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With employer-based health insurance only,Estimate!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Margin of Error!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With direct-purchase health insurance only,Estimate!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With Medicare coverage only,Margin of Error!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With Medicare coverage only,Estimate!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Margin of Error!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With TRICARE/military health coverage only,Estimate!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With VA Health Care only,Margin of Error!!Total:!!65 years and over:!!With one type of health insurance coverage:!!With VA Health Care only,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With employer-based and direct-purchase coverage,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With employer-based and Medicare coverage,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With direct-purchase and Medicare coverage,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With direct-purchase and Medicare coverage,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!With Medicare and Medicaid/means-tested public coverage,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!Other private only combinations,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!Other private only combinations,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!Other public only combinations,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!Other public only combinations,Estimate!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!Other coverage combinations,Margin of Error!!Total:!!65 years and over:!!With two or more types of health insurance coverage:!!Other coverage combinations,Estimate!!Total:!!65 years and over:!!No health insurance coverage,Margin of Error!!Total:!!65 years and over:!!No health insurance coverage,,
1,1400000US06001400100,Census Tract 4001; Alameda County; California,3269,452,671,236,570,222,289,101,281,216,0,13,0,13,0,13,0,13,101,70,17,20,0,13,9,15,0,13,0,13,75,66,0,13,344,159,295,151,208,100,66,55,0,13,21,25,0,13,0,13,49,40,24,28,0,13,0,13,0,13,0,13,25,28,0,13,1370,277,1276,263,905,180,331,183,0,13,40,51,0,13,0,13,85,59,23,21,0,13,0,13,13,20,0,13,0,13,49,50,9,15,884,134,345,126,28,24,79,85,238,87,0,13,0,13,539,129,0,13,194,83,131,77,0,13,0,13,0,13,214,93,0,13,,6001400100.0
2,1400000US06001400200,Census Tract 4002; Alameda County; California,2147,201,354,44,331,52,308,53,23,20,0,13,0,13,0,13,0,13,23,31,23,31,0,13,0,13,0,13,0,13,0,13,0,13,444,147,413,144,397,147,13,14,0,13,3,18,0,13,0,13,13,18,0,13,0,13,0,13,0,13,0,13,13,18,18,65,796,104,747,109,635,101,92,46,7,12,13,15,0,13,0,13,49,37,29,32,10,15,0,13,10,13,0,13,0,13,0,13,0,13,553,81,149,54,11,11,6,9,132,54,0,13,0,13,404,75,0,13,202,69,80,40,0,13,0,13,0,13,122,43,0,13,,6001400200.0
3,1400000US06001400300,Census Tract 4003; Alameda County; California,5619,571,990,249,911,234,889,234,0,19,0,19,22,35,0,19,0,19,79,80,7,15,0,19,0,19,0,19,0,19,72,80,0,19,1537,368,1296,274,1052,251,103,68,0,19,141,111,0,19,0,19,208,161,99,135,98,105,0,19,0,19,0,19,11,21,33,39,2176,270,1980,273,1633,299,168,106,24,38,155,106,0,19,0,19,136,102,91,97,15,23,0,19,0,19,0,19,0,19,30,35,60,55,916,166,171,90,0,19,0,19,171,90,0,19,0,19,745,158,1,4,214,101,223,153,94,86,0,19,0,19,213,139,0,19,,6001400300.0
4,1400000US06001400400,Census Tract 4004; Alameda County; California,4259,597,951,305,899,298,694,296,40,41,0,13,154,167,11,18,0,13,0,13,0,13,0,13,0,13,0,13,0,13,0,13,52,66,893,266,842,256,773,257,51,42,0,13,18,22,0,13,0,13,28,26,9,14,0,13,0,13,0,13,0,13,19,21,23,30,1865,302,1707,319,1370,297,137,77,17,26,183,82,0,13,0,13,137,105,0,13,36,55,36,53,0,13,0,13,0,13,65,70,21,25,550,85,273,74,84,47,0,13,189,69,0,13,0,13,277,74,0,13,119,55,66,38,10,15,0,13,0,13,82,44,0,13,,6001400400.0


In [223]:
# Renaming columns from dictionary code to definition
financial_assistance_data = financial_assistance_data.rename(columns={'B27010_001E': 'est_total_pop'})

# Filter columns based on criteria
columns_to_keep = ['GEO_ID', 'Census_Tract', ]  # Always keep these columns
for column in health_insurance_data.columns:
    # Check if the column value is not NaN and if both strings are present in the column description
    if not pd.isna(health_insurance_data.iloc[0][column]) and 'Estimate' in health_insurance_data.iloc[0][column] and 'No health insurance coverage' in health_insurance_data.iloc[0][column]: 
        columns_to_keep.append(column)

# Create a new DataFrame with the filtered columns
filtered_health_insurance_data = health_insurance_data[columns_to_keep].copy()

# Drop the first row as it contains additional information
#filtered_health_insurance_data = filtered_health_insurance_data.drop(0)

# Reset index
filtered_health_insurance_data = filtered_health_insurance_data.reset_index(drop=True)

# Display the resulting DataFrame
display(filtered_health_insurance_data)


Unnamed: 0,GEO_ID,Census_Tract,B27010_017E,B27010_033E,B27010_050E,B27010_066E
0,Geography,,Estimate!!Total:!!Under 19 years:!!No health insurance coverage,Estimate!!Total:!!19 to 34 years:!!No health insurance coverage,Estimate!!Total:!!35 to 64 years:!!No health insurance coverage,Estimate!!Total:!!65 years and over:!!No health insurance coverage
1,1400000US06001400100,6001400100,0,0,9,0
2,1400000US06001400200,6001400200,0,18,0,0
3,1400000US06001400300,6001400300,0,33,60,0
4,1400000US06001400400,6001400400,52,23,21,0
...,...,...,...,...,...,...
9125,1400000US06115040902,6115040902,13,0,14,0
9126,1400000US06115041001,6115041001,0,0,102,0
9127,1400000US06115041002,6115041002,63,164,240,63
9128,1400000US06115041101,6115041101,19,76,75,0


In [224]:
# Convert columns to numeric
filtered_health_insurance_data.iloc[1:, 2:] = filtered_health_insurance_data.iloc[1:, 2:].apply(pd.to_numeric, errors='coerce')

# Create a new column by summing all columns starting from the fourth column
filtered_health_insurance_data['sum_without_health_insurance'] = filtered_health_insurance_data.iloc[1:, 2:].sum(axis=1)

# Display the DataFrame with the new column
display(filtered_health_insurance_data)

Unnamed: 0,GEO_ID,Census_Tract,B27010_017E,B27010_033E,B27010_050E,B27010_066E,sum_without_health_insurance
0,Geography,,Estimate!!Total:!!Under 19 years:!!No health insurance coverage,Estimate!!Total:!!19 to 34 years:!!No health insurance coverage,Estimate!!Total:!!35 to 64 years:!!No health insurance coverage,Estimate!!Total:!!65 years and over:!!No health insurance coverage,
1,1400000US06001400100,6001400100,0,0,9,0,9
2,1400000US06001400200,6001400200,0,18,0,0,18
3,1400000US06001400300,6001400300,0,33,60,0,93
4,1400000US06001400400,6001400400,52,23,21,0,96
...,...,...,...,...,...,...,...
9125,1400000US06115040902,6115040902,13,0,14,0,27
9126,1400000US06115041001,6115041001,0,0,102,0,102
9127,1400000US06115041002,6115041002,63,164,240,63,530
9128,1400000US06115041101,6115041101,19,76,75,0,170


In [225]:
without_health_insurance_sum = filtered_health_insurance_data.drop(0)
without_health_insurance_sum = without_health_insurance_sum.loc[:,['GEO_ID', 'Census_Tract', 'sum_without_health_insurance']]

# Saving metric df to .csv file
print('Saving health insurance metric data to a .csv')
without_health_insurance_sum.to_csv('without_health_insurance_sum.csv')
print('Saved')

without_health_insurance_sum

Saving health insurance metric data to a .csv
Saved


Unnamed: 0,GEO_ID,Census_Tract,sum_without_health_insurance
1,1400000US06001400100,6001400100,9
2,1400000US06001400200,6001400200,18
3,1400000US06001400300,6001400300,93
4,1400000US06001400400,6001400400,96
5,1400000US06001400500,6001400500,177
...,...,...,...
9125,1400000US06115040902,6115040902,27
9126,1400000US06115041001,6115041001,102
9127,1400000US06115041002,6115041002,530
9128,1400000US06115041101,6115041101,170


## Make a function for datasets that did not have total population values similar to the ACS demographic (code dpo5) data estimate total population

I made this really modular as we may have to use it for some other datasets, if we like it we can modify and add it to utils

The function will:
* pull the acs demographic estimated population csv file from AWS
* iterate through each input csv file that needs to calculate metric percentage
* perform the calculation on the selected csv column (sum/demographic total population) *100
* save new metric as a csv file

In [226]:
# @append_metadata
def calculate_acs_metric_percentage(input_csvs, column_num_to_be_divided, output_csvs, percent_pop_column_names):
    cri_est_pop = "s3://ca-climate-index/0_map_data/cri_acs_demographic_estimated_population.csv"
    cri_tract_est_pop = pd.read_csv(cri_est_pop)

    for i, csv in enumerate(input_csvs):
        data = pd.read_csv(csv)
        data['est_total_population'] = cri_tract_est_pop['est_total_pop']
        column = column_num_to_be_divided[i]
        data[percent_pop_column_names[i]] = (data.iloc[:, column] / data['est_total_population']) * 100
        
        # Save the updated DataFrame to a new CSV file
        output_csv = output_csvs[i]
        data.to_csv(output_csv, index=False)
        os.remove(csv)

In [236]:
input_csvs = 'ambulatory_disability_sum.csv', 'cognitive_disability_sum.csv', 'without_health_insurance_sum.csv'
column_num_to_be_divided = [3,3,3]
output_csvs = 'society_ambulatory_disability_metric.csv', 'society_cognitive_disability_metric.csv', 'society_without_health_insurance_metric.csv'
percent_pop_column_names = 'percent_population_ambulatory_disability', 'percent_population_cognitive_disability', 'percent_population_without_health_insurance'
calculate_acs_metric_percentage(input_csvs, column_num_to_be_divided, output_csvs, percent_pop_column_names)

In [237]:
bucket_name = 'ca-climate-index'
file_name = 'society_ambulatory_disability_metric.csv', 'society_cognitive_disability_metric.csv', 'society_without_health_insurance_metric.csv','society_financial_assistance_metric.csv' ,'society_under_5yo_metric.csv','society_over_65yo_metric.csv','society_american_indian_alaska_native_metric.csv' 
directory = '3_fair_data/index_data'

for name in file_name:
    upload_csv_aws(name, bucket_name, directory)
    # Remove final csv files from local directory
    os.remove(name)

society_ambulatory_disability_metric.csv uploaded to AWS
society_cognitive_disability_metric.csv uploaded to AWS
society_without_health_insurance_metric.csv uploaded to AWS
society_financial_assistance_metric.csv uploaded to AWS
society_under_5yo_metric.csv uploaded to AWS
society_over_65yo_metric.csv uploaded to AWS
society_american_indian_alaska_native_metric.csv uploaded to AWS
