## Governance Domain: Mortgage Percentage among Homeowners Metric Calculation
* % of homeowners with a mortgage

In [1]:
import pandas as pd
import os
import sys
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull csv from aws
# Calls the project helper `pull_csv_from_directory` on the ACS folder below,
# passing search_zipped=False.
# NOTE(review): presumably this saves the folder's CSVs into the working
# directory -- the next cell reads 'by_county.csv' from a local path; confirm.
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/governance/personal_preparedness/american_community_survey/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

In [None]:
# Load the county-level table from the local CSV and report its raw row count
homeowners_insurance_data_county = pd.read_csv('by_county.csv')
print(len(homeowners_insurance_data_county))

# Rename the label column to 'index' (used as the index when transposing later)
# and discard the first row of the table.
homeowners_insurance_data_county = (
    homeowners_insurance_data_county
    .rename(columns={'Label (Grouping)': 'index'})
    .iloc[1:]
)
homeowners_insurance_data_county.head(5)

# os.remove('by_county.csv')

In [None]:
# Pivot the table: the row labels become the column names, and every former
# column header becomes a row (exposed as the 'index' column by reset_index).
homeowners_insurance_data_county_transpose = (
    homeowners_insurance_data_county
    .set_index('index')
    .transpose()
    .reset_index()
)

homeowners_insurance_data_county_transpose

In [None]:
# Inspect the exact column labels of the transposed table; the next cell has to
# match one of them character-for-character (including leading '\xa0').
homeowners_insurance_data_county_transpose.columns

In [None]:
# Keep only the identifier column and the "with a mortgage" column.
# The mortgage label is matched exactly, including its eight leading
# non-breaking spaces ('\xa0').
# NOTE(review): the '\xa0' prefix is presumably the indentation of nested row
# labels in the source table rather than a side effect of transposing -- confirm
# before normalising it away, since the indentation may be what distinguishes
# this row from other similarly named rows.
columns = [
    'index',
    '\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0Housing units with a mortgage',
]

# Give the retained columns analysis-friendly names. 'MORTGAGE STATUS' is not
# among the selected columns, so that mapping entry has no effect here.
homeowners_insurance_county_columns = (
    homeowners_insurance_data_county_transpose[columns]
    .rename(
        columns={
            'MORTGAGE STATUS': 'mortgage_status',
            '\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0Housing units with a mortgage': 'housing_units_with_mortgage',
            'index': 'county',
        }
    )
)

# Preview the result
homeowners_insurance_county_columns.head()

In [None]:
# Confirm the renamed column labels ('county', 'housing_units_with_mortgage')
homeowners_insurance_county_columns.columns

In [None]:
# Display the full two-column table before the extra rows are filtered out
homeowners_insurance_county_columns

In [None]:
# Keep only the percentage rows: any row whose 'county' label mentions
# "Margin" or "Estimate" is dropped.
is_extra_entry = homeowners_insurance_county_columns['county'].str.contains('Margin|Estimate')
filtered_county_mortgage = homeowners_insurance_county_columns.loc[~is_extra_entry]

print(len(filtered_county_mortgage))
filtered_county_mortgage.head()

In [None]:
# Strip the " County, California!!Percent" suffix so 'county' holds the bare
# county name.
# The 'Margin' filter is kept as a guard so this cell can be re-run on its own.
# `.copy()` makes the filtered result an independent DataFrame, so the column
# assignment below does not write into a slice of the earlier frame
# (which raises SettingWithCopyWarning).
filtered_county_mortgage = filtered_county_mortgage[
    ~filtered_county_mortgage['county'].str.contains('Margin')
].copy()

# regex=False: the suffix is a literal string; being explicit avoids depending
# on the pandas-version-specific default of `str.replace`.
filtered_county_mortgage['county'] = filtered_county_mortgage['county'].str.replace(
    ' County, California!!Percent', '', regex=False
)

print(len(filtered_county_mortgage))
filtered_county_mortgage.head(50)

In [None]:
# Load the tract-to-county lookup that the county values are merged onto
county_tract = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"

# Standardise the key column names and drop the columns not needed for the merge
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'TRACT': 'census_tract', 'County': 'county'})
    .drop(columns=['Unnamed: 0', 'COUNTYFP'])
)

ca_county_tract

In [None]:
# Merge the county-level percentages onto every tract. A left merge keeps all
# tracts; tracts whose county has no row in the county table get NaN.
merged_homeowners = pd.merge(ca_county_tract, filtered_county_mortgage, on='county', how='left')

# Convert the percentage strings to floats:
#   1. 'N' entries become NaN
#   2. the '%' sign is stripped
#   3. the remainder is cast to float
# The result is assigned back to the column rather than using
# `replace(..., inplace=True)` on a column selection: that form is chained
# assignment and does not update the DataFrame under pandas Copy-on-Write.
merged_homeowners['housing_units_with_mortgage'] = (
    merged_homeowners['housing_units_with_mortgage']
    .replace('N', np.nan)
    .replace('%', '', regex=True)
    .astype(float)
)

merged_homeowners

In [None]:
# Sanity check: list the distinct values of the converted mortgage column
unique_values = merged_homeowners['housing_units_with_mortgage'].unique()
unique_values

In [None]:
# Spot check: counties that were not included in the dataset should show up as
# NaN after the left merge.
# NOTE(review): the variable is named `alpine` but the filter selects 'Amador'
# -- confirm which county was intended and make the name and filter agree.
alpine = merged_homeowners[merged_homeowners['county']=='Amador']
alpine

In [20]:
# Write the tract-level metric to a local CSV; the final cell passes this same
# file name to the upload function as `input_csv`.
# NOTE(review): 'morgage' is misspelled in the file name. It is spelled the
# same way where the file is uploaded, so any rename must change both places.
# NOTE(review): `to_csv` is called without index=False, so the row index is
# written as an extra unnamed first column -- confirm that is intended.
merged_homeowners.to_csv('governance_morgage_metric.csv')

In [None]:
# Display the final tract-level table that was written to CSV
merged_homeowners

## Function Call

In [25]:
@append_metadata
def governance_mortgage_upload(input_csv, export=False, varname=""):
    '''
    Uploads a CSV file of the percentage of homeowners with mortgages to S3.
    Data was sourced from the American Community Survey (ACS). Data code is: DP05

    Parameters
    ----------
    input_csv: str
        calculated metric csv
    export: bool, optional
        True to upload csv to AWS, False otherwise.
    varname: str, optional
        Not used inside this function. Presumably consumed by the
        `append_metadata` decorator -- confirm against its implementation.

    Methods
    --------
    Relevant columns for the Cal-CRAI metric were isolated from the original dataset.
    Entries within rows were converted to columns for better metric entry/visualization.
    Data entries were renamed for better transparency.
    Cal-CRAI tracts were merged in with the county level data, assigning mortgage percentages to each tract within its respective county.
    Missing data is kept as is (i.e., "NaN") to avoid misrepresentation by setting to zero.

    Script
    ------
    governance_homeowners_insurance.ipynb

    Note
    ------
    This function assumes users have configured the AWS CLI such that their access key /
    secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # NOTE(review): the docstring cites ACS table DP05; mortgage status is
    # normally published in DP04 (Selected Housing Characteristics) -- confirm
    # the table code against the downloaded file.

    # These messages describe the processing steps performed earlier in the
    # notebook; their text is left unchanged.
    print('Data transformation: eliminate excess headers and columns not relevant to metric calculation.')
    print('Data transformation: rename and adjust column entires to maintain cleaning standardization.')
    print('Data transformation: flatten data so metric variables become columns.')
    print('Data transformation: assign non-percentage values as nan.')

    if export:
        # upload the metric csv to aws
        bucket_name = 'ca-climate-index'
        upload_csv_aws([input_csv], bucket_name, '3_fair_data/index_data')
        # Report success only after the upload call has actually run; the
        # previous version printed this message when export was False.
        print(f'{input_csv} uploaded to AWS.')
    else:
        print(f'{input_csv} was not uploaded to AWS (export=False).')

    # os.remove(input_csv)  # Remove local file after upload

In [26]:
# Run the upload step for the metric CSV written earlier in the notebook.
# export=True means this call goes through the function's upload branch.
# NOTE(review): `variable` is assigned but never used -- the call passes the
# literal varname='test'. Confirm which value should be passed before this is
# treated as a final run.
input_csv = 'governance_morgage_metric.csv'
variable = 'governance_acs_homeowners_insurance'

governance_mortgage_upload(input_csv, export=True, varname='test')