In [1]:
#Merging the ACS and HMDA Datasets for AZ

In [2]:
#Download the AZ HMDA Dataset
import pandas as pd

# HMDA 2021 Modified Loan/Application Register (LAR) for
# Bank of the West (LEI: QOT5WN9RBKQTFRVKEV31).
# NOTE(review): the trailing "/header" presumably selects the variant of the
# file that carries column names in its first row -- confirm against the FFIEC docs.
url = "https://ffiec.cfpb.gov/file/modifiedLar/year/2021/institution/QOT5WN9RBKQTFRVKEV31/txt/header"

# The file is pipe-delimited; row 0 is used as the column names.
df = pd.read_csv(url, sep='|', low_memory=False, header=0)

# Keep only the rows whose state_code is AZ (every other state is dropped).
df = df[df['state_code'] == 'AZ']

# Narrow to the two columns needed for the merge. The explicit .copy() makes
# df1 an independent frame, so assigning to its columns later cannot trigger
# SettingWithCopyWarning or write through to df.
df1 = df[["census_tract", "loan_amount"]].copy()

In [3]:
# Display the Arizona HMDA subset (census_tract and loan_amount columns);
# the notebook renders a truncated view of the first and last rows.
df1

Unnamed: 0,census_tract,loan_amount
2,4019004717,105000
9,4021000316,55000
10,4013114200,225000
60,4001970501,65000
77,4013216843,805000
...,...,...
29773,4013092401,205000
29780,4013112513,215000
29787,4013092312,235000
29829,4013082008,195000


In [4]:
#Downloading ACS data
import cenpy

# Query the 2019 ACS product for every census tract in Arizona.
# NOTE(review): B01002H_001E is presumably the median-age estimate for the
# "White alone, not Hispanic or Latino" population -- confirm against the
# Census variable list before interpreting the column.
acs = cenpy.products.ACS(2019)
az = acs.from_state('AZ', level='tract', variables=['B01002H_001E'])

# Rename GEOID to census_tract so the key column has the same name as in the
# HMDA frame. Rebinding (instead of inplace=True) keeps the step explicit.
az = az.rename(columns={'GEOID': 'census_tract'})

# A bare `az.shape` in the middle of a cell is never displayed, so print the
# tract count explicitly; the frame itself is shown as the cell's last expression.
print(f"{az.shape[0]} tracts, {az.shape[1]} columns")
az

  return self._from_name(state, variables, level, "States", **kwargs)


Unnamed: 0,census_tract,geometry,B01002H_001E,NAME,state,county,tract
0,04027011000,"POLYGON ((-12776137.850 3846534.780, -12776135...",71.0,"Census Tract 110, Yuma County, Arizona",04,027,011000
1,04027000907,"POLYGON ((-12764723.370 3853408.390, -12764711...",47.4,"Census Tract 9.07, Yuma County, Arizona",04,027,000907
2,04015952003,"POLYGON ((-12757980.860 4167616.330, -12757980...",61.5,"Census Tract 9520.03, Mohave County, Arizona",04,015,952003
3,04027011104,"POLYGON ((-12745593.450 3851626.960, -12745593...",61.9,"Census Tract 111.04, Yuma County, Arizona",04,027,011104
4,04027011110,"POLYGON ((-12739866.280 3851192.820, -12739863...",72.3,"Census Tract 111.10, Yuma County, Arizona",04,027,011110
...,...,...,...,...,...,...,...
1521,04007001000,"POLYGON ((-12350808.560 3972491.030, -12350804...",57.3,"Census Tract 10, Gila County, Arizona",04,007,001000
1522,04017960400,"POLYGON ((-12325589.240 4186965.180, -12325146...",49.8,"Census Tract 9604, Navajo County, Arizona",04,017,960400
1523,04017960200,"POLYGON ((-12281279.960 4115564.580, -12281277...",38.3,"Census Tract 9602, Navajo County, Arizona",04,017,960200
1524,04009961300,"POLYGON ((-12215424.800 3872870.940, -12215424...",34.9,"Census Tract 9613, Graham County, Arizona",04,009,961300


In [5]:
# Align the ACS tract codes with the HMDA ones by casting to text and dropping
# the leading '0' characters (e.g. "04027011000" becomes "4027011000").
az["census_tract"] = az["census_tract"].astype(str).str.lstrip('0')
az

Unnamed: 0,census_tract,geometry,B01002H_001E,NAME,state,county,tract
0,4027011000,"POLYGON ((-12776137.850 3846534.780, -12776135...",71.0,"Census Tract 110, Yuma County, Arizona",04,027,011000
1,4027000907,"POLYGON ((-12764723.370 3853408.390, -12764711...",47.4,"Census Tract 9.07, Yuma County, Arizona",04,027,000907
2,4015952003,"POLYGON ((-12757980.860 4167616.330, -12757980...",61.5,"Census Tract 9520.03, Mohave County, Arizona",04,015,952003
3,4027011104,"POLYGON ((-12745593.450 3851626.960, -12745593...",61.9,"Census Tract 111.04, Yuma County, Arizona",04,027,011104
4,4027011110,"POLYGON ((-12739866.280 3851192.820, -12739863...",72.3,"Census Tract 111.10, Yuma County, Arizona",04,027,011110
...,...,...,...,...,...,...,...
1521,4007001000,"POLYGON ((-12350808.560 3972491.030, -12350804...",57.3,"Census Tract 10, Gila County, Arizona",04,007,001000
1522,4017960400,"POLYGON ((-12325589.240 4186965.180, -12325146...",49.8,"Census Tract 9604, Navajo County, Arizona",04,017,960400
1523,4017960200,"POLYGON ((-12281279.960 4115564.580, -12281277...",38.3,"Census Tract 9602, Navajo County, Arizona",04,017,960200
1524,4009961300,"POLYGON ((-12215424.800 3872870.940, -12215424...",34.9,"Census Tract 9613, Graham County, Arizona",04,009,961300


In [6]:
# Cast the tract code back to an integer (it was turned into a string above).
# The width is pinned to 64 bits: plain `int` resolves to int32 on some
# platforms (e.g. Windows with NumPy < 2), and 11-digit tract codes such as
# 4027011000 do not fit in int32.
az["census_tract"] = az["census_tract"].astype("int64")

In [7]:
# Inner-join the ACS tract table with the HMDA loans on the shared tract code;
# rows are kept only where the code appears in both tables.
merge = az.merge(
    df1,
    how="inner",
    on="census_tract",
)

In [8]:
# Preview the first five rows of the merged frame. Each loan is its own row,
# so the tract-level columns repeat for loans in the same tract.
merge.head(n=5)

Unnamed: 0,census_tract,geometry,B01002H_001E,NAME,state,county,tract,loan_amount
0,4015952800,"POLYGON ((-12725235.900 4095992.210, -12725235...",60.2,"Census Tract 9528, Mohave County, Arizona",4,15,952800,85000
1,4015952800,"POLYGON ((-12725235.900 4095992.210, -12725235...",60.2,"Census Tract 9528, Mohave County, Arizona",4,15,952800,185000
2,4015952800,"POLYGON ((-12725235.900 4095992.210, -12725235...",60.2,"Census Tract 9528, Mohave County, Arizona",4,15,952800,95000
3,4015952800,"POLYGON ((-12725235.900 4095992.210, -12725235...",60.2,"Census Tract 9528, Mohave County, Arizona",4,15,952800,185000
4,4012020100,"POLYGON ((-12718472.790 4031885.010, -12718457...",75.2,"Census Tract 201, La Paz County, Arizona",4,12,20100,85000
