In [None]:
#This analysis investigates whether eviction rates are higher near Purple Line light rail stations 
#compared to other areas in Prince George’s and Montgomery Counties, Maryland. The research builds on prior 
#tabular analyses by incorporating spatial methods to examine how proximity to transit infrastructure may 
#correlate with housing instability. Using address-level eviction data from Maryland Judiciary Case Search 
#and geospatial data for planned Purple Line station locations, I conduct a proximity analysis to
#classify eviction events as either "near" or "far" from Purple Line stations (within 1 mile and beyond 1 
#mile, respectively). Eviction density is calculated using kernel density estimation and aggregated to 
#Census tracts for comparison with demographic and housing variables.

In [1]:
import pandas as pd
import geopandas as gpd
import utils
import exercise03
import census_geocode

%load_ext autoreload
%autoreload 2

In [None]:
# Read the eviction warrant records. ZIP codes pass through the nullable Int64 dtype before
# becoming strings, so they are stored without a decimal part.
warrants_df = pd.read_csv('md_eviction_warrants_through_2024.csv').assign(
    TenantZipCode=lambda raw: raw['TenantZipCode'].astype('Int64').astype('string')
)

# Number of warrant records loaded
len(warrants_df)

In [None]:
# Rather than geocoding 400K+ addresses, could we get only the unique ones?
# NOTE(review): prep_warrants_for_geocoding comes from the project's exercise03 module and is
# not shown here; presumably it de-duplicates the tenant addresses and shapes them for the
# geocoder -- confirm against that module.
geocode_input_df = exercise03.prep_warrants_for_geocoding(warrants_df)

In [None]:
# The Census Geocoder API accepts at most 10K rows per request, so the address table is
# cut into smaller pieces before it is written out.
max_rows_per_chunk = 9999  # keeps every piece under the 10K-row limit
geocode_input_dfs = utils.chunk_dataframe(geocode_input_df, max_rows_per_chunk)

# Write each piece to its own CSV, without a header row
geocode_input_location = 'geocode_inputs'
utils.save_dfs_to_csv(geocode_input_dfs, geocode_input_location, header=False)

In [None]:
# Geocode addresses with the Census Geocoder (set test=True to process only one file)
# While test=True is left on, only one of the chunked input files is geocoded, so every cell
# that follows works from a partial set of geocodes.
# NOTE(review): presumably reads the CSVs saved under 'geocode_inputs' and writes results under
# 'geocode_outputs', and test=False processes every file -- confirm in census_geocode.
census_geocode.geocode_csvs('geocode_inputs', 'geocode_outputs', test=True)

In [None]:
# Recombine outputs from geocoder into a single dataframe
# NOTE(review): combine_census_geocoded_csvs is a project helper not shown here; the next cell
# joins this frame to geocode_input_df by index, so the index it returns presumably lines up
# with the geocoder input rows -- confirm.
geocode_output_df = exercise03.combine_census_geocoded_csvs('geocode_outputs')
# Number of geocoder result rows that were combined
len(geocode_output_df)

In [None]:
# Attach the geocoder results to the geocoder inputs (which hold separate address, city,
# state, and zip fields), pairing rows by index.
geocoded_df = pd.merge(
    geocode_input_df,
    geocode_output_df,
    how='inner',  # the pandas default, spelled out: only index values found on both sides survive
    left_index=True,
    right_index=True,
)

# Number of input addresses that received a geocoder result row
len(geocoded_df)

In [None]:
# Use address, city, state, and zip columns to join geocodes onto original warrant records
join_keys = ['TenantAddress', 'TenantCity', 'TenantState', 'TenantZipCode']

# This cell overwrites warrants_df. Without the drop below, running it a second time would
# merge against the already-merged frame, and pandas would suffix every geocode column with
# _x/_y, breaking later lookups such as 'match_lon_lat'. Dropping the columns left over from
# a previous run keeps the cell idempotent; on a first run there is nothing to drop.
# NOTE(review): assumes the raw warrants share no non-key column names with geocoded_df -- confirm.
stale_geocode_cols = geocoded_df.columns.difference(join_keys)
warrants_df = (
    warrants_df
    .drop(columns=stale_geocode_cols, errors='ignore')
    .merge(geocoded_df, on=join_keys)
)

# Default inner join: warrants whose address has no row in geocoded_df are dropped here, so
# compare this count with the number of warrants originally loaded.
len(warrants_df)

In [None]:
# Convert warrants into a geodataframe with points
# NOTE(review): lonlat_str_to_geodataframe is a project helper not shown here; presumably it
# parses the 'match_lon_lat' string column into point geometry and adds the `lon` column that
# a later cell checks for nulls -- confirm in utils.
warrants_gdf = utils.lonlat_str_to_geodataframe(warrants_df, 'match_lon_lat')

In [None]:
# Share of warrant records that have a longitude, i.e. were given a point
has_point_mask = warrants_gdf['lon'].notna()
int(has_point_mask.sum()) / len(warrants_gdf)

In [None]:
# Share of warrant records where the geocoder reported an 'Exact' match
is_exact_mask = warrants_gdf['match_type'].eq('Exact')
int(is_exact_mask.sum()) / len(warrants_gdf)

In [None]:
# Save the geocoded warrants to disk as a GeoParquet file; the next cell reloads it from there
warrants_gdf.to_parquet('md_eviction_warrants_through_2024.geoparquet')

In [None]:
# Reload the saved geocoded warrants from the GeoParquet file into `gdf`
gdf = gpd.read_parquet('md_eviction_warrants_through_2024.geoparquet')

In [None]:
# List the column names available on the reloaded warrants
list(gdf.columns)