In [1]:
import getpass
import pathlib
import pandas as pd
import geopandas as gpd
from mtcpy.geospatial import google_geocode_batch

user = getpass.getuser()

Info: Found credentials at: /Users/jcroff/Library/CloudStorage/Box-Box/dvutils-creds-jcroff.json


## Import data

In [2]:
work_dir = pathlib.Path(f"/Users/{user}/Library/CloudStorage/Box-Box/DSA Projects/Spatial Analysis and Mapping/Clipper Retail Operator in EPCs")

In [3]:
clipper_path = work_dir / "Data" / "mtc_current_retail_list.xlsx"
clipper_df = pd.read_excel(clipper_path)

In [4]:
epc_2014_2018 = gpd.read_file(
    "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/equity_priority_communities_2025_acs2018/FeatureServer/0/query?outFields=*&where=epc_2050%20%3D%201&f=geojson"
)

epc_2018_2022 = gpd.read_file(
    "https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/draft_equity_priority_communities_pba2050plus_acs2022a/FeatureServer/0/query?outFields=*&where=epc_2050p%20%3D%201&f=geojson"
)

## Geocode the clipper retail locations

In [5]:
# add full address column to clipper_df
clipper_df["Full Address"] = (
    clipper_df["Address"]
    + ", "
    + clipper_df["City"]
    + ", "
    + "CA "
    + clipper_df["Zipcode"].astype(str)
)

In [13]:
# geocode the addresses
# check first if feather file exists
feather_path = work_dir / "Data" / "clipper_geocode_results.feather"
if feather_path.exists():
    # read in the feather file
    gc_results_gdf = gpd.read_feather(feather_path)
else:
    gc_results_gdf = google_geocode_batch(
        address_list=clipper_df["Full Address"].tolist(),
        include_details=False,
        allowed_location_types=["ROOFTOP", "RANGE_INTERPOLATED"],
    )

In [14]:
clipper_gdf = pd.merge(gc_results_gdf, clipper_df, left_on="address_orig", right_on="Full Address", how="left")
clipper_gdf.geometry_location_type.value_counts()

geometry_location_type
ROOFTOP               241
GEOMETRIC_CENTER        4
RANGE_INTERPOLATED      2
Name: count, dtype: int64

## Perform a spatial join

In [35]:
sj_2014_2018 = gpd.sjoin(
    left_df=clipper_gdf[["address_orig", "geometry"]],
    right_df=epc_2014_2018[["epc_2050", "geometry"]],
    how="left",
    predicate="intersects",
)

# drop index_right
sj_2014_2018 = sj_2014_2018.drop(columns="index_right")

sj_2018_2022 = gpd.sjoin(
    left_df=clipper_gdf[["address_orig", "geometry"]],
    right_df=epc_2018_2022[["epc_2050p", "geometry"]],
    how="left",
    predicate="intersects",
)

# drop index_right
sj_2018_2022 = sj_2018_2022.drop(columns="index_right")

# merge the two dataframes
sj_merge = pd.merge(
    sj_2014_2018[["address_orig", "epc_2050"]],
    sj_2018_2022[["address_orig", "epc_2050p"]],
    left_on="address_orig",
    right_on="address_orig",
    how="left",
)

# fill nan values with 0
sj_merge["epc_2050"] = sj_merge["epc_2050"].fillna(0)
sj_merge["epc_2050p"] = sj_merge["epc_2050p"].fillna(0)

In [36]:
# join the clipper_gdf with the sj_merge
final_gdf = pd.merge(
    clipper_gdf,
    sj_merge,
    left_on="address_orig",
    right_on="address_orig",
    how="left",
)

In [37]:
# create a class column to include whether it's in the 2014-2018 or 2018-2022 epc
final_gdf["epc_status"] = final_gdf.apply(lambda x: "Within EPC" if x["epc_2050p"] == 1 or x["epc_2050"] == 1 else "Not Within EPC", axis=1)
final_gdf["epc_status"].value_counts()

epc_status
Not Within EPC    222
Within EPC        145
Name: count, dtype: int64

In [38]:
reorder_cols = [
    "Name",
    "Full Address",
    "Address",
    "City",
    "Zipcode",
    "epc_2050",
    "epc_2050p",
    "epc_status",
    "geometry_location_type",
    "geometry",
]
final_gdf = final_gdf[reorder_cols]

In [40]:
final_gdf.to_csv(
    work_dir / "Results" / "mtc_current_retail_list_geocoded.csv",
    index=False,
)