# Make file that calculates what % of parcels in each tract fall in specific zone classes
Zone classes can be grouped as commercial or residential, or specific ones can be called out individually (TOC is very C2- and R3-heavy).

In [1]:
import geopandas as gpd
import intake
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import laplan

# Open the intake catalog(s) matched by the YAML glob; later cells read datasets from it.
catalog = intake.open_catalog("../catalogs/*.yml")

In [2]:
# Read the `zoning_cleaned` catalog entry; the next cell uses its zone_class and geometry columns.
zoning = catalog.zoning_cleaned.read()

In [3]:
# Build one row per zone_class: keep only the class label and geometry,
# drop exact duplicate rows, then dissolve so each class is a single geometry.
zone_class = zoning[["zone_class", "geometry"]].drop_duplicates()
zone_class = zone_class.dissolve(by="zone_class")
# dissolve() moves zone_class into the index; bring it back as a column.
zone_class = zone_class.reset_index()

# Preview the first two zone classes.
zone_class.head(2)

Unnamed: 0,zone_class,geometry
0,A1,"MULTIPOLYGON (((6462965.620 1907000.500, 64637..."
1,A2,"MULTIPOLYGON (((6429791.220 1809983.400, 64297..."


In [4]:
# Load the parcel layer (catalog entry `parcels_with_duplicates`).
# Centroids are derived from its geometry in the following cell.
parcel_geom = catalog.parcels_with_duplicates.read()

In [5]:
# Represent each parcel by the centroid of its geometry.
parcel_geom = parcel_geom.assign(centroid=parcel_geom.geometry.centroid)
# Keep only the parcel identifiers plus the centroid, and make the centroid
# the active geometry so the spatial join below operates on points.
parcel_geom = parcel_geom[["AIN", "num_AIN", "centroid"]]
parcel_geom = parcel_geom.set_geometry("centroid")

In [10]:
# Work on the first 5000 parcels only.
# NOTE(review): this looks like a prototyping limit; everything downstream,
# including the parquet written at the end, is based on this subset. Confirm
# whether it should be removed before producing the final crosswalk.
parcel_geom = parcel_geom.iloc[:5000]

## Spatial join parcels with zones

In [11]:
# Tag every parcel centroid with the zone_class geometry it intersects.
# how="left" keeps centroids that match no zone (zone_class is left missing).
# NOTE(review): newer geopandas releases renamed `op` to `predicate`; confirm
# the installed version before upgrading.
gdf = gpd.sjoin(parcel_geom, zone_class, how="left", op="intersects")
# index_right is the join's pointer into zone_class and is not needed afterwards.
gdf = gdf.drop(columns="index_right")

In [None]:
# Remove rows that repeat the same point location, num_AIN and zone_class.
# x / y are temporary helper columns so the location can take part in
# drop_duplicates; they are dropped again (together with num_AIN) at the end.
gdf = gdf.assign(x=gdf.geometry.centroid.x, y=gdf.geometry.centroid.y)
gdf = gdf.drop_duplicates(subset=["x", "y", "num_AIN", "zone_class"])
gdf = gdf.reset_index(drop=True)
gdf = gdf.drop(columns=["x", "y", "num_AIN"])

## Join parcels to tracts, aggregate to tract level

In [120]:
# Census tracts: keep the tract id and geometry, renaming GEOID10 -> GEOID.
# NOTE(review): presumably GEOID is the key name used by the parcel/tract
# crosswalk (it is the merge key later on); confirm against the catalog.
tracts = catalog.census_tracts.read()
tracts = tracts[["GEOID10", "geometry"]]
tracts = tracts.rename(columns={"GEOID10": "GEOID"})

# Parcel-to-tract crosswalk, joined to the zoned parcels on AIN in the next cell.
crosswalk_parcels_tracts = catalog.crosswalk_parcels_tracts.read()

In [121]:
# Attach tract information to each zoned parcel.
# Inner join: only AINs present in both tables survive.
# validate="1:1" makes pandas raise if AIN is duplicated on either side.
gdf2 = pd.merge(
    left=crosswalk_parcels_tracts,
    right=gdf,
    how="inner",
    on="AIN",
    validate="1:1",
)

In [123]:
# Aggregate: count the parcels (AIN) in every tract / tier / zone_class combination.
# Rows whose grouping keys are missing are left out by groupby's default behavior.
group_cols = ["GEOID", "TOC_Tier", "total_AIN", "zone_class"]
by_zone_tract = gdf2.groupby(group_cols)[["AIN"]].count()
by_zone_tract = by_zone_tract.reset_index()

In [137]:
# Step 1: reshape long -> wide. One row per (GEOID, TOC_Tier, total_AIN),
# one column per zone_class, cell value = parcel count.
by_tract = by_zone_tract.pivot(
    index=["GEOID", "TOC_Tier", "total_AIN"],
    columns="zone_class",
    values="AIN",
)
by_tract = by_tract.reset_index()

# Step 2: tracts can have portions in different tiers, so drop the tier
# and sum the per-tier counts to get one row per tract.
by_tract = by_tract.drop(columns="TOC_Tier")
by_tract = by_tract.pivot_table(index=["GEOID", "total_AIN"], aggfunc="sum")
by_tract = by_tract.sort_values("GEOID")
by_tract = by_tract.reset_index()

In [140]:
# Identify the zone_class count columns: everything except the tract id and
# the tract's parcel total. These get converted to shares of total_AIN next.
remove_me = ["GEOID", "total_AIN"]
zone_cols = [col for col in by_tract.columns if col not in remove_me]

In [141]:
# Convert each zone_class count into a share of the tract's parcels
# (a fraction of total_AIN, not multiplied by 100).
for c in zone_cols:
    by_tract[c] = by_tract[c].div(by_tract["total_AIN"])

In [158]:
# Start from the full list of tract ids (geometry is not carried along) and
# attach the zone_class shares. how="left" keeps tracts with no matching
# rows in by_tract; their share columns are left missing.
# validate="1:1" makes pandas raise if GEOID repeats on either side.
final = pd.merge(
    left=tracts[["GEOID"]],
    right=by_tract,
    how="left",
    on="GEOID",
    validate="1:1",
)

In [160]:
# Write the tract-level zone_class table to S3 as parquet.
# NOTE(review): an earlier cell limits parcel_geom to 5000 rows, so this output
# reflects that sample unless the limit is removed — confirm before publishing.
final.to_parquet("s3://city-planning-entitlements/data/crosswalk_tracts_zone_class.parquet")