# Recreate PCTS table before regression

In [1]:
import intake
import numpy as np
import pandas as pd
import pcts_census_utils

# S3 bucket holding the project's parquet inputs.
bucket_name = "city-planning-entitlements"

# Intake catalog assembled from every YAML file in ../catalogs.
catalog = intake.open_catalog("../catalogs/*.yml")

In [2]:
# Parent cases table: one PARENT_CASE column plus one column per prefix/suffix.
parent_cases_path = f"s3://{bucket_name}/data/final/parents_with_prefix_suffix.parquet"
parent_cases = pd.read_parquet(parent_cases_path)

# Every column except PARENT_CASE is treated as a prefix/suffix name.
prefix_suffix_list = parent_cases.columns.tolist()
prefix_suffix_list.remove("PARENT_CASE")

# Subset PCTS with the project helper, passing "2010-01" and the prefix/suffix list.
# NOTE(review): "2010-01" is presumably the earliest filing month kept -- confirm
# against pcts_census_utils.subset_pcts.
pcts = pcts_census_utils.subset_pcts("2010-01", prefix_suffix_list)

In [7]:
# Identifier / descriptive columns of the PCTS table (everything that is
# not a prefix/suffix dummy column).
pcts_ids = [
    "CASE_ID",
    "APLC_ID",
    "CASE_NBR",
    "CASE_SEQ_NBR",
    "CASE_YR_NBR",
    "CASE_ACTION_ID",
    "CASE_FILE_RCV_DT",
    "CASE_FILE_DATE",
    "PARNT_CASE_ID",
    "PARENT_CASE",
    "AIN",
    "PROJ_DESC_TXT",
    "id",
]

# Case-level information only.
pcts_case_info = pcts[pcts_ids]

In [8]:
# Drop every identifier column except CASE_ID, which is kept as the key.
pcts_ids_exclude = [col for col in pcts_ids if col != "CASE_ID"]

# Wide table: CASE_ID plus the remaining (prefix/suffix) columns, de-duplicated.
# Zeros are turned into NaN so they can be dropped after reshaping.
pcts_suffix = (
    pcts
    .drop(columns=pcts_ids_exclude)
    .drop_duplicates()
    .replace(0, np.nan)
)

In [10]:
# Reshape wide -> long: one row per (CASE_ID, suffix) pair.
suffix_long = pcts_suffix.melt(
    id_vars="CASE_ID",
    var_name="suffix",
    value_name="dummy",
)

# Keep only the pairs whose flag is non-null, then discard the flag column.
suffix_present = suffix_long[suffix_long["dummy"].notna()]
pcts_suffix = suffix_present.drop(columns="dummy")

In [26]:
# Attach the long-format suffixes back onto the case-level information.
pcts_cleaned = pd.merge(
    left=pcts_case_info,
    right=pcts_suffix,
    how="inner",
    on="CASE_ID",
)

## Make sure we can get the entitlement table in the same shape

In [27]:
# Load the parcel (AIN) -> tract crosswalk here, before its first use, so this
# cell works on a fresh kernel (Restart & Run All) instead of relying on a
# variable left over from a later cell.
parcel_to_tract = pd.read_parquet(
    f"s3://{bucket_name}/data/crosswalk_parcels_tracts.parquet"
)

# Merge PCTS suffixes with AIN to tract.
# validate="m:1" makes pandas raise if an AIN appears more than once in the crosswalk.
pcts_suffixes = pd.merge(
    pcts_cleaned,
    parcel_to_tract,
    on="AIN",
    how="inner",
    validate="m:1",
)

In [33]:
# Parcel-to-tract crosswalk
crosswalk_path = "s3://city-planning-entitlements/data/crosswalk_parcels_tracts.parquet"
parcel_to_tract = pd.read_parquet(crosswalk_path)

# ACS data for income, race, commute, tenure
census_path = "s3://city-planning-entitlements/data/final/census_analysis_table.parquet"
census = pd.read_parquet(census_path)

# Census tracts, reduced to the columns needed downstream.
tract_columns = ["GEOID", "density", "population", "geometry"]
tracts = catalog.census_tracts.read()

tract_population = tracts["HD01_VD01"]
# NOTE(review): presumably Shape_STAr is in square feet, so dividing by 5280
# twice yields square miles -- confirm against the catalog source.
tract_area = tracts["Shape_STAr"] / 5280. / 5280.

# NOTE(review): density uses the integer-cast population while `population`
# keeps the raw HD01_VD01 dtype -- confirm that is intended.
tracts = tracts.assign(
    GEOID=tracts["GEOID10"],
    density=tract_population.astype(int) / tract_area,
    population=tract_population,
)[tract_columns]

In [36]:
# Attach tract-level density, population and geometry to each case row.
# validate="1:m" makes pandas raise if a GEOID is duplicated in `tracts`.
pcts_suffixes = pd.merge(
    left=tracts,
    right=pcts_suffixes,
    how="inner",
    on="GEOID",
    validate="1:m",
)

In [37]:
# Remove cases that are region-wide, touch thousands of parcels,
# or should otherwise be ignored.
excluded_suffixes = ("CPU", "CA", "SP", "SN", "ICO", "HPOZ", "HD", "FH")
has_excluded_suffix = pcts_suffixes["suffix"].isin(excluded_suffixes)
pcts_suffixes = pcts_suffixes[~has_excluded_suffix]

# Cases with more than 10 remaining rows are treated as "big" and removed.
# NOTE(review): this counts rows per CASE_ID (one per suffix x parcel), not
# distinct parcels -- confirm that is the intended threshold.
big_cases = (
    pcts_suffixes["CASE_ID"]
    .value_counts()
    .loc[lambda counts: counts > 10]
)
is_big_case = pcts_suffixes["CASE_ID"].isin(big_cases.index)
pcts_suffixes = pcts_suffixes[~is_big_case]

In [39]:
# First pass at analyzing entitlements: count the cases per census tract,
# suffix and year, to see which kinds of entitlements are being applied
# for in which types of census tract.
case_counts = (
    pcts_suffixes
    .groupby(["GEOID", "suffix", "CASE_YR_NBR"])
    .size()
    .to_frame("count")
)

# Move suffix, then the case year, out of the index (GEOID stays as the index),
# giving the column order year, suffix, count.
entitlement = (
    case_counts
    .reset_index(level="suffix")
    .reset_index(level="CASE_YR_NBR")
    .rename(columns={"CASE_YR_NBR": "year"})
    .assign(year=lambda frame: frame["year"].astype("Int64"))
)

In [40]:
# Display the aggregated table: indexed by GEOID, with columns year, suffix, count.
entitlement

Unnamed: 0_level_0,year,suffix,count
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
06037101110,2006,AA,1
06037101110,2007,AA,1
06037101110,2014,AA,1
06037101110,2017,AA,1
06037101110,2007,APCNV,1
...,...,...,...
06037980028,2015,ZA,1
06037980028,2016,ZA,2
06037980028,2018,ZA,2
06037980028,2019,ZA,1


## Parcels with many duplicate geometries
* Different AINs, but the same geometry

In [None]:
# Disabled exploratory check, kept as a bare string literal so none of it runs.
# If re-enabled, it would read the parcel-to-tract crosswalk and report how many
# rows have num_AIN == 1, 2, 3-5, 6-10 and 11+, plus the share with num_AIN == 1.
# NOTE(review): presumably num_AIN counts the AINs sharing one geometry -- confirm.
"""
crosswalk = pd.read_parquet(f"s3://{bucket_name}/data/crosswalk_parcels_tracts.parquet")
display(crosswalk[crosswalk.num_AIN > 1].num_AIN.describe())

print(f"# parcels (denom): {len(crosswalk)}")
print(f"# unique parcels (numer): {len(crosswalk[crosswalk.num_AIN==1])}")

numer = len(crosswalk[crosswalk.num_AIN==1])
denom = len(crosswalk)

print(f"% unique parcels: {numer/denom}")

print(f"# parcels with 2 obs: {len(crosswalk[crosswalk.num_AIN==2])}")
print(f"# parcels with 3-5 obs: {len(crosswalk[(crosswalk.num_AIN >= 3) & (crosswalk.num_AIN <=5)])}")
print(f"# parcels with 6-10 obs: {len(crosswalk[(crosswalk.num_AIN >= 6) & (crosswalk.num_AIN <=10)])}")
print(f"# parcels with 11+ obs: {len(crosswalk[crosswalk.num_AIN >= 11])}")
"""