# What are the demographic characteristics of neighborhoods where entitlements are requested?

In [1]:
import intake
import IPython.display
import matplotlib.pyplot as plt
import pandas

# Open the intake catalog files matching ../catalogs/*.yml. Every dataset
# used in this notebook is read from this catalog object.
cat = intake.open_catalog("../catalogs/*.yml")

In [2]:
# Load PCTS extract
# The index that comes back from the catalog is discarded in favor of a
# fresh positional index.
pcts = cat.pcts2.read().reset_index(drop=True)

In [3]:
# ACS data for income, race, commute, tenure
# This table is later indexed by its GEOID column and joined to the
# per-tract entitlement counts.
census = cat.census_analysis_table.read()

In [4]:
# Census tracts, extended with two derived columns:
#   density    - HD01_VD01 (cast to int) divided by the tract area, where the
#                area is Shape_STAr / 5280 / 5280 (presumably square feet
#                converted to square miles -- TODO confirm the units)
#   population - HD01_VD01 as stored in the source table (not cast)
tracts = cat.census_tracts.read()
tracts = tracts.assign(
    density=lambda frame: (
        frame.HD01_VD01.astype(int) / (frame.Shape_STAr / 5280. / 5280.)
    ),
    population=lambda frame: frame.HD01_VD01,
)

In [5]:
# Parcel-to-tract crosswalk. Its AIN and GEOID columns are used below to
# assign a census tract to each PCTS case.
parcel_to_tract = cat.crosswalk_parcels_tracts.read()

In [6]:
# The requested entitlements are in the suffixes of the PCTS case number.
# A given case can have an arbitrary number of entitlement suffixes, so
# we need to parse it into its component parts.
# We can use a regex for that:

from utils import GENERAL_PCTS_RE
# str.extract returns one column per capture group in the pattern. Judging
# by the preview in the next cell, the columns are: 0 = case prefix,
# 1 = year, 2 = case number, 3 = the dash-joined suffix string.
cols = pcts.CASE_NBR.str.extract(GENERAL_PCTS_RE)

In [7]:
# Preview the parsed case-number components.
cols

Unnamed: 0,0,1,2,3
0,ZA,2013,3079,-CEX
1,CPC,2019,7393,-CA
2,AA,2013,3080,-PMLA-SL
3,ENV,2013,3081,-MND
4,ZA,2013,3082,-ZAA
...,...,...,...,...
349550,ZA,2020,400,-CU
349551,ADM,2020,329,-TOC
349552,DIR,2020,306,-DRB-SPP-MSP
349553,ADM,2020,272,-CPIOC


In [8]:
# Column 3 holds the dash-joined suffix string (e.g. "-DRB-SPP-MSP").
# Strip the surrounding dashes and split on "-" so that each suffix lands
# in its own column; the number of columns is set by the longest case.
all_suffixes = cols[3].str.strip("-").str.split("-", expand=True)

In [9]:
# Tally how often each suffix occurs overall. Counts are first taken
# separately for every suffix position (column), then added together across
# positions and ordered from most to least frequent.
per_position_counts = all_suffixes.apply(pandas.Series.value_counts, axis=0)
suffix_totals = per_position_counts.sum(axis=1).astype(int)
suffix_counts = suffix_totals.sort_values(ascending=False)

In [10]:
# Associate each PCTS entitlement case with a census tract.
# Step 1: look up the tract GEOID for each case through its parcel (AIN).
cases_with_geoid = pcts.merge(
    parcel_to_tract[["GEOID", "AIN"]],
    on="AIN",
    how="left",
)
# Step 2: left-join the tract table's GEOID10 key onto that result, so
# GEOID10 is missing wherever the GEOID has no matching tract.
pcts = cases_with_geoid.merge(
    tracts[["GEOID10"]],
    left_on="GEOID",
    right_on="GEOID10",
    how="left",
)

In [11]:
# Put the per-position suffix columns next to the case data.
# NOTE(review): concat(axis=1) aligns rows on the index. all_suffixes was
# derived from pcts before the tract merges, so this assumes those merges
# kept the row count and order (i.e. each AIN matches at most one crosswalk
# row) -- confirm, otherwise suffixes are attached to the wrong cases.
pcts_suffixes = pandas.concat((pcts, all_suffixes), axis=1)

In [None]:
# A case may carry several suffixes, but aggregations and statistics are
# much easier to write against one suffix column. Melting the per-position
# suffix columns into a single "suffix" column gives one row per
# (case, suffix) pair, so a case requesting several entitlements shows up
# several times.

melted = pcts_suffixes.melt(
    id_vars=pcts.columns,
    var_name="nothing",
    value_name="suffix",
)
# Positions a case did not use melt into rows with no suffix; discard them.
melted = melted.dropna(subset=["suffix"])
# Neither the suffix position ("nothing") nor GEOID10 is needed downstream.
pcts_suffixes = melted.drop(columns=["GEOID10", "nothing"])

In [None]:
# Remove the 100 case ids that account for the most rows in the melted
# table. value_counts() orders ids by descending row count, so head(100)
# picks the largest ones.
rows_per_case = pcts_suffixes.CASE_ID.value_counts()
big_cases = rows_per_case.head(100).index
belongs_to_big_case = pcts_suffixes.CASE_ID.isin(big_cases)
pcts_suffixes = pcts_suffixes[~belongs_to_big_case]

In [None]:
# First pass at the analysis: count the rows for every combination of
# census tract, suffix and case year, to see which kinds of entitlements are
# being applied for in which types of census tract.
group_sizes = pcts_suffixes.groupby(["GEOID", "suffix", "CASE_YR_NBR"]).size()
entitlement = group_sizes.to_frame("count")
# Move suffix and year out of the index into columns, leaving GEOID as the
# index so the table can be joined to tract-level data.
entitlement = entitlement.reset_index(level="suffix")
entitlement = entitlement.reset_index(level="CASE_YR_NBR")
entitlement = entitlement.rename(columns={"CASE_YR_NBR": "year"})
# Nullable integer dtype, so the year stays integral even if values are missing.
entitlement["year"] = entitlement["year"].astype("Int64")

In [None]:
# Attach the entitlement counts to the census (income etc.) table.
# Both sides are keyed by tract GEOID; the left join keeps every census row.
census_by_tract = census.set_index("GEOID")
joined = pandas.merge(
    census_by_tract,
    entitlement,
    left_index=True,
    right_index=True,
    how="left",
)
# Tracts without entitlements get a count of zero, but they still have no
# suffix or year, so the dropna() below removes them along with any other
# row that has a missing value.
joined["count"] = joined["count"].fillna(0).astype("int64")
joined = joined.dropna()

In [None]:
# Bring in population density
# The tract table has to contribute its `density` column, not only the
# GEOID10 join key (which is dropped again right after the merge); the
# plotting code below reads `joined.density`. The column is requested only
# when `joined` does not already have it, so re-running this cell does not
# produce density_x / density_y duplicates.
tract_columns = ["GEOID10"]
if "density" not in joined.columns:
    tract_columns.append("density")

joined = joined.reset_index().merge(
    tracts[tract_columns],
    how="left",
    left_on="GEOID",
    right_on="GEOID10",
).drop(columns="GEOID10").set_index("GEOID").dropna()

In [None]:
# Plot entitlement stats against median household income,
# population density, and geography:
def plot_entitlement(df, tracts, suffix, year="2017"):
    """Build a three-panel figure for a single entitlement suffix.

    The left column holds two scatter plots of the per-tract entitlement
    count: against median household income (top) and against population
    density (bottom). The right column is a map of ``tracts`` colored by
    that count, with tracts that have no matching rows shown as zero.

    Parameters
    ----------
    df
        Frame indexed by tract id with ``count``, ``year``, ``suffix``,
        ``medhhincome`` and ``density`` columns.
    tracts
        Frame with a ``GEOID10`` column whose ``.plot`` draws the map
        (presumably a GeoDataFrame -- confirm).
    suffix
        Entitlement suffix to select from ``df.suffix``.
    year
        ``"all"`` sums the counts from 2010 onward for each tract; any
        other value is converted with ``int()`` and matched exactly.

    Returns
    -------
    The matplotlib figure. It is closed in pyplot before being returned so
    that a notebook does not render it twice.
    """
    has_cases = df["count"] > 0
    suffix_matches = df.suffix == suffix
    if year != "all":
        to_plot = df[has_cases & (df.year == int(year)) & suffix_matches]
    else:
        selected = df[has_cases & (df.year >= 2010) & suffix_matches]
        # Collapse the yearly rows to one row per tract. Income and density
        # are tract-level values, so the first one is representative.
        to_plot = selected.groupby(selected.index).agg({
            "count": "sum",
            "medhhincome": "first",
            "density": "first"
        })

    fig = plt.figure(figsize=(12, 8))

    # Left column: one scatter panel per explanatory variable.
    scatter_panels = (
        ((0, 0), "Median income", "medhhincome"),
        ((1, 0), "Population density", "density"),
    )
    for grid_position, x_label, x_column in scatter_panels:
        scatter_ax = plt.subplot2grid((2, 2), grid_position, colspan=1)
        scatter_ax.set_xlabel(x_label)
        scatter_ax.set_ylabel("Entitlement count per tract")
        scatter_ax.scatter(to_plot[x_column], to_plot["count"])

    # Right column, spanning both rows: map of counts by tract.
    map_ax = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
    map_ax.axis("off")
    mapped = tracts.merge(
        to_plot,
        left_on="GEOID10",
        right_index=True,
        how="left",
    )
    mapped = mapped.fillna({"count": 0})
    mapped.plot(ax=map_ax, column="count", cmap="magma")

    plt.close()  # Prevent double plotting
    return fig

In [None]:
# Scratch check: repeat the "all years" selection and aggregation from
# plot_entitlement for the "CE" suffix and inspect the resulting dtypes.
df = joined
suffix = "CE"
selection = (df["count"] > 0) & (df.year >= 2010) & (df.suffix == suffix)
to_plot = df[selection]
per_tract = to_plot.groupby(to_plot.index).agg({
    "count": "sum",
    "medhhincome": "first",
    "density": "first"
})
per_tract.dtypes

In [None]:
import ipywidgets

# Year choices are (label, value) pairs: an "all" entry followed by one
# entry for each year from 2010 through 2019.
years = [("All (from 2010 - present)", "all")]
years.extend((str(yr), str(yr)) for yr in range(2010, 2020))

year_dropdown = ipywidgets.Dropdown(options=years, description="Year")
# The suffix options are filled in by on_year_selection.
suffix_dropdown = ipywidgets.Dropdown(description="Suffix")
output = ipywidgets.Output()

for widget in (year_dropdown, suffix_dropdown, output):
    display(widget)

# While True, on_suffix_selection ignores change notifications. It is set
# while on_year_selection rewrites the suffix dropdown.
change_guard = False

def on_suffix_selection(*args):
    """Redraw the figure for the current suffix and year selections.

    Used as an observer callback (any arguments are ignored) and also
    called directly. Does nothing while ``change_guard`` is set, i.e. while
    the suffix dropdown is being repopulated.
    """
    # The flag is only read here, so no `global` declaration is required.
    if not change_guard:
        # wait=True keeps the old figure until the new one is ready.
        output.clear_output(wait=True)
        selected_suffix = suffix_dropdown.value
        selected_year = year_dropdown.value
        with output:
            display(plot_entitlement(joined, tracts, selected_suffix, selected_year))

def on_year_selection(*args):
    """Repopulate the suffix dropdown for the selected year, then redraw.

    The suffix options are rebuilt from the rows of ``joined`` that match
    the selected year ("all" means 2010 onward), ordered by total count and
    labelled with that total. The previously selected suffix is kept when
    it is still available; otherwise the first option is selected.

    Used as an observer callback (any arguments are ignored) and also
    called directly to initialise the dropdown.
    """
    global change_guard
    if year_dropdown.value == "all":
        condition = (joined.year >= 2010)
    else:
        condition = (joined.year == int(year_dropdown.value))
    counts = joined[condition].groupby("suffix").agg({"count": "sum"})["count"]
    counts = counts.sort_values(ascending=False)
    old_val = suffix_dropdown.value

    # Suppress on_suffix_selection while options/value are being rewritten,
    # so the plot is drawn once at the end rather than once per assignment.
    change_guard = True
    try:
        suffix_dropdown.options = [
            (f"{name} ({count:,} applications)", name)
            for name, count in counts.items()
        ]
        if old_val in counts.index:
            suffix_dropdown.value = old_val
        elif not counts.empty:
            # With no matching suffixes there is nothing to select, so the
            # index is only set when at least one option exists.
            suffix_dropdown.index = 0
    finally:
        # Always release the guard: if it stayed True after an error, every
        # later suffix change would be silently ignored.
        change_guard = False
    on_suffix_selection()

# Populate the suffix dropdown and draw the first figure before any
# observers are attached.
on_year_selection()
# From here on, redraw whenever either dropdown's value changes.
suffix_dropdown.observe(on_suffix_selection, names="value")
year_dropdown.observe(on_year_selection, names="value")