# Check counts with PMU notes
Check the PCTS case counts: why are some counts higher or lower than the aggregations PMU produced?

In [1]:
import functools
import intake
import pandas as pd
import pcts_parser
import pcts_census_utils

# S3 bucket that holds the parquet files read in the next cell; it is
# interpolated into the s3:// paths there.
bucket_name = "city-planning-entitlements"
# Intake catalog opened from the YAML files one directory up.
# NOTE(review): `catalog` is not referenced by any later cell visible in this
# notebook — confirm whether it is still needed.
catalog = intake.open_catalog("../catalogs/*.yml")

In [2]:
# master_pcts: the table that is filtered and inspected throughout this
# notebook (later cells read CASE_ID, CASE_NBR, CASE_FILE_RCV_DT and PARENT_CASE).
master_pcts = pd.read_parquet(f's3://{bucket_name}/data/final/master_pcts.parquet')
# parents: joined onto the TOC rows on PARENT_CASE further down; that merge is
# validated as many-to-one, i.e. it expects one row per PARENT_CASE here.
parents = pd.read_parquet(f's3://{bucket_name}/data/final/parents_with_prefix_suffix.parquet')

In [3]:
# Work on a copy so master_pcts stays available, unfiltered, for later cells.
pcts = master_pcts.copy()

# Subset PCTS by start / end date.
# NOTE: end_date is "now", so the counts below can change between runs.
start_date = pd.to_datetime("2010-01-01")
end_date = pd.Timestamp.now()

# Selections for the prefix / suffix filter. That filter is currently
# disabled (see the commented-out block at the end of this cell).
prefix_list = None
suffix_list = ["TOC"]

# Keep rows received inside [start_date, end_date] (both ends inclusive),
# drop exact duplicate rows, and renumber the index from 0.
pcts = (
    pcts[
        (pcts.CASE_FILE_RCV_DT >= start_date)
        & (pcts.CASE_FILE_RCV_DT <= end_date)
    ]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Parse CASE_NBR into the capture groups of the project's case-number regex.
cols = pcts.CASE_NBR.str.extract(pcts_parser.GENERAL_PCTS_RE)

# Capture group 0 is used as the prefix. Capture group 3 is used as the suffix
# run: its first character is dropped and the remainder is split on "-" into
# one column per suffix (presumably group 3 starts with a leading "-";
# confirm against pcts_parser.GENERAL_PCTS_RE).
all_prefixes = cols[0]
all_suffixes = cols[3].str[1:].str.split("-", expand=True)

# --- Disabled: subset by prefix / suffix -----------------------------------
# This used to be parked inside a bare triple-quoted string. A string literal
# at the end of a cell is an expression, so Jupyter echoed the whole block
# back as the cell's output. It is kept here as real comments instead.
#
# The parked code also combined the suffix columns with functools.reduce
# without an initial value, so the first column label became the accumulator
# and the first suffix column was never tested. The version below ORs across
# all suffix columns with isin(...).any(axis=1).
#
# keep = pd.Series(True, index=pcts.index)
# if prefix_list is not None:
#     keep &= all_prefixes.isin(prefix_list)
# if suffix_list is not None:
#     # A requested suffix may sit in any of the all_suffixes columns.
#     keep &= all_suffixes.isin(suffix_list).any(axis=1)
# pcts = pcts[keep].sort_values(["CASE_ID", "AIN"]).reset_index(drop=True)

' \n# Subset by prefix\nif prefix_list is not None:\n    pcts = pcts[all_prefixes.isin(prefix_list)]\n# Subset by suffix. Since the suffix may be in any of the all_suffixes\n# column, we logical-or them together, checking if each column has one\n# of the requested ones.\nif suffix_list is not None:\n    has_suffix = functools.reduce(\n        lambda x, y: all_suffixes[y].isin(suffix_list) | (x),\n        all_suffixes.columns\n    )\n    pcts = pcts[has_suffix]\n    \npcts = pcts.sort_values(["CASE_ID", "AIN"]).reset_index(drop=True)    \n'

In [9]:
# Flag TOC cases with a plain substring match on the case number.
# regex=False: "TOC" is a literal, not a pattern.
# na=False: a missing CASE_NBR counts as "not TOC" instead of putting NaN into
# the mask, which boolean indexing rejects.
toc_cases = pcts[pcts.CASE_NBR.str.contains("TOC", regex=False, na=False)]

In [11]:
# Number of distinct TOC cases (unique CASE_ID values, not rows).
toc_cases["CASE_ID"].nunique()

1787

In [18]:
# Count the rows whose CASE_ID differs from PARENT_CASE.
# A result of 0 means every TOC-flagged row is its own parent case.
# A single case can still show up on several rows, one per AIN.
int((toc_cases["CASE_ID"] != toc_cases["PARENT_CASE"]).sum())

0

In [20]:
# Example: look at every row recorded for one TOC case (CASE_ID 224348).
toc_cases.loc[toc_cases["CASE_ID"] == 224348]

Unnamed: 0,CASE_ID,APLC_ID,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,CASE_FILE_RCV_DT,CASE_FILE_DATE,PARNT_CASE_ID,PARENT_CASE,AIN,PROJ_DESC_TXT,id
147395,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003002,TIER VERIFICATION,5781_2018
147396,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003003,TIER VERIFICATION,5781_2018
147397,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003004,TIER VERIFICATION,5781_2018
147398,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003005,TIER VERIFICATION,5781_2018
147399,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003900,TIER VERIFICATION,5781_2018
147400,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003901,TIER VERIFICATION,5781_2018
147401,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003902,TIER VERIFICATION,5781_2018
147402,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003903,TIER VERIFICATION,5781_2018
147403,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003904,TIER VERIFICATION,5781_2018
147404,224348.0,184593.0,ADM-2018-5781-TOC,5781.0,2018.0,13.0,2018-10-04,2018-10,,224348.0,5032003905,TIER VERIFICATION,5781_2018


In [15]:
# Attach the columns from `parents` to each TOC row. The inner join keeps only
# rows whose PARENT_CASE is present in both frames, and validate="m:1" makes
# pandas raise if PARENT_CASE is duplicated in `parents`.
toc_parents = toc_cases.merge(
    parents,
    how="inner",
    on="PARENT_CASE",
    validate="m:1",
)

In [17]:
# Distinct cases left after the inner merge; compare with the pre-merge count.
toc_parents["CASE_ID"].nunique()

1787

## Appeals
* Appeal counts in the dashboard are lower than PMU's counts
* However, the dashboard drops child cases
* Even among parent cases, some 1A, 2A, and 5A suffixes remain
* Let's check what happens if we keep child cases and then count appeals

In [None]:
# Appeal suffixes to select.
# NOTE(review): the notes above also mention 5A, which is not in this list —
# confirm that leaving it out is intended.
suffix_list = ["1A", "2A"]

# Presumably returns the PCTS rows carrying one of these suffixes; the first
# three positional arguments are left as None. Their meaning is defined in
# pcts_census_utils.subset_pcts, which is not visible here — confirm.
appeals = pcts_census_utils.subset_pcts(None, None, None, suffix_list=suffix_list)

In [None]:
# Number of distinct appeal cases (unique CASE_ID values, not rows).
appeals["CASE_ID"].nunique()

In [None]:
# Presumably removes child cases from `appeals`, keeping parent cases only
# (inferred from the helper's name — confirm in pcts_census_utils).
# The second positional argument is left as None.
appeals_parents = pcts_census_utils.drop_child_cases(appeals, None, suffix_list)

## TOC

In [None]:
# Rebinds suffix_list (it held the appeal suffixes above) to the TOC suffix.
suffix_list = ["TOC"]

# Same helper call as for appeals, now with the TOC suffix; the first three
# positional arguments are again left as None.
toc = pcts_census_utils.subset_pcts(None, None, None, suffix_list=suffix_list)

In [None]:
# NOTE: this rebinds `toc_parents`, which earlier in the notebook holds the
# result of merging toc_cases with parents. After this cell the name refers to
# the output of drop_child_cases instead.
# Presumably keeps only parent cases from `toc` (inferred from the helper's
# name — confirm in pcts_census_utils).
toc_parents = pcts_census_utils.drop_child_cases(toc, None, suffix_list)

In [None]:
# Second route to the TOC parent cases, via a different helper, so its row
# count can be compared with toc_parents below. Here suffix_list is passed
# positionally (fourth argument) rather than by keyword as in subset_pcts.
toc_parents2 = pcts_census_utils.get_pcts_parents(None, None, None, suffix_list)

In [None]:
# Row counts, one per line: all TOC rows, then the two parent-case variants.
print(len(toc), len(toc_parents), len(toc_parents2), sep="\n")

In [None]:
# TOC rows taken straight from the unfiltered master table (no date window).
# regex=False: "TOC" is a literal, not a pattern.
# na=False: a missing CASE_NBR counts as "not TOC" instead of putting NaN into
# the mask, which boolean indexing rejects.
master_toc = master_pcts[master_pcts.CASE_NBR.str.contains("TOC", regex=False, na=False)]

In [None]:
# Number of TOC rows in the unfiltered master table.
master_toc.shape[0]

In [None]:
# Preview the first five TOC rows.
master_toc.head(n=5)