In [1]:
import os

import ibis
import pandas
import sqlalchemy

The full query for the PCTS reporting module is given by:
```sql
SELECT DISTINCT CC.CASE_NBR AS ''CASE NUMBER'',
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        FEE.FEE_RCPT_NBR AS ''CITY RECEIPT NUMBER'',
        APC.APC_AREA_DESC AS ''AREA PLANNING COMMISSION'',
        TPA.PLAN_AREA_DESC AS ''PLAN AREA'',
        PP.CNCL_DIST_NBR AS ''COUNCIL DISTRICT'',
        CNC.CNC_DESC AS ''NEIGHBORHOOD COUNCIL'',
        TO_CHAR(NVL(CC.CASE_FILE_RCV_DT, CC.CRTN_DT), 'MM/DD/YYYY') AS ''FILE DATE'',
        SUBSTR(
        TO_CHAR(SUB.DAA_HEAR_DT, 'MM/DD/YYYY') ||
        TO_CHAR(SUB.DAA_DT_HEAR_WAIVED, 'MM/DD/YYYY') ||
        TO_CHAR(CHC.CHC_DTRM_HEAR_DT, 'MM/DD/YYYY') ||
        TO_CHAR(CHC.CHC_CONTINUE_DT, 'MM/DD/YYYY') ||
        TO_CHAR(ZON.HEAR_DT, 'MM/DD/YYYY') ||
        TO_CHAR(ZON.DT_HEAR_WAIVED, 'MM/DD/YYYY') ||
        TO_CHAR(APL.BZA_PUBLC_HEAR_DT, 'MM/DD/YYYY') ||
        '          ', 1, 10) AS ''DCP HEARING DATE'',
        TO_CHAR(NVL(CPC.CPC_DTRM_DT, CPC.DT_HEAR_WAIVED), 'MM/DD/YYYY') AS ''CPC/APC HEARING DATE'',
        SUBSTR(
        TO_CHAR(SUB.DAA_ACTN_DT, 'MM/DD/YYYY') ||
        TO_CHAR(CPC.CPC_ACTN_ADVSD_DT, 'MM/DD/YYYY') ||
        TO_CHAR(ZON.ZA_ACTN_DT, 'MM/DD/YYYY') ||
        TO_CHAR(ENV.PUB_DT, 'MM/DD/YYYY') ||
        TO_CHAR(CHC.CHC_DTRM_DT, 'MM/DD/YYYY') ||
        TO_CHAR(APL.BZA_DECISN_DT, 'MM/DD/YYYY') ||
        TO_CHAR(CC.CC_HEAR_DT, 'MM/DD/YYYY') ||
        '          ', 1, 10) AS ''COMPLETION DATE'',
        ACT.CASE_ACTION_DESC AS ''CASE ACTION'',
        CC.EXPEDITED_CASE_FLG AS ''EXPEDITED CASE'',
        CC.TRACT_CASE_FLG AS ''INCIDENTAL CASE'',
        LC.PROJ_DESC_TXT AS ''PROJECT DESCRIPTION''
FROM CTS.TCASE CC,
        CTS.TAPLC LC,
        CTS.TLOC LL,
        CTS.TLA_PROP PP,
        CTS.TREF_PLAN_AREA TPA,
        CTS.TREF_CNC CNC,
        CTS.TCPC_CASE CPC,
        CTS.TSUBDIV_CASE SUB,
        CTS.TZONING_CASE ZON,
        CTS.TCHC_CASE CHC,
        CTS.TENV_CASE ENV,
        CTS.TAPEL_CASE APL,
        CTS.TREF_APC_AREA APC,
        CTS.TREF_CASE_ACTION ACT,
        CTS.TAPLC_FEE FEE
WHERE CC.APLC_ID = LC.APLC_ID
    AND LC.APLC_ID = LL.APLC_ID
    AND LL.LOC_ID = PP.PROP_ID
    AND PP.PLAN_AREA_NBR = TPA.PLAN_AREA_NBR (+)
    AND PP.CNC_CD = CNC.CNC_CD (+)
    AND PP.APC_AREA_CD = APC.APC_AREA_CD (+)
    AND LC.APLC_ID = FEE.APLC_ID (+)
    AND CC.CASE_ID = CPC.CASE_ID (+)
    AND CC.CASE_ID = SUB.CASE_ID (+)
    AND CC.CASE_ID = ZON.CASE_ID (+)
    AND CC.CASE_ID = CHC.CASE_ID (+)
    AND CC.CASE_ID = ENV.CASE_ID (+)
    AND CC.CASE_ID = APL.CASE_ID (+)
    AND CC.CASE_ACTION_ID = ACT.CASE_ACTION_ID (+) 
```

Let's simplify it a bit by removing some of the string concatenations and some tables that we don't care about or don't have access to in our extracts:

```sql
SELECT DISTINCT CC.CASE_NBR AS ''CASE NUMBER'',
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        TPA.PLAN_AREA_DESC AS ''PLAN AREA'',
        PP.CNCL_DIST_NBR AS ''COUNCIL DISTRICT'',
        ACT.CASE_ACTION_DESC AS ''CASE ACTION'',
        CC.EXPEDITED_CASE_FLG AS ''EXPEDITED CASE'',
        CC.TRACT_CASE_FLG AS ''INCIDENTAL CASE'',
        LC.PROJ_DESC_TXT AS ''PROJECT DESCRIPTION''
FROM CTS.TCASE CC,
        CTS.TAPLC LC,
        CTS.TLOC LL,
        CTS.TLA_PROP PP,
        CTS.TAPEL_CASE APL,
        CTS.TREF_CASE_ACTION ACT,
WHERE CC.APLC_ID = LC.APLC_ID
    AND LC.APLC_ID = LL.APLC_ID
    AND LL.LOC_ID = PP.PROP_ID
    AND CC.CASE_ID = APL.CASE_ID (+)
    AND CC.CASE_ACTION_ID = ACT.CASE_ACTION_ID (+)
```

This expression uses the Oracle `(+)` shorthand for outer joins.
Let's first remove those and construct a query with only implicit `WHERE` joins:

```sql
SELECT DISTINCT CC.CASE_NBR AS "CASE NUMBER",
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS "COUNCIL DISTRICT",
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM CTS.TCASE CC,
        CTS.TAPLC LC,
        CTS.TLOC LL,
        CTS.TLA_PROP PP,
WHERE CC.APLC_ID = LC.APLC_ID
    AND LC.APLC_ID = LL.APLC_ID
    AND LL.LOC_ID = PP.PROP_ID
 ```

In [2]:
if not os.path.exists("PCTS.sqlite"):
    import s3fs
    fs = s3fs.S3FileSystem()
    fs.download("s3://city-planning-entitlements/PCTS.sqlite", "PCTS.sqlite")
engine = sqlalchemy.create_engine("sqlite:///PCTS.sqlite")

In [3]:
sql="""
SELECT DISTINCT CC.CASE_NBR AS "CASE NUMBER",
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS "COUNCIL DISTRICT",
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM tCASE CC,
        tAPLC LC,
        tLOC LL,
        tLA_PROP PP
WHERE CC.APLC_ID = LC.APLC_ID
    AND LC.APLC_ID = LL.APLC_ID
    AND LL.LOC_ID = PP.PROP_ID
"""
pcts = pandas.read_sql(sql, engine)
pcts

Unnamed: 0,CASE NUMBER,ADDRESS,COUNCIL DISTRICT,EXPEDITED CASE,INCIDENTAL CASE,PROJECT DESCRIPTION
0,PC-1987-764-PC,,10,,,3 STORY-30 UNITS APT. BUILDING 0/2 LEVELS GARA...
1,PC-1987-761-PC,,15,,,4 UNIT APT.
2,CPC-1987-640-ZC,,6,,,ZONE CHANGE FROM R1-1 TO M1-1 TO CONSTRUCT A 5...
3,PC-1987-758-PC,,9,,,SERVICE STATION & MART.
4,CPC-1987-630-BL,,7,,,REMOVAL OF A 10 FT BUILDING SETBACK LINE ALONG...
...,...,...,...,...,...,...
1636908,ADM-2020-352-CUW,8039 S VERMONT,8,N,,6409(A) ADMINISTRATIVE PLAN APPROVAL
1636909,DIR-2020-404-SPP,135 E PARK,11,N,,PURSUANT TO LAMC SECTION 11.5.7. FOR PROJECT P...
1636910,ADM-2020-374-CWC,1071 S SYCAMORE,10,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."
1636911,ADM-2020-374-CWC,1073 S SYCAMORE,10,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."


Convert implicit inner joins to explicit ones:
```sql
SELECT DISTINCT CC.CASE_NBR AS "CASE NUMBER",
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS "COUNCIL DISTRICT",
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM tCASE CC
        INNER JOIN tAPLC LC ON CC.APLC_ID=LC.APLC_ID
        INNER JOIN tLOC LL ON LC.APLC_ID=LL.APLC_ID
        INNER JOIN tLA_PROP PP on LL.LOC_ID=PP.PROP_ID
```

In [4]:
sql="""
SELECT DISTINCT CC.CASE_NBR AS "CASE NUMBER",
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS "COUNCIL DISTRICT",
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM tCASE CC
        INNER JOIN tAPLC LC ON CC.APLC_ID=LC.APLC_ID
        INNER JOIN tLOC LL ON LC.APLC_ID=LL.APLC_ID
        INNER JOIN tLA_PROP PP on LL.LOC_ID=PP.PROP_ID
"""
pcts = pandas.read_sql(sql, engine)
pcts

Unnamed: 0,CASE NUMBER,ADDRESS,COUNCIL DISTRICT,EXPEDITED CASE,INCIDENTAL CASE,PROJECT DESCRIPTION
0,PC-1987-764-PC,,10,,,3 STORY-30 UNITS APT. BUILDING 0/2 LEVELS GARA...
1,PC-1987-761-PC,,15,,,4 UNIT APT.
2,CPC-1987-640-ZC,,6,,,ZONE CHANGE FROM R1-1 TO M1-1 TO CONSTRUCT A 5...
3,PC-1987-758-PC,,9,,,SERVICE STATION & MART.
4,CPC-1987-630-BL,,7,,,REMOVAL OF A 10 FT BUILDING SETBACK LINE ALONG...
...,...,...,...,...,...,...
1636908,ADM-2020-352-CUW,8039 S VERMONT,8,N,,6409(A) ADMINISTRATIVE PLAN APPROVAL
1636909,DIR-2020-404-SPP,135 E PARK,11,N,,PURSUANT TO LAMC SECTION 11.5.7. FOR PROJECT P...
1636910,ADM-2020-374-CWC,1071 S SYCAMORE,10,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."
1636911,ADM-2020-374-CWC,1073 S SYCAMORE,10,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."


Let's add more geospatial data to the table and snake case column names:

```sql
SELECT DISTINCT CC.CASE_ID as CASE_ID,
        CC.CASE_NBR AS CASE_NUMBER,
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS COUNCIL_DISTRICT,
        PP.PIN AS PIN,
        PP.PLAN_AREA_NBR as PLAN_AREA,
        PP.BOE_DIST_MAP_NBR AS BOE_DISTRICT,
        PP.APC_AREA_CD AS APC_AREA,
        PP.CENSUS_TRACT_NBR as CENSUS_TRACT,
        PP.ZONE_REG_CD as ZONING,
        PP.ASSR_PRCL_NBR as AIN,
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM tCASE CC
        INNER JOIN tAPLC LC ON CC.APLC_ID=LC.APLC_ID
        INNER JOIN tLOC LL ON LC.APLC_ID=LL.APLC_ID
        INNER JOIN tLA_PROP PP on LL.LOC_ID=PP.PROP_ID
```

In [5]:
sql="""
SELECT DISTINCT CC.CASE_ID as CASE_ID,
        CC.CASE_NBR AS CASE_NUMBER,
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS COUNCIL_DISTRICT,
        PP.PIN AS PIN,
        PP.PLAN_AREA_NBR as PLAN_AREA,
        PP.BOE_DIST_MAP_NBR AS BOE_DISTRICT,
        PP.APC_AREA_CD AS APC_AREA,
        PP.CENSUS_TRACT_NBR as CENSUS_TRACT,
        PP.ZONE_REG_CD as ZONING,
        PP.ASSR_PRCL_NBR as AIN,
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM tCASE CC
        INNER JOIN tAPLC LC ON CC.APLC_ID=LC.APLC_ID
        INNER JOIN tLOC LL ON LC.APLC_ID=LL.APLC_ID
        INNER JOIN tLA_PROP PP on LL.LOC_ID=PP.PROP_ID
"""
pcts = pandas.read_sql(sql, engine)
pcts

Unnamed: 0,CASE_ID,CASE_NUMBER,ADDRESS,COUNCIL_DISTRICT,PIN,PLAN_AREA,BOE_DISTRICT,APC_AREA,CENSUS_TRACT,ZONING,AIN,EXPEDITED CASE,INCIDENTAL CASE,PROJECT DESCRIPTION
0,66.0,PC-1987-764-PC,,10,,,,,,,,,,3 STORY-30 UNITS APT. BUILDING 0/2 LEVELS GARA...
1,67.0,PC-1987-761-PC,,15,,,,,,,,,,4 UNIT APT.
2,68.0,CPC-1987-640-ZC,,6,,,,,,,,,,ZONE CHANGE FROM R1-1 TO M1-1 TO CONSTRUCT A 5...
3,70.0,PC-1987-758-PC,,9,,,,,,,,,,SERVICE STATION & MART.
4,72.0,CPC-1987-630-BL,,7,,,,,,,,,,REMOVAL OF A 10 FT BUILDING SETBACK LINE ALONG...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1913638,235073.0,ADM-2020-352-CUW,8039 S VERMONT,8,099B197 966,105.0,099B197,S,2382.00,C2-1-CPIO,6033021018,N,,6409(A) ADMINISTRATIVE PLAN APPROVAL
1913639,235136.0,DIR-2020-404-SPP,135 E PARK,11,109-5A143 377,328.0,109-5A143,W,2734.00,RD1.5-1,4286014019,N,,PURSUANT TO LAMC SECTION 11.5.7. FOR PROJECT P...
1913640,235095.0,ADM-2020-374-CWC,1071 S SYCAMORE,10,132B181 1007,106.0,132B181,C,2162.00,R2-1 ...,5084014007,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."
1913641,235095.0,ADM-2020-374-CWC,1073 S SYCAMORE,10,132B181 1007,106.0,132B181,C,2162.00,R2-1-HPOZ,5084014007,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."


Let's also add back in some more application information and dates:
```sql
SELECT DISTINCT
        CC.CASE_ID as CASE_ID,
        CC.CASE_NBR AS CASE_NUMBER,
        COALESCE(CC.CASE_FILE_RCV_DT, CC.CRTN_DT) AS FILE_DATE,
        CC.APLC_ID AS APPLICATION_ID,
        CC.CASE_SEQ_NBR AS CASE_SEQUENCE_NUMBER,
        CC.CASE_YR_NBR AS CASE_YEAR_NUMBER,
        CC.PARNT_CASE_ID AS PARENT_CASE_ID,
        CC.CASE_ACTION_ID AS CASE_ACTION_ID,
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS COUNCIL_DISTRICT,
        PP.PIN AS PIN,
        PP.PLAN_AREA_NBR as PLAN_AREA,
        PP.BOE_DIST_MAP_NBR AS BOE_DISTRICT,
        PP.APC_AREA_CD AS APC_AREA,
        PP.CENSUS_TRACT_NBR as CENSUS_TRACT,
        PP.ZONE_REG_CD as ZONING,
        PP.ASSR_PRCL_NBR as AIN,
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL CASE",
        LC.PROJ_DESC_TXT AS "PROJECT DESCRIPTION"
FROM tCASE CC
        INNER JOIN tAPLC LC ON CC.APLC_ID=LC.APLC_ID
        INNER JOIN tLOC LL ON LC.APLC_ID=LL.APLC_ID
        INNER JOIN tLA_PROP PP on LL.LOC_ID=PP.PROP_ID
```

In [6]:
sql="""
SELECT DISTINCT
        CC.CASE_ID as CASE_ID,
        CC.CASE_NBR AS CASE_NUMBER,
        COALESCE(CC.CASE_FILE_RCV_DT, CC.CRTN_DT) AS FILE_DATE,
        CC.APLC_ID AS APPLICATION_ID,
        CC.CASE_SEQ_NBR AS CASE_SEQUENCE_NUMBER,
        CC.CASE_YR_NBR AS CASE_YEAR_NUMBER,
        CC.PARNT_CASE_ID AS PARENT_CASE_ID,
        CC.CASE_ACTION_ID AS CASE_ACTION_ID,
        PP.STR_NBR || ' ' || PP.STR_DIR_CD || ' ' || PP.STR_NM AS ADDRESS,
        PP.CNCL_DIST_NBR AS COUNCIL_DISTRICT,
        PP.PIN AS PIN,
        PP.PLAN_AREA_NBR as PLAN_AREA,
        PP.BOE_DIST_MAP_NBR AS BOE_DISTRICT,
        PP.APC_AREA_CD AS APC_AREA,
        PP.CENSUS_TRACT_NBR as CENSUS_TRACT,
        PP.ZONE_REG_CD as ZONING,
        PP.ASSR_PRCL_NBR as AIN,
        CC.EXPEDITED_CASE_FLG AS "EXPEDITED_CASE",
        CC.TRACT_CASE_FLG AS "INCIDENTAL_CASE",
        LC.PROJ_DESC_TXT AS "PROJECT_DESCRIPTION"
FROM tCASE CC
        INNER JOIN tAPLC LC ON CC.APLC_ID=LC.APLC_ID
        INNER JOIN tLOC LL ON LC.APLC_ID=LL.APLC_ID
        INNER JOIN tLA_PROP PP on LL.LOC_ID=PP.PROP_ID
"""
pcts = pandas.read_sql(sql, engine)
pcts

Unnamed: 0,CASE_ID,CASE_NUMBER,FILE_DATE,APPLICATION_ID,CASE_SEQUENCE_NUMBER,CASE_YEAR_NUMBER,PARENT_CASE_ID,CASE_ACTION_ID,ADDRESS,COUNCIL_DISTRICT,PIN,PLAN_AREA,BOE_DISTRICT,APC_AREA,CENSUS_TRACT,ZONING,AIN,EXPEDITED_CASE,INCIDENTAL_CASE,PROJECT_DESCRIPTION
0,66.0,PC-1987-764-PC,1999-10-16 00:00:00.000000,66.0,764.0,1987.0,,,,10,,,,,,,,,,3 STORY-30 UNITS APT. BUILDING 0/2 LEVELS GARA...
1,67.0,PC-1987-761-PC,1999-10-16 00:00:00.000000,67.0,761.0,1987.0,,,,15,,,,,,,,,,4 UNIT APT.
2,68.0,CPC-1987-640-ZC,1999-10-16 20:19:11.000000,68.0,640.0,1987.0,,,,6,,,,,,,,,,ZONE CHANGE FROM R1-1 TO M1-1 TO CONSTRUCT A 5...
3,70.0,PC-1987-758-PC,1999-10-16 00:00:00.000000,70.0,758.0,1987.0,,,,9,,,,,,,,,,SERVICE STATION & MART.
4,72.0,CPC-1987-630-BL,1999-10-16 20:19:11.000000,72.0,630.0,1987.0,,,,7,,,,,,,,,,REMOVAL OF A 10 FT BUILDING SETBACK LINE ALONG...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1913638,235073.0,ADM-2020-352-CUW,2020-01-16 00:00:00.000000,192918.0,352.0,2020.0,,,8039 S VERMONT,8,099B197 966,105.0,099B197,S,2382.00,C2-1-CPIO,6033021018,N,,6409(A) ADMINISTRATIVE PLAN APPROVAL
1913639,235136.0,DIR-2020-404-SPP,2020-01-21 10:26:08.000000,192968.0,404.0,2020.0,,,135 E PARK,11,109-5A143 377,328.0,109-5A143,W,2734.00,RD1.5-1,4286014019,N,,PURSUANT TO LAMC SECTION 11.5.7. FOR PROJECT P...
1913640,235095.0,ADM-2020-374-CWC,2020-01-16 00:00:00.000000,192937.0,374.0,2020.0,,1.0,1071 S SYCAMORE,10,132B181 1007,106.0,132B181,C,2162.00,R2-1 ...,5084014007,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."
1913641,235095.0,ADM-2020-374-CWC,2020-01-16 00:00:00.000000,192937.0,374.0,2020.0,,1.0,1073 S SYCAMORE,10,132B181 1007,106.0,132B181,C,2162.00,R2-1-HPOZ,5084014007,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."


Can we reproduce using ibis?

In [7]:
import ibis
con = ibis.sqlite.connect("PCTS.sqlite")

In [8]:
cases = con.table("tCASE")
loc = con.table("tLOC")
la_prop = con.table("tLA_PROP")
application = con.table("tAPLC")

In [9]:
table = (
    cases
    .inner_join(application, cases.APLC_ID==application.APLC_ID)
    .inner_join(loc, application.APLC_ID==loc.APLC_ID)
    .inner_join(la_prop, loc.LOC_ID==la_prop.PROP_ID)
)[
    cases.CASE_ID.name("CASE_ID"),
    cases.CASE_NBR.name("CASE NUMBER"),
    ibis.coalesce(cases.CASE_FILE_RCV_DT, cases.CRTN_DT).name("FILE_DATE"),
    cases.APLC_ID.name("APPLICATION_ID"),
    cases.CASE_SEQ_NBR.name("CASE_SEQUENCE_NUMBER"),
    cases.CASE_YR_NBR.name("CASE_YEAR_NUMBER"),
    cases.PARNT_CASE_ID.name("PARENT_CASE_ID"),
    cases.CASE_ACTION_ID.name("CASE_ACTION_ID"),
    la_prop.STR_NBR.name("STREET_NUMBER"),
    la_prop.STR_DIR_CD.name("STREET_DIRECTION"),
    la_prop.STR_NM.name("STREET_NAME"),
    la_prop.CNCL_DIST_NBR.name("COUNCIL_DISTRICT"),
    la_prop.PIN.name("PIN"),
    la_prop.PLAN_AREA_NBR.name("PLAN_AREA"),
    la_prop.BOE_DIST_MAP_NBR.name("BOE_DISTRICT"),
    la_prop.APC_AREA_CD.name("APC_AREA"),
    la_prop.CENSUS_TRACT_NBR.name("CENSUS_TRACT"),
    la_prop.ZONE_REG_CD.name("ZONING"),
    la_prop.ASSR_PRCL_NBR.name("AIN"),
    cases.EXPEDITED_CASE_FLG.name("EXPEDITED_CASE"),
    cases.TRACT_CASE_FLG.name("INCIDENTAL_CASE"),
    application.PROJ_DESC_TXT.name("PROJECT_DESCRIPTION")
].distinct()

In [10]:
table.execute(limit=None)

Unnamed: 0,CASE_ID,CASE NUMBER,FILE_DATE,APPLICATION_ID,CASE_SEQUENCE_NUMBER,CASE_YEAR_NUMBER,PARENT_CASE_ID,CASE_ACTION_ID,STREET_NUMBER,STREET_DIRECTION,...,PIN,PLAN_AREA,BOE_DISTRICT,APC_AREA,CENSUS_TRACT,ZONING,AIN,EXPEDITED_CASE,INCIDENTAL_CASE,PROJECT_DESCRIPTION
0,66.0,PC-1987-764-PC,1999-10-16 00:00:00,66.0,764.0,1987.0,,,720,,...,,,,,,,,,,3 STORY-30 UNITS APT. BUILDING 0/2 LEVELS GARA...
1,67.0,PC-1987-761-PC,1999-10-16 00:00:00,67.0,761.0,1987.0,,,1545,,...,,,,,,,,,,4 UNIT APT.
2,68.0,CPC-1987-640-ZC,1999-10-16 20:19:11,68.0,640.0,1987.0,,,7101,,...,,,,,,,,,,ZONE CHANGE FROM R1-1 TO M1-1 TO CONSTRUCT A 5...
3,70.0,PC-1987-758-PC,1999-10-16 00:00:00,70.0,758.0,1987.0,,,254,,...,,,,,,,,,,SERVICE STATION & MART.
4,72.0,CPC-1987-630-BL,1999-10-16 20:19:11,72.0,630.0,1987.0,,,14503,,...,,,,,,,,,,REMOVAL OF A 10 FT BUILDING SETBACK LINE ALONG...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1913797,235073.0,ADM-2020-352-CUW,2020-01-16 00:00:00,192918.0,352.0,2020.0,,,8039,S,...,099B197 966,105.0,099B197,S,2382.00,C2-1-CPIO,6033021018,N,,6409(A) ADMINISTRATIVE PLAN APPROVAL
1913798,235136.0,DIR-2020-404-SPP,2020-01-21 10:26:08,192968.0,404.0,2020.0,,,135,E,...,109-5A143 377,328.0,109-5A143,W,2734.00,RD1.5-1,4286014019,N,,PURSUANT TO LAMC SECTION 11.5.7. FOR PROJECT P...
1913799,235095.0,ADM-2020-374-CWC,2020-01-16 00:00:00,192937.0,374.0,2020.0,,1.0,1071,S,...,132B181 1007,106.0,132B181,C,2162.00,R2-1 ...,5084014007,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."
1913800,235095.0,ADM-2020-374-CWC,2020-01-16 00:00:00,192937.0,374.0,2020.0,,1.0,1073,S,...,132B181 1007,106.0,132B181,C,2162.00,R2-1-HPOZ,5084014007,N,,"PURSUANT TO LAMC 12.20.3.I, CONFORMING WORK ON..."


Ibis/sqlalchemy seem to be missing string concatenation methods,
so we keep the address fields separate for the time being.

They wind up being important at the margins when a case involves adjacent properties:
the `SELECT DISTINCT` can distinguish based on the address in a few places.

SQLite doesnt have outer join support, so we do that last step in pandas.
This has the effect of bringing in appeal dates for some cases, but probably
doesn't matter that much, except to bring some nulls into CASE_NUMBER.

This effectively makes it a left join, but I'm trying to keep the processing
as close as possible to the original query.

In [11]:
appeals = pandas.read_sql_table("tAPEL_CASE", engine)

In [12]:
pcts = pandas.merge(
    pcts,
    appeals[["CASE_ID", "BZA_PUBLC_HEAR_DT", "BZA_DECISN_DT"]].rename(
        columns={"BZA_PUBLC_HEAR_DT": "APPEAL_HEARING_DATE", "BZA_DECISN_DT": "APPEAL_DECISION_DATE"}
    ),
    on="CASE_ID",
    how="outer",
).dropna(subset=["CASE_NUMBER"])

Fix some dtypes:

In [13]:
pcts = pcts.astype({
    "CASE_ID": "Int64",
    "FILE_DATE": "datetime64[ns]",
    "APPLICATION_ID": "Int64",
    "CASE_SEQUENCE_NUMBER": "Int64",
    "CASE_YEAR_NUMBER": "Int64",
    "PARENT_CASE_ID": "Int64",
    "CASE_ACTION_ID": "Int64",
    "PLAN_AREA": "Int64",
})

Compute GEOID from the census tract:

In [14]:
STATE = "06"
COUNTY = "037"

split = pcts.CENSUS_TRACT.str.strip().str.split(".", expand=True)
geoid = STATE + COUNTY + split[0].str.zfill(4) + split[1].fillna("").str.rjust(2, "0")
pcts = pcts.assign(
    GEOID=geoid
)

In [15]:
pcts = pcts.sort_values(["FILE_DATE", "CASE_ID", "AIN"]).reset_index(drop=True)

In [18]:
pcts.to_parquet("s3://city-planning-entitlements/data/final/pcts.parquet")