# Zoningtaxlots QAQC

### Objectives:
+ Combine `qc_versioncomparison.csv` with `qc_versioncomparisonnownullcount.csv`; sort by field name. (Resulting report will show __if the value changed to a different value or to/from a null value__).
+ Add a __difference__ column to `qc_frequencychanges.csv`; sort by field name.
+ Add two fields to the BBL diff report:
    + Flag indicating that __lot intersects with a rezoning done since the last version__
    + Flag indicating that __the area of the lot (taken from DTM) has changed by more than +/- 10% since the last version__
+ Rename fields in BBL diff report for the fields showing the new data, using similar naming convention as used for previous data set, e.g., ZD1NEW.

In [27]:
import geopandas as gpd
import pandas as pd
import cartoframes
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import time
from shapely.wkb import dumps, loads
from shapely.wkt import loads as wkt_loads 

# Let wide report tables display up to 500 columns, and stamp the run with
# the current local date/time.
max_visible_columns = 500
pd.set_option('display.max_columns', max_visible_columns)

run_timestamp = time.strftime("%m/%d/%Y %H:%M:%S")
print(run_timestamp)

10/03/2019 13:35:42


In [28]:
# IPython magics: load the dotenv extension, then read environment variables
# from ../.env (presumably where BUILD_ENGINE, IP and RECIPE_ENGINE, read by
# the following cells, are defined - verify against the .env file).
%load_ext dotenv
%dotenv ../.env

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [29]:
# Engine for the build database: take the BUILD_ENGINE connection URL and
# substitute the value of the IP environment variable for 'localhost'.
build_url = os.getenv('BUILD_ENGINE')
build_host = os.getenv('IP')
conn = create_engine(build_url.replace('localhost', build_host))

In [30]:
# Second engine, built from the RECIPE_ENGINE connection URL.
recipe_url = os.getenv('RECIPE_ENGINE')
recipe_conn = create_engine(recipe_url)

In [31]:
# Reports, per field, the number of records that experienced a change in value.
# The query joins ztl_qc_versioncomparisoncount, ztl_qc_versioncomparisonnownullcount
# and frequencychanges on `field`, renames count/percent to diff_count/diff_percent
# and countnew/countold to total_count_new/total_count_old, adds
# total_count_diff = countnew - countold, and sorts the result by field name.
query = '''
select field, count as diff_count, 
percent as diff_percent,  
        newnullcount, oldnullcount, 
        countnew as total_count_new, 
        countold as total_count_old, 
        (countnew-countold) as total_count_diff 
from(
    SELECT * FROM
        ztl_qc_versioncomparisoncount a 
    JOIN 
        ztl_qc_versioncomparisonnownullcount b
    USING (field)) c
JOIN
frequencychanges d
USING (field)
ORDER BY field;
'''
# Run against the build database engine; the bare name on the last line
# displays the resulting frame as the cell output.
df_versioncomparison = pd.read_sql(sql=query, con=conn)
df_versioncomparison

Unnamed: 0,field,diff_count,diff_percent,newnullcount,oldnullcount,total_count_new,total_count_old,total_count_diff
0,commercialoverlay1,1.0,0.0,1,1,74961,74971,-10
1,commercialoverlay2,0.0,0.0,0,0,165,165,0
2,limitedheightdistrict,0.0,0.0,0,0,3037,3037,0
3,specialdistrict1,1.0,0.0,1,1,101896,101903,-7
4,specialdistrict2,1.0,0.0,0,0,81,80,1
5,specialdistrict3,1.0,0.0,1,1,0,1,-1
6,zoningdistrict1,0.0,0.0,0,0,858362,858394,-32
7,zoningdistrict2,9.0,0.0,2,2,19865,19858,7
8,zoningdistrict3,1.0,0.0,1,1,206,207,-1
9,zoningdistrict4,1.0,0.0,1,1,13,14,-1


In [32]:
# Reports the full zoning comparison table.
# Each zoning attribute is selected as a new/prev pair (e.g. zd1new / zd1prev),
# together with inzonechange, mihflag, mihoption and the lot geometry.
query = '''
SELECT bblnew, bblprev, 
        zd1new, zd1prev, zd2new, zd2prev, zd3new, zd3prev, zd4new, zd4prev, 
        zmcnew, zmcprev, zmnnew, zmnprev, 
        co1new, co1prev, co2new, co2prev, 
        sd1new, sd1prev, sd2new, sd2prev, sd3new, sd3prev, 
        lhdnew, lhdprev, 
        inzonechange, mihflag, mihoption, 
        geom from bbldiffs;
'''
# geom_col is not passed, so this relies on from_postgis's default geometry
# column name (presumably 'geom', matching the column selected above - confirm).
bbldiffs = gpd.GeoDataFrame.from_postgis(sql=query, con=conn)
bbldiffs

Unnamed: 0,bblnew,bblprev,zd1new,zd1prev,zd2new,zd2prev,zd3new,zd3prev,zd4new,zd4prev,zmcnew,zmcprev,zmnnew,zmnprev,co1new,co1prev,co2new,co2prev,sd1new,sd1prev,sd2new,sd2prev,sd3new,sd3prev,lhdnew,lhdprev,inzonechange,mihflag,mihoption,geom
0,4125420003,4125420003,R5D,R5D,,R3X,,,,,,,19A,19A,C2-3,C2-3,,,,,,,,,,,,,,(POLYGON ((-73.76193006001343 40.6791168105193...
1,2000008900,2000008900,PARK,PARK,R1-2,R1-2,,R8A,,R8,Y,Y,1C,1C,,,,,NA-2,NA-2,J,J,,C,,,,,,(POLYGON ((-73.91287516504593 40.8436027031722...
2,1010100041,1010100041,C5-1,C5-1,C5-3,C5-3,,,,,,,8C,8C,,,,,MiD,,,,,,,,,,,(POLYGON ((-73.9777484313263 40.76498839474848...
3,3036690022,3036690022,C4-4L,C4-4L,,,,,,,,,17C,17C,,,,,EC-5,EC-5,EC-6,,,,,,,True,Option 1 and Deep Affordability Option,(POLYGON ((-73.89730704253353 40.6768316332739...
4,4090060021,4090060021,R4B,R6B,R6B,R4B,,,,,,,18A,18A,C2-3,C2-3,,,,,,,,,,,,,,(POLYGON ((-73.85973760988671 40.6839143556305...
5,4017400047,4017400047,R5A,R5A,R5,,,,,,,,9D,9D,,,,,,,,,,,,,,,,(POLYGON ((-73.86821214925357 40.7532895691665...
6,3066790042,3066790042,R5B,R5B,R4-1,,,,,,,,28C,28C,,,,,,,,,,,,,,,,(POLYGON ((-73.97344719688098 40.6021755054876...
7,4103930056,4103930056,R2,R5B,R5B,R2,,,,,,,15B,15B,,,,,,,,,,,,,,,,(POLYGON ((-73.76252130286763 40.6962809611559...
8,3032370023,3032370023,R7A,R7A,R6B,,,,,,,,13B,13B,,,,,,,,,,,,,,True,Option 1 and Deep Affordability Option,"(POLYGON ((-73.9200200671126 40.7036335495867,..."
9,3018130041,3018130041,R6A,R6A,R6B,,,,,,,,17A,17A,,,,,,,,,,,,,,,,(POLYGON ((-73.94749636486402 40.6861963915373...


In [33]:
# DTM versions to compare; these names are interpolated into the area-change
# query as table identifiers (dof_dtm."<version>").
version_old, version_new = '2019/09/03', 'latest'

In [34]:
# Reports lots whose DTM geometry changed between version_old and version_new
# and whose area changed by more than +/- 10%.
#   dtm_compare: one unioned multi-geometry per bbl for each version, joined on
#                bbl, with flag = 1 when the two geometries are not equal.
#   changed:     relative area change, (new - old) / old, for flagged lots.
# The denominator is wrapped in NULLIF so that a zero-area old geometry gives a
# NULL area_diff (the row is then filtered out by the final WHERE) instead of
# aborting the whole query with a "division by zero" error.
# NOTE: version_old / version_new are interpolated directly as quoted table
# identifiers, so they must only ever come from trusted, hard-coded values.
query = f'''
with dtm_compare as (
    SELECT bbl, geom_new, geom_old, (case when geom_new = geom_old then 0 else 1 end) flag 
    FROM 
    (SELECT bbl, ST_Multi(ST_Union(f.wkb_geometry)) as geom_new 
        FROM dof_dtm."{version_new}" f GROUP BY bbl ) a
    JOIN 
    (SELECT bbl, ST_Multi(ST_Union(f.wkb_geometry)) as geom_old 
        FROM dof_dtm."{version_old}" f GROUP BY bbl ) b
    USING(bbl))
, changed as (
    SELECT *, (st_area(geom_new)-st_area(geom_old))/NULLIF(st_area(geom_old), 0) as area_diff 
    FROM dtm_compare
    WHERE flag = 1)
SELECT * FROM changed WHERE area_diff > 0.1 OR area_diff < -0.1;
'''

In [35]:
# Execute the area-change query on the recipe engine. Only geom_new is parsed
# as the frame's geometry column; geom_old comes back unparsed and is decoded
# separately further down.
bbl_areachange = gpd.GeoDataFrame.from_postgis(
    sql=query,
    con=recipe_conn,
    geom_col='geom_new',
)

In [36]:
# Columns needed for the "new version" map layer: lot id, new geometry and
# the relative area change.
new_layer_columns = ['bbl', 'geom_new', 'area_diff']
bbl_areachange_new = bbl_areachange[new_layer_columns]

In [37]:
# Build the "old version" layer: lot id, previous geometry and area change.
# Take an explicit copy so the geometry column can be overwritten without
# assigning into a slice of bbl_areachange (SettingWithCopyWarning).
bbl_areachange_old = bbl_areachange[['bbl', 'geom_old', 'area_diff']].copy()
# geom_old is decoded with the hex-WKB loader; the resulting shapely geometry
# is used directly, without a further WKT dump/parse round trip.
bbl_areachange_old['geom_old'] = bbl_areachange_old['geom_old'].apply(lambda x: loads(x, hex=True))
bbl_areachange_old = gpd.GeoDataFrame(bbl_areachange_old, geometry='geom_old')

In [45]:
from ipyleaflet import Map, basemaps, GeoData, basemap_to_tiles, LayersControl, FullScreenControl, Popup, Marker
from ipywidgets import HTML

# Create the map widget at a fixed (lat, lon) centre and zoom level, then add
# the CartoDB DarkMatter basemap as its tile layer.
map_center = (40.730610, -73.935242)
m = Map(center=map_center, zoom=11)

# NOTE(review): close_popup_on_click is passed to the tile layer here; confirm
# it was not meant as an option on Map itself.
dark_matter_layer = basemap_to_tiles(
    basemaps.CartoDB.DarkMatter,
    close_popup_on_click=True,
)
m.add_layer(dark_matter_layer)

In [46]:
# Styling shared by both layers; only the colour differs (green = new lot
# shape, red = old lot shape). Leaflet's stroke `opacity` is a 0-1 value, so
# a fully opaque outline is expressed as 1 rather than an out-of-range 10.
shared_style = {'opacity': 1, 'weight': 1.9, 'dashArray': '2', 'fillOpacity': 0.3}

new = GeoData(geo_dataframe=bbl_areachange_new,
              style={'color': 'green', **shared_style},
              name='new')

old = GeoData(geo_dataframe=bbl_areachange_old,
              style={'color': 'red', **shared_style},
              name='old')

In [47]:
# Put both lot layers on the map (new first, then old), followed by the
# layer-toggle and full-screen controls.
for lot_layer in (new, old):
    m.add_layer(lot_layer)

for control_cls in (LayersControl, FullScreenControl):
    m.add_control(control_cls())

In [48]:
# Place one marker per changed lot at the centroid of its new geometry, with
# a popup linking to the lot on ZoLa and showing the area change in percent.
# The three columns are iterated in lockstep, so this works for any index
# (no reliance on labels 0..n-1) and avoids rebuilding a list of each column
# on every iteration.
for lot_bbl, lot_geom, area_diff in zip(bbl_areachange_new['bbl'],
                                        bbl_areachange_new['geom_new'],
                                        bbl_areachange_new['area_diff']):
    centroid = lot_geom.centroid
    center = (centroid.y, centroid.x)  # Marker location is (lat, lon) = (y, x)
    bbl = str(lot_bbl)
    area_change = round(area_diff*100, 2)
    marker = Marker(location=center)
    m.add_layer(marker)
    marker.popup = HTML(value=f'''<a href=https://zola.planning.nyc.gov/bbl/{bbl}> {bbl} </a>
                                  <p> area change: {area_change}% </p>''')

In [49]:
# Display the map widget as the cell output.
m

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …