# Zoningtaxlots QAQC

### Objectives:
+ Combine `qc_versioncomparison.csv` with `qc_versioncomparisonnownullcount.csv`; sort by field name. (Resulting report will show __if the value changed to a different value or to/from a null value__).
+ Add a __difference__ column to `qc_frequencychanges.csv`; sort by field name.
+ Add two fields to the BBL diff report
    + Flag indicating that __lot intersects with a rezoning done since the last version__
    + Flag indicating that __the area of the lot (taken from DTM) has changed by more than +/- 10% since the last version__
+ Rename fields in BBL diff report for the fields showing the new data, using similar naming convention as used for previous data set, e.g., ZD1NEW.

In [1]:
import geopandas as gpd
import pandas as pd
import os
from sqlalchemy import create_engine
from pathlib import Path
import time
from shapely.wkb import dumps, loads
from shapely.wkt import loads as wkt_loads 

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 50)
print(time.strftime("%m/%d/%Y %H:%M:%S"))

01/07/2020 20:54:51


In [2]:
%load_ext dotenv
%dotenv ../.env

In [3]:
conn = create_engine(os.getenv('BUILD_ENGINE'))

In [4]:
recipe_conn = create_engine(os.getenv('RECIPE_ENGINE'))

In [5]:
# Reports the number of records that experienced a change in the value
query = '''
select field, count as diff_count, 
percent as diff_percent,  
        newnullcount, oldnullcount, 
        countnew as total_count_new, 
        countold as total_count_old, 
        (countnew-countold) as total_count_diff 
from(
    SELECT * FROM
        ztl_qc_versioncomparisoncount a 
    JOIN 
        ztl_qc_versioncomparisonnownullcount b
    USING (field)) c
JOIN
frequencychanges d
USING (field)
ORDER BY field;
'''
df_versioncomparison = pd.read_sql(sql=query, con=conn)
df_versioncomparison

Unnamed: 0,field,diff_count,diff_percent,newnullcount,oldnullcount,total_count_new,total_count_old,total_count_diff
0,commercialoverlay1,2.0,0.0,0,0,74945,74952,-7
1,commercialoverlay2,1.0,0.0,0,0,166,165,1
2,limitedheightdistrict,0.0,0.0,0,0,3037,3037,0
3,specialdistrict1,7.0,0.0,5,5,101891,101895,-4
4,specialdistrict2,0.0,0.0,0,0,80,80,0
5,specialdistrict3,0.0,0.0,0,0,0,0,0
6,zoningdistrict1,13.0,0.0,0,0,858305,858314,-9
7,zoningdistrict2,53.0,0.0,34,34,19830,19846,-16
8,zoningdistrict3,3.0,0.0,0,0,210,206,4
9,zoningdistrict4,0.0,0.0,0,0,13,13,0


In [6]:
# Reports the full zoning comarison table
query = '''
SELECT bblnew, bblprev, 
        zd1new, zd1prev, zd2new, zd2prev, zd3new, zd3prev, zd4new, zd4prev, 
        zmcnew, zmcprev, zmnnew, zmnprev, 
        co1new, co1prev, co2new, co2prev, 
        sd1new, sd1prev, sd2new, sd2prev, sd3new, sd3prev, 
        lhdnew, lhdprev, 
        inzonechange, mihflag, mihoption, 
        geom from bbldiffs;
'''
bbldiffs = gpd.GeoDataFrame.from_postgis(sql=query, con=conn)
bbldiffs

Unnamed: 0,bblnew,bblprev,zd1new,zd1prev,zd2new,zd2prev,zd3new,zd3prev,zd4new,zd4prev,zmcnew,zmcprev,zmnnew,zmnprev,co1new,co1prev,co2new,co2prev,sd1new,sd1prev,sd2new,sd2prev,sd3new,sd3prev,lhdnew,lhdprev,inzonechange,mihflag,mihoption,geom
0,3037670014,3037670014,R6,C4-3,C4-3,R6,,,,,,,17D,17D,,,,,,,,,,,,,,,,"MULTIPOLYGON (((-73.89962 40.66817, -73.89963 ..."
1,3008160036,3008160036,R6A,R6B,R6B,R6A,,,,,,,22A,22A,,,,,,,,,,,,,,,,"MULTIPOLYGON (((-74.01027 40.64225, -74.01034 ..."
2,1007090017,1007090017,PARK,C6-2,C6-4,,C6-2,,,,,,8D,8D,,,,,HY,HY,,,,,,,,,,"MULTIPOLYGON (((-73.99862 40.75757, -73.99880 ..."
3,4021140033,4021140033,R4B,R4B,R7-1,,,,,,,,14A,14A,,,,,,,,,,,,,,,,"MULTIPOLYGON (((-73.85386 40.73082, -73.85372 ..."
4,3008400042,3008400042,R6A,R6A,,R6B,,,,,,,22A,22A,C2-4,C2-4,,,,,,,,,,,,,,"MULTIPOLYGON (((-74.01230 40.64037, -74.01200 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,3008400041,3008400041,R6A,R6A,,R6B,,,,,,,22A,22A,C2-4,C2-4,,,,,,,,,,,,,,"MULTIPOLYGON (((-74.01200 40.64019, -74.01230 ..."
79,1007080048,1007080048,M1-5,M1-5,C2-8,,,,,,,,8D,8D,,,,,HY,HY,,,,,,,,,,"MULTIPOLYGON (((-73.99908 40.75695, -73.99900 ..."
80,4021710057,4021710057,R4,R6A,R6A,R4,,,,,,,14A,14A,C1-3,C1-3,,,,,,,,,,,,,,"MULTIPOLYGON (((-73.84849 40.73213, -73.84835 ..."
81,4021680001,4021680001,R6A,R6A,R4,,,,,,,,14A,14A,C1-3,C1-3,,,,,,,,,,,,,,"MULTIPOLYGON (((-73.84951 40.73412, -73.84938 ..."


## DTM Comparison

In [9]:
version_old = '2019/12/02'
version_new = '2020/01/07'

In [10]:
# Reports lots that had an area change
query = f'''
with dtm_compare as (
    SELECT bbl, geom_new, geom_old, (case when geom_new = geom_old then 0 else 1 end) flag 
    FROM 
    (SELECT bbl, ST_Multi(ST_Union(f.wkb_geometry)) as geom_new 
        FROM dof_dtm."{version_new}" f GROUP BY bbl ) a
    JOIN 
    (SELECT bbl, ST_Multi(ST_Union(f.wkb_geometry)) as geom_old 
        FROM dof_dtm."{version_old}" f GROUP BY bbl ) b
    USING(bbl))
, changed as (
    SELECT *, (st_area(geom_new)-st_area(geom_old))/st_area(geom_old) as area_diff 
    FROM dtm_compare
    WHERE flag = 1)
SELECT * FROM changed WHERE area_diff > 0.1 OR area_diff < -0.1;
'''

In [11]:
bbl_areachange = gpd.GeoDataFrame.from_postgis(sql=query, con=recipe_conn, geom_col='geom_new')

In [12]:
bbl_areachange_new = bbl_areachange[['bbl', 'geom_new', 'area_diff']]

In [13]:
bbl_areachange_old = bbl_areachange[['bbl', 'geom_old', 'area_diff']]
bbl_areachange_old.loc[:, 'geom_old'] = bbl_areachange_old['geom_old'].apply(lambda x: wkt_loads(loads(x,  hex=True).wkt))
bbl_areachange_old=gpd.GeoDataFrame(bbl_areachange_old, geometry='geom_old')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [14]:
from ipyleaflet import Map, basemaps, GeoData, basemap_to_tiles, LayersControl, FullScreenControl, Popup, Marker
from ipywidgets import HTML

m = Map(center=(40.730610, -73.935242), zoom=11)

dark_matter_layer = basemap_to_tiles(basemaps.CartoDB.DarkMatter, close_popup_on_click=True)
m.add_layer(dark_matter_layer)

In [15]:
new = GeoData(geo_dataframe = bbl_areachange_new, 
              style={'color': 'green', 'opacity':10, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.3},
              name = 'new')

old = GeoData(geo_dataframe = bbl_areachange_old, 
              style={'color': 'red', 'opacity':10, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.3}, 
              name = 'old')

In [16]:
m.add_layer(new)
m.add_layer(old)
m.add_control(LayersControl())
m.add_control(FullScreenControl())

In [17]:
for i in range(bbl_areachange_new.shape[0]):
    center = (bbl_areachange_new.loc[i, 'geom_new'].centroid.y,
              bbl_areachange_new.loc[i, 'geom_new'].centroid.x)
    bbl = str(list(bbl_areachange_new['bbl'])[i])
    area_change = round(list(bbl_areachange_new['area_diff'])[i]*100, 2)
    marker = Marker(location=center)
    m.add_layer(marker)
    marker.popup = HTML(value=f'''<a href=https://zola.planning.nyc.gov/bbl/{bbl}> {bbl} </a>
                                  <p> area change: {area_change}% </p>''')

In [18]:
m

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …