In [1]:
import numpy as np
import pandas as pd
from pandas.io import sql
from sqlalchemy import create_engine
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dotenv IPython extension, then read environment variables from the
# .env file one directory above this notebook. The EDMDB value consumed by the
# next cell is presumably defined there -- confirm against your local .env.
%load_ext dotenv
%dotenv ../.env

In [3]:
# Read the database URL from the EDMDB environment variable and fail fast with
# a clear message when it is missing; os.getenv returns None for an unset
# variable, and passing None to create_engine produces a much less obvious error.
db_url = os.getenv('EDMDB')
if not db_url:
    raise RuntimeError(
        "EDMDB is not set; define the database URL in ../.env before running this notebook."
    )
cnx = create_engine(db_url)

Are there any invalid ProxCode values in PLUTO? The only values present are 0 through 3, but 375 lots have a NULL ProxCode.

In [4]:
# Tally PLUTO lots per ProxCode value; NULLs come back as their own group.
proxcode_query = '''SELECT "ProxCode", COUNT(*)
FROM dcp.pluto202
GROUP BY "ProxCode";'''
proxcode_counts = pd.read_sql_query(sql=proxcode_query, con=cnx)
proxcode_counts

Unnamed: 0,ProxCode,count
0,0.0,141749
1,1.0,313774
2,2.0,200383
3,3.0,200940
4,,375


In [5]:
# Drop every working table this notebook creates so that Restart & Run All
# succeeds on repeated runs. dcp.standalone_with_neighbor and
# dcp.standalone_with_neighbor_bf are created further down and must be dropped
# here, otherwise their CREATE TABLE statements fail on the second run.
# dcp.standalone_building_with_neighbor is not created by any cell visible in
# this notebook; it is still dropped in case an earlier version left it behind.
sql.execute('''DROP TABLE IF EXISTS dcp.buildings_23;
DROP TABLE IF EXISTS dcp.attached_no_neighbor;
DROP TABLE IF EXISTS dcp.buildings_1;
DROP TABLE IF EXISTS dcp.standalone_with_neighbor;
DROP TABLE IF EXISTS dcp.standalone_with_neighbor_bf;
DROP TABLE IF EXISTS dcp.standalone_building_with_neighbor;''', cnx)

<sqlalchemy.engine.result.ResultProxy at 0x1201fbc90>

Get buildings and geometries for lots marked as attached or semi-attached. Limit analysis to those lots having only one building.

In [6]:
# Build dcp.buildings_23: one row per building footprint on a PLUTO lot whose
# ProxCode is 2 or 3 and whose NumBldgs is 1, carrying both the footprint and
# the lot geometry. Footprints are matched to lots on BBL (cast to text).
# GIST indexes are then created on both geometry columns.
buildings_23_ddl = '''CREATE TABLE dcp.buildings_23 AS
    SELECT CAST(p."BBL" AS TEXT) AS bbl, bin, f.geom as building_geom, p.geom as lot_geom
    FROM dcp.bldg_footprints f, dcp.pluto202 p
    WHERE f.mpluto_bbl = CAST(p."BBL" AS TEXT)
    AND "ProxCode" IN ('2', '3')
    AND "NumBldgs" = 1;
CREATE INDEX lot_geom_idx
  ON dcp.buildings_23
  USING GIST (lot_geom);
CREATE INDEX building_geom_idx
  ON dcp.buildings_23
  USING GIST (building_geom);
'''
sql.execute(buildings_23_ddl, cnx)

<sqlalchemy.engine.result.ResultProxy at 0x106284310>

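Get buildings whose ProxCode is either 'attached' or 'semi-attached', yet have no buildings abutting them.
Note that ST_Intersects worked better here than ST_Touches. A likely reason: ST_Touches is true only when two geometries share boundary points while their interiors do not overlap, so footprints that overlap even slightly (for example because of digitizing error) fail ST_Touches but still satisfy ST_Intersects.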

In [7]:
# Build dcp.attached_no_neighbor: rows of dcp.buildings_23 for which no
# footprint with a different BBL and a different BIN intersects the building
# geometry. The subquery counts intersecting footprints per BBL; the LEFT JOIN
# plus COALESCE turns "no match" into a count of 0, and only those rows are kept.
attached_no_neighbor_ddl = '''CREATE TABLE dcp.attached_no_neighbor AS
SELECT b1.bbl, b1.bin, b1.building_geom, b1.lot_geom, COALESCE(x.count,0) AS count
FROM dcp.buildings_23 b1
LEFT JOIN (
	SELECT b1.bbl as bbl, COUNT(*) AS count
	FROM dcp.bldg_footprints b2, dcp.buildings_23 b1
	WHERE ST_Intersects(b2.geom, b1.building_geom)
	AND b2.mpluto_bbl <> b1.bbl
	AND b2.bin <> b1.bin
	GROUP BY b1.bbl
) x
ON b1.bbl = x.bbl
WHERE COALESCE(x.count,0) = 0;'''
sql.execute(attached_no_neighbor_ddl, cnx)

<sqlalchemy.engine.result.ResultProxy at 0x1202a0b90>

5477 of 331926 buildings have no abutting building even though they are characterized as attached or semi-attached. This is about 1.65%.

In [8]:
# Count the rows in dcp.attached_no_neighbor.
no_neighbor_count_query = '''SELECT COUNT(*) FROM dcp.attached_no_neighbor;'''
no_abutting_buildings = pd.read_sql_query(sql=no_neighbor_count_query, con=cnx)
no_abutting_buildings

Unnamed: 0,count
0,5477


Of these buildings that have no abutting building, how many have another building within one foot? Of the 5477, 1503 have a building less than a foot away.

In [9]:
# For each building in dcp.attached_no_neighbor, list the footprints (with a
# different BIN) that lie within 1 unit of the layer's coordinate system --
# one foot, per the notebook's narrative -- but do not actually touch it
# (distance > 0). Results are sorted from farthest to nearest.
# Each row is a building/neighbour pair, so a building with several nearby
# footprints appears once per neighbour.
# The sort uses the named `distance` alias rather than a positional index,
# so it keeps working if the columns of dcp.attached_no_neighbor change.
with_onefoot_neighbors = pd.read_sql_query('''SELECT a.*, ST_Distance(a.building_geom, b.geom) AS distance
FROM dcp.attached_no_neighbor a, dcp.bldg_footprints b
WHERE ST_DWithin(a.building_geom, b.geom, 1)
AND a.bin <> b.bin
AND ST_Distance(a.building_geom, b.geom) > 0
ORDER BY distance DESC;''', cnx)
with_onefoot_neighbors

Unnamed: 0,bbl,bin,building_geom,lot_geom,count,distance
0,1015790030,1089903.0,0106000020D70800000100000001030000000100000005...,0106000020D70800000100000001030000000100000009...,0,9.996955e-01
1,2031860078,2014075.0,0106000020D70800000100000001030000000100000009...,0106000020D70800000100000001030000000100000005...,0,9.996546e-01
2,4010050003,4616467.0,0106000020D70800000100000001030000000100000005...,0106000020D70800000100000001030000000100000005...,0,9.985120e-01
3,4101090020,4215653.0,0106000020D70800000100000001030000000100000007...,0106000020D70800000100000001030000000100000005...,0,9.983993e-01
4,2029940013,2120167.0,0106000020D70800000100000001030000000100000008...,0106000020D70800000100000001030000000100000006...,0,9.973484e-01
...,...,...,...,...,...,...
1498,3023770012,3251740.0,0106000020D70800000100000001030000000100000013...,0106000020D70800000100000001030000000100000005...,0,1.401444e-05
1499,3034327502,3403057.0,0106000020D7080000010000000103000000010000000B...,0106000020D7080000010000000103000000010000000B...,0,1.136166e-05
1500,2048250005,2062933.0,0106000020D70800000100000001030000000100000007...,0106000020D70800000100000001030000000100000005...,0,6.411072e-06
1501,4006910041,4011729.0,0106000020D7080000010000000103000000010000000C...,0106000020D70800000100000001030000000100000005...,0,5.629593e-06


Now create a table holding only those lots/buildings where ProxCode = 1 (non-attached buildings), again limited to lots with a single building.

In [10]:
# Build dcp.buildings_1: footprints on PLUTO lots with ProxCode 1 and
# NumBldgs = 1, matched on BBL (cast to text), keeping both the footprint and
# the lot geometry. GIST indexes are then created on both geometry columns.
buildings_1_ddl = '''CREATE TABLE dcp.buildings_1 AS
SELECT CAST("BBL" AS TEXT) AS bbl, 
b.bin, 
b.geom as building_geom, 
p.geom as lot_geom
FROM dcp.pluto202 p, dcp.bldg_footprints b
WHERE "ProxCode" = '1'
AND "NumBldgs" = 1
AND CAST(p."BBL" AS TEXT) = b.mpluto_bbl;
CREATE INDEX lot_geom_idx_1
  ON dcp.buildings_1
  USING GIST (lot_geom);
CREATE INDEX building_geom_idx_1
  ON dcp.buildings_1
  USING GIST (building_geom);'''
sql.execute(buildings_1_ddl, cnx)

<sqlalchemy.engine.result.ResultProxy at 0x1202a3450>

In [11]:
# Build dcp.standalone_with_neighbor: one row per (ProxCode-1 building,
# intersecting footprint) pair where the two BINs differ, keeping the
# building's geometry and the BIN of the intersecting footprint.
standalone_with_neighbor_ddl = '''CREATE TABLE dcp.standalone_with_neighbor AS
SELECT b1.bbl, b1.bin AS bldg1_bin, b1.building_geom, b2.bin AS footprints_bin
FROM dcp.buildings_1 b1, dcp.bldg_footprints b2
WHERE (b1.bin <> b2.bin)
AND ST_Intersects(b1.building_geom, b2.geom);'''
sql.execute(standalone_with_neighbor_ddl, cnx)

<sqlalchemy.engine.result.ResultProxy at 0x1202a37d0>

Importing the geometry from the buildings characterized as standalone along with neighboring buildings shows that quite a few actually abut other buildings (13627 of 162869 or 8.37%).

In [None]:
# Build dcp.standalone_with_neighbor_bf: the BIN and geometry of every
# footprint that intersects a building in dcp.buildings_1 with a different BIN.
# The geometry column is named footprints_geom; it holds b2.geom, not a BIN,
# so the previous alias (footprints_bin) was misleading.
# NOTE(review): anything that reads the old footprints_bin column from this
# table must be updated to use footprints_geom.
sql.execute('''CREATE TABLE dcp.standalone_with_neighbor_bf AS
SELECT b2.bin, b2.geom AS footprints_geom
FROM dcp.buildings_1 b1, dcp.bldg_footprints b2
WHERE (b1.bin <> b2.bin)
AND ST_Intersects(b1.building_geom, b2.geom);''', cnx)