In [1]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import os

In [3]:
%load_ext dotenv
%dotenv ../.env

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [4]:
cnx = create_engine(os.getenv('EDMDB'))

Find lots where building footprints does not have the same number of buildings as PLUTO.

In [21]:
df = pd.read_sql_query('''WITH footprints_count_by_bbl AS (
	SELECT mpluto_bbl,
	COUNT(*) AS bldg_footprints_count
	FROM dcp.bldg_footprints
	GROUP BY mpluto_bbl
)
SELECT CAST(bbl AS TEXT), 
address, 
landuse,
bldgclass,
ownername,
numbldgs,
bldg_footprints_count, 
numbldgs - bldg_footprints_count AS difference
FROM dcp.pluto201, footprints_count_by_bbl
WHERE bbl = CAST(mpluto_bbl AS NUMERIC)
AND numbldgs <> bldg_footprints_count
ORDER BY difference DESC;''', cnx)
df.head()

Unnamed: 0,bbl,address,landuse,bldgclass,ownername,numbldgs,bldg_footprints_count,difference
0,4163500400,,1,A8,"BREEZY POINT COOPERATIVE, INC",1861,59,1802
1,4163500300,,1,A8,BREEZY POINT COOPERA,795,21,774
2,4163400050,1 ROCKAWAY POINT BLVD,1,A8,BREEZY POINT CO-OP,515,17,498
3,4142600001,154-68 BROOKVILLE BOULEVARD,7,T1,PORT AUTHORITY OF NY AND NJ,422,5,417
4,2051410120,2049 BARTOW AVENUE,3,D4,RIVERBAY CORPORATION,251,3,248


How many lots have differences in counts?

In [8]:
df.shape[0]

21008

How often does PLUTO have greater numbers?

In [12]:
more_bldgs_on_pluto = df[(df['numbldgs'] > df['bldg_footprints_count'])]
more_bldgs_on_pluto.shape[0]

20703

How often does building footprints have greater numbers?

In [13]:
more_bldgs_on_footprints = df[(df['numbldgs'] < df['bldg_footprints_count'])]
more_bldgs_on_footprints.shape[0]

305

Does the landuse code tell us anything interesting?

In [17]:
landuse_counts = df['landuse'].value_counts()
landuse_counts

01    16717
02     2374
04      778
08      299
03      222
05      186
06      134
07      124
11       60
09       51
10       39
Name: landuse, dtype: int64

What about the building class?

In [19]:
bldgclass_counts = df['bldgclass'].str.slice(0, 1).value_counts()
bldgclass_counts

A    10240
B     6475
C     2139
R      446
S      434
D      242
K      202
M      124
G      100
W       88
E       79
O       79
V       60
F       55
U       54
Q       42
Z       34
I       28
Y       25
N       18
P       18
H       12
T        9
J        2
Name: bldgclass, dtype: int64

In [20]:
df.to_csv('../output/Building_Count_Discrepancies.csv')