In [1]:
import numpy as np
import pandas as pd
from pandas.io import sql
from sqlalchemy import create_engine
import os
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from pylab import rcParams

In [2]:
%load_ext dotenv
%dotenv ../.env

In [3]:
cnx = create_engine(os.getenv('EDMDB'))

In [9]:
facilities = pd.read_sql_query('''SELECT f.* FROM dcp.facilities f;''', cnx)
facilities.head()

Unnamed: 0,id,geom,uid,facname,addressnum,streetname,address,city,zipcode,boro,...,overlevel,capacity,captype,proptype,latitude,longitude,xcoord,ycoord,datasource,facsubgrp
0,1349,0101000020D7080000099B2EDC719B2E418D8275E22E19...,4f384e2e29509d47137ceda6ebf6530f,ADJ BRONX TERMINAL MARKET,,,,,,Manhattan,...,City,,,City Owned,40.8209373825321,-73.932483746857,1002937.0,238373.999999957,dcas_colp,MISCELLANEOUS USE
1,10891,0101000020D70800000C030056A4962F41CA4A34843A37...,d491ec4dabb06564040e0e01cc10c06a,EVERS,,EVERS,EVERS,BRONX,10465.0,Bronx,...,Federal,,,,40.845933611146,-73.816244721778,1035090.24524262,247527.173040469,usdot_airports,AIRPORTS AND HELIPORTS
2,2209,0101000020D7080000298D6C903A352F4131C1ECB35E33...,959f085e939096e88692ed8cbddf823d,"ARMSTRONG, LOUIS, HOUSE",,,,CORONA,11368.0,Queens,...,State,,,,40.7557191956,-73.8615,1022621.28196551,214635.90911726,nysparks_historicplaces,HISTORICAL SITES
3,19564,0101000020D7080000F321FF20C2F92F419550CD313097...,b4c74cc92ed8691e51aaafdfe5e3f580,NEW YORK CITY D.O.T. HART ISLAND FERRY SLIP,,HART ISLAND,,BRONX,10464.0,Bronx,...,City,,,,40.85428,-73.770361,1047777.06444651,250598.024317388,usdot_ports,PORTS AND FERRY LANDINGS
4,23472,0101000020D7080000B1679A00D0042E41A441F7302967...,6bd2ce56405e06811d0e1c5f6692e3e3,PIER 6,,,PIER 6,,,Manhattan,...,City,,,City Owned,40.6928955912787,-74.0021420042349,983655.999999999,191716.999999942,dcas_colp,MISCELLANEOUS USE


Some of the BBLs on the facilities table are null. Could they be backfilled using a POSTGIS query such as this one?

In [18]:
bbls_for_nulls = pd.read_sql_query('''SELECT CAST(p."BBL" AS TEXT) AS pluto_bbl, f.bbl, f.facname, f.factype
FROM dcp.facilities f, dcp.pluto202 p
WHERE f.bbl IS NULL
AND ST_Within(f.geom, p.geom)''', cnx)
bbls_for_nulls

Unnamed: 0,pluto_bbl,bbl,facname,factype
0,5044440001,,VOORLEZER'S HOUSE,STATE HISTORIC PLACE
1,1007297503,,"450 WEST 33 STREET, NEW YORK, NY 10001",PRIVATELY OWNED PUBLIC SPACE
2,5013010001,,ARLINGTON MARSH PARK,UNDEVELOPED
3,3023487501,,"AUSTIN, NICHOLS & COMPANY WAREHOUSE",STATE HISTORIC PLACE
4,2056500001,,BARTOW-PELL MANSION AND CARRIAGE HOUSE,STATE HISTORIC PLACE
...,...,...,...,...
70,5023590001,,WILLOWBROOK PARKWAY,PARKWAY
71,4142600001,,TRANS WORLD AIRLINES FLIGHT CENTER,STATE HISTORIC PLACE
72,4081620097,,UDALL'S COVE AND RAVINE NATURAL RESOURCE AREA,NATURAL RESOURCE AREA
73,4081620097,,UDALL'S PARK PRESERVE,NATURE AREA


In [14]:
factype = facilities.groupby("bbl")["factype"] \
                            .count() \
                            .reset_index(name='count') \
                            .sort_values(['count'], ascending=False)
factype = factype[factype["count"] > 1]
factype.head()

Unnamed: 0,bbl,count
1311,1007860001,39
2736,1013730001,30
4929,2026050040,28
14074,4096480001,25
1819,1009620100,25


In [19]:
potential_factype_dups = pd.read_sql('''WITH potential_dups AS (
	SELECT bbl, factype, count(*)
	FROM dcp.facilities f
	WHERE bbl IS NOT NULL
	GROUP BY bbl, factype
	HAVING COUNT(*) > 1
	ORDER BY COUNT(*) DESC
)
SELECT f.bbl, f.facdomain, f.facgroup, f.facsubgrp, f.factype, f.facname, f.id
FROM dcp.facilities f, potential_dups d
WHERE f.bbl = d.bbl
AND f.factype = d.factype
ORDER BY f.bbl;''', cnx)
potential_factype_dups

Unnamed: 0,bbl,facdomain,facgroup,facsubgrp,factype,facname,id
0,1000010010,"PARKS, GARDENS, AND HISTORICAL SITES",PARKS AND PLAZAS,PARKS,CITY-STATE PARK,NOLAN PARK,19634
1,1000010010,"PARKS, GARDENS, AND HISTORICAL SITES",HISTORICAL SITES,HISTORICAL SITES,STATE HISTORIC PLACE,"BLOCK HOUSE, THE",3461
2,1000010010,"PARKS, GARDENS, AND HISTORICAL SITES",PARKS AND PLAZAS,PARKS,CITY-STATE PARK,HAMMOCK GROVE,13186
3,1000010010,"PARKS, GARDENS, AND HISTORICAL SITES",PARKS AND PLAZAS,PARKS,CITY-STATE PARK,SOUTH BATTERY,27210
4,1000010010,"PARKS, GARDENS, AND HISTORICAL SITES",HISTORICAL SITES,HISTORICAL SITES,STATE HISTORIC PLACE,FORT JAY,11764
...,...,...,...,...,...,...,...
3904,5076640001,HEALTH AND HUMAN SERVICES,HEALTH CARE,MENTAL HEALTH,SUPPORT MENTAL HEALTH,PROJECT HOSPITALITY INC MENTAL HEALTH SERVICES,24037
3905,5076640001,"EDUCATION, CHILD WELFARE, AND YOUTH",CHILD SERVICES AND WELFARE,CHILD NUTRITION,FEEDING SITE,PS 3 ANNEX,24280
3906,5077100400,"PARKS, GARDENS, AND HISTORICAL SITES",PARKS AND PLAZAS,PRESERVES AND CONSERVATION AREAS,UNIQUE AREA,BUTLER MANOR WOODS,5136
3907,5077100400,"PARKS, GARDENS, AND HISTORICAL SITES",PARKS AND PLAZAS,PRESERVES AND CONSERVATION AREAS,UNIQUE AREA,BUTLER MANOR WOODS,5121
