Goal is to look into historical zoning data from West Harlem, MoHi, and nearby areas. Ultimately should try to find some features of zoning data that can be correlated to resource allocation data.

In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import pandas as pd
import os
from sodapy import Socrata

In [3]:
socrata_domain = 'data.cityofnewyork.us'
socrata_dataset_identifier = '64uk-42ks'
socrata_token = None

In [4]:
client = Socrata(socrata_domain, socrata_token)
print("Domain: {domain:}\nSession: {session:}\nURI Prefix: {uri_prefix:}".format(**client.__dict__))



Domain: data.cityofnewyork.us
Session: <requests.sessions.Session object at 0x00000234366C3148>
URI Prefix: https://


In [5]:
results = client.get(socrata_dataset_identifier)
df = pd.DataFrame.from_dict(results)
display(df.head())
print(df.size)

Unnamed: 0,borough,block,lot,cd,ct2010,cb2010,schooldist,council,zipcode,firecomp,...,appdate,ownertype,histdist,dcpedited,spdist1,zonedist2,firm07_flag,ltdheight,landmark,zonedist3
0,BK,834,46,307,106,2001,20,38,11220,L114,...,,,,,,,,,,
1,QN,4042,106,407,929,3000,25,19,11356,E297,...,,,,,,,,,,
2,BK,4679,17,317,866,3002,18,41,11203,L174,...,,,,,,,,,,
3,BK,7831,6,318,676,1002,22,46,11234,L159,...,,,,,,,,,,
4,BK,7831,7,318,676,1002,22,46,11234,L159,...,,,,,,,,,,


85000


In [6]:
Manhattan_df=df.loc[df['borough'] == "MN"]
display(Manhattan_df.head())
print(Manhattan_df.size)

Unnamed: 0,borough,block,lot,cd,ct2010,cb2010,schooldist,council,zipcode,firecomp,...,appdate,ownertype,histdist,dcpedited,spdist1,zonedist2,firm07_flag,ltdheight,landmark,zonedist3
67,MN,2061,125,109,231.0,3004,6,9,10031,L023,...,,,Hamilton Heights / Sugar Hill Historic District,t,,,,,,
144,MN,967,2,106,86.01,2002,2,4,10016,L007,...,2010-02-04T00:00:00.000,X,,,,,1.0,,,
170,MN,780,71,105,101.0,1005,2,3,10001,L024,...,,,,,,,,,,
223,MN,1226,43,107,181.0,1000,3,6,10025,L022,...,,,,,,,,,,
227,MN,1604,13,111,168.0,2002,4,8,10029,E053,...,,X,,,,,,,,


2720


In [7]:
council7_df=Manhattan_df.loc[Manhattan_df['council'] == "7"] ##Council 7 is Morningside Heights, West Harlem, Washington Heights, and part of the Upper West Side  
display(council7_df.head())
print(council7_df.size)
print(council7_df.loc[df['borough'] != "MN"].size)

Unnamed: 0,borough,block,lot,cd,ct2010,cb2010,schooldist,council,zipcode,firecomp,...,appdate,ownertype,histdist,dcpedited,spdist1,zonedist2,firm07_flag,ltdheight,landmark,zonedist3
446,MN,2073,118,109,225,3000,6,7,10031,E080,...,,,,,,,,,,
746,MN,2060,7502,109,231,2004,6,7,10031,L023,...,2006-11-15T00:00:00.000,,,,,,,,,
767,MN,1993,92,109,211,3000,5,7,10027,E047,...,,X,,,,,,,,
957,MN,1982,68,109,219,1014,5,7,10027,E037,...,,,,,125th,,,,,


340
0


In [8]:
print(council7_df.keys())

Index(['borough', 'block', 'lot', 'cd', 'ct2010', 'cb2010', 'schooldist',
       'council', 'zipcode', 'firecomp', 'policeprct', 'healtharea',
       'sanitboro', 'sanitsub', 'address', 'zonedist1', 'overlay1',
       'splitzone', 'bldgclass', 'landuse', 'easements', 'ownername',
       'lotarea', 'bldgarea', 'comarea', 'resarea', 'officearea', 'retailarea',
       'garagearea', 'strgearea', 'factryarea', 'otherarea', 'areasource',
       'numbldgs', 'numfloors', 'unitsres', 'unitstotal', 'lotfront',
       'lotdepth', 'bldgfront', 'bldgdepth', 'ext', 'proxcode', 'irrlotcode',
       'lottype', 'bsmtcode', 'assessland', 'assesstot', 'exempttot',
       'yearbuilt', 'yearalter1', 'yearalter2', 'builtfar', 'residfar',
       'commfar', 'facilfar', 'borocode', 'bbl', 'tract2010', 'xcoord',
       'ycoord', 'latitude', 'longitude', 'zonemap', 'sanborn', 'taxmap',
       'plutomapid', 'version', 'sanitdistrict', 'healthcenterdistrict',
       'geom', 'pfirm15_flag', 'condono', 'zmcode', 'ap

In [9]:
print(council7_df['landuse'])

446    2
746    2
767    4
957    4
Name: landuse, dtype: object


In [10]:
print(council7_df[['yearbuilt','yearalter1','yearalter2']])

    yearbuilt yearalter1 yearalter2
446      1910       1986          0
746      1926       1957       2005
767      1926          0          0
957      1910          0          0


Filtering down directly to Community Board 109:

In [11]:
cd109_df=df.loc[df['cd'] == "109"] 
display(cd109_df.head())
print(cd109_df.size)

Unnamed: 0,borough,block,lot,cd,ct2010,cb2010,schooldist,council,zipcode,firecomp,...,appdate,ownertype,histdist,dcpedited,spdist1,zonedist2,firm07_flag,ltdheight,landmark,zonedist3
67,MN,2061,125,109,231,3004,6,9,10031,L023,...,,,Hamilton Heights / Sugar Hill Historic District,t,,,,,,
446,MN,2073,118,109,225,3000,6,7,10031,E080,...,,,,,,,,,,
746,MN,2060,7502,109,231,2004,6,7,10031,L023,...,2006-11-15T00:00:00.000,,,,,,,,,
767,MN,1993,92,109,211,3000,5,7,10027,E047,...,,X,,,,,,,,
957,MN,1982,68,109,219,1014,5,7,10027,E037,...,,,,,125th,,,,,


425


df has 1000 rows but cd109_df only has 5...?

Filtering by boolean expression for rows of interest:

In [12]:
iscd109 = df["cd"]=="109"
cd109_df = df[iscd109]
display(cd109_df.head())
print(cd109_df.size)

Unnamed: 0,borough,block,lot,cd,ct2010,cb2010,schooldist,council,zipcode,firecomp,...,appdate,ownertype,histdist,dcpedited,spdist1,zonedist2,firm07_flag,ltdheight,landmark,zonedist3
67,MN,2061,125,109,231,3004,6,9,10031,L023,...,,,Hamilton Heights / Sugar Hill Historic District,t,,,,,,
446,MN,2073,118,109,225,3000,6,7,10031,E080,...,,,,,,,,,,
746,MN,2060,7502,109,231,2004,6,7,10031,L023,...,2006-11-15T00:00:00.000,,,,,,,,,
767,MN,1993,92,109,211,3000,5,7,10027,E047,...,,X,,,,,,,,
957,MN,1982,68,109,219,1014,5,7,10027,E037,...,,,,,125th,,,,,


425
