In [23]:
import pandas as pd
import requests
import json

#### Parse 'nyc_geo.json' into Dataframe

In [24]:
f = open('../data/nyc_geo.json')
data = json.load(f)

In [25]:
bk_neighboorhoods = {}

In [26]:
# will parse only brooklyn to reduce scope of project
for i in data['features']:
    if i['properties']['borough'] == 'Brooklyn':
        bk_neighboorhoods[i['properties']['name']] = { 
                                                      'latitude': i['geometry']['coordinates'][1],
                                                      'longitude': i['geometry']['coordinates'][0],
                                                     }

In [27]:
df = pd.DataFrame.from_dict(bk_neighboorhoods).T

In [28]:
df.head()

Unnamed: 0,latitude,longitude
Bay Ridge,40.625801,-74.030621
Bensonhurst,40.611009,-73.99518
Sunset Park,40.645103,-74.010316
Greenpoint,40.730201,-73.954241
Gravesend,40.59526,-73.973471


---

#### Loading brooklyn housing data

In [29]:
housing_df = pd.read_csv('../data/nyc_housing_prices_jul_2020.csv')
brooklyn_housing_data = housing_df[housing_df['borough'] == 'brooklyn']
brooklyn_housing_data.head(2)

Unnamed: 0,borough,neigborhood,studio,1_bedroom,2_bedroom,3_bedroom
33,brooklyn,Bedford-Stuyvesant,"$2,044","$2,297","$2,611","$3,214"
34,brooklyn,Boerum Hill,"$2,092","$2,854","$4,356","$4,921"


---

#### Google places API data (restaurants)

In [30]:
API_KEY = 'AIzaSyDIR1htuqa4Pctefq6f7JpH3--bmm5dZHg'

In [31]:
resp = requests.get(f'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=40.730201%2C-73.954241&radius=1500&type=restaurant&key={API_KEY}')

In [32]:
len(resp.json()['results'])

20

In [34]:
def get_restaruant_count(lat, long):
    resp = requests.get(f'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat}%2C{long}&radius=1500&type=restaurant&key={API_KEY}')
    return len(resp.json()['results'])

In [35]:
df['restaruant_count'] = df.apply(lambda x: get_restaruant_count(x.latitude, x.longitude), axis=1)

In [36]:
df.restaruant_count.value_counts()

20    68
13     1
17     1
Name: restaruant_count, dtype: int64

#### Squirrel Data

In [38]:
squirrel_df = pd.read_csv('../data/extra/2018_Central_Park_Squirrel_Census_-_Squirrel_Data.csv')

In [39]:
squirrel_df.shape

(3023, 36)

In [42]:
squirrel_df.columns

Index(['X', 'Y', 'Unique Squirrel ID', 'Hectare', 'Shift', 'Date',
       'Hectare Squirrel Number', 'Age', 'Primary Fur Color',
       'Highlight Fur Color', 'Combination of Primary and Highlight Color',
       'Color notes', 'Location', 'Above Ground Sighter Measurement',
       'Specific Location', 'Running', 'Chasing', 'Climbing', 'Eating',
       'Foraging', 'Other Activities', 'Kuks', 'Quaas', 'Moans', 'Tail flags',
       'Tail twitches', 'Approaches', 'Indifferent', 'Runs from',
       'Other Interactions', 'Lat/Long', 'Zip Codes', 'Community Districts',
       'Borough Boundaries', 'City Council Districts', 'Police Precincts'],
      dtype='object')

In [46]:
squirrel_features = squirrel_df[['X', 'Y', 'Shift', 'Age', 'Primary Fur Color', 'Location', 'Lat/Long']]

In [47]:
squirrel_df.head()

Unnamed: 0,X,Y,Unique Squirrel ID,Hectare,Shift,Date,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,...,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts
0,-73.956134,40.794082,37F-PM-1014-03,37F,PM,10142018,3,,,,...,False,False,False,,POINT (-73.9561344937861 40.7940823884086),,19,4,19,13
1,-73.968857,40.783783,21B-AM-1019-04,21B,AM,10192018,4,,,,...,False,False,False,,POINT (-73.9688574691102 40.7837825208444),,19,4,19,13
2,-73.974281,40.775534,11B-PM-1014-08,11B,PM,10142018,8,,Gray,,...,False,False,False,,POINT (-73.97428114848522 40.775533619083),,19,4,19,13
3,-73.959641,40.790313,32E-PM-1017-14,32E,PM,10172018,14,Adult,Gray,,...,False,False,True,,POINT (-73.9596413903948 40.7903128889029),,19,4,19,13
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,,POINT (-73.9702676472613 40.7762126854894),,19,4,19,13


In [51]:
squirrel_features.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  squirrel_features.dropna(inplace=True)


In [55]:
squirrel_features

Unnamed: 0,X,Y,Shift,Age,Primary Fur Color,Location,Lat/Long
4,-73.970268,40.776213,AM,Adult,Gray,Above Ground,POINT (-73.9702676472613 40.7762126854894)
6,-73.954120,40.793181,AM,Adult,Gray,Ground Plane,POINT (-73.9541201789795 40.7931811701082)
7,-73.958269,40.791737,AM,Adult,Gray,Ground Plane,POINT (-73.9582694312289 40.7917367820255)
8,-73.967429,40.782972,PM,Adult,Gray,Ground Plane,POINT (-73.9674285955293 40.7829723919744)
9,-73.972250,40.774288,AM,Adult,Gray,Above Ground,POINT (-73.9722500196844 40.7742879599026)
...,...,...,...,...,...,...,...
3017,-73.964544,40.781160,PM,Juvenile,Cinnamon,Ground Plane,POINT (-73.9645437409662 40.7811599933331)
3018,-73.963943,40.790868,AM,Adult,Gray,Ground Plane,POINT (-73.9639431360458 40.7908677445466)
3019,-73.970402,40.782560,PM,Adult,Gray,Ground Plane,POINT (-73.9704015859639 40.7825600069973)
3020,-73.966587,40.783678,PM,Adult,Gray,Ground Plane,POINT (-73.9665871993517 40.7836775064883)
