In [63]:
import pandas as pd
import requests
import json
import numpy as np

#### Parse 'nyc_geo.json' into Dataframe

In [64]:
f = open('../data/nyc_geo.json')
data = json.load(f)

In [65]:
bk_neighboorhoods = {}

In [66]:
# will parse only brooklyn to reduce scope of project
count = 0 
for i in data['features']:
    if i['properties']['borough'] == 'Brooklyn':
        bk_neighboorhoods[count] = { 
                                                      'latitude': i['geometry']['coordinates'][1],
                                                      'longitude': i['geometry']['coordinates'][0],
                                                      'neighborhood': i['properties']['name']
                                                     }
        count += 1

In [67]:
df = pd.DataFrame.from_dict(bk_neighboorhoods).T

In [68]:
df

Unnamed: 0,latitude,longitude,neighborhood
0,40.625801,-74.030621,Bay Ridge
1,40.611009,-73.99518,Bensonhurst
2,40.645103,-74.010316,Sunset Park
3,40.730201,-73.954241,Greenpoint
4,40.59526,-73.973471,Gravesend
...,...,...,...
65,40.703176,-73.988753,Dumbo
66,40.598525,-73.959185,Homecrest
67,40.681999,-73.890346,Highland Park
68,40.609378,-73.948415,Madison


In [69]:
df['latitude'] = pd.to_numeric(df['latitude'])

In [70]:
type(df.iloc[0]['latitude'])

numpy.float64

In [73]:
def neighborhood_classifier(lat):
    # print(lat)
    neighbor = df[df['latitude'] < lat]['latitude'].idxmax()
    return df.iloc[neighbor]['neighborhood']

In [77]:
neighborhood_classifier(rideshare.iloc[0]['start_lat'])

'Prospect Park South'

In [75]:
df.describe()

Unnamed: 0,latitude
count,70.0
mean,40.651089
std,0.039512
min,40.574293
25%,40.616786
50%,40.653845
75%,40.681634
max,40.730201


---

#### Loading brooklyn housing data

In [60]:
housing_df = pd.read_csv('../data/nyc_housing_prices_jul_2020.csv')
brooklyn_housing_data = housing_df[housing_df['borough'] == 'brooklyn']
brooklyn_housing_data.head(2)

Unnamed: 0,borough,neigborhood,studio,1_bedroom,2_bedroom,3_bedroom
33,brooklyn,Bedford-Stuyvesant,"$2,044","$2,297","$2,611","$3,214"
34,brooklyn,Boerum Hill,"$2,092","$2,854","$4,356","$4,921"


In [66]:
brooklyn_housing_data.rename(columns={'neigborhood': 'neighborhood'}, inplace=True)

In [64]:
merged_df = df.merge(brooklyn_housing_data, how='inner', on='neighborhood')

In [118]:
merged_df.head(2)

Unnamed: 0,latitude,longitude,neighborhood,borough,studio,1_bedroom,2_bedroom,3_bedroom
0,40.645103,-74.010316,Sunset Park,brooklyn,"$1,400","$1,912","$2,300","$2,500"
1,40.730201,-73.954241,Greenpoint,brooklyn,"$2,347","$2,812","$3,708","$5,278"


---

#### Google places API data (restaurants)

In [8]:
API_KEY = 'AIzaSyDIR1htuqa4Pctefq6f7JpH3--bmm5dZHg'

In [9]:
resp = requests.get(f'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=40.730201%2C-73.954241&radius=1500&type=restaurant&key={API_KEY}')

In [10]:
len(resp.json()['results'])

20

In [11]:
def get_restaruant_count(lat, long):
    resp = requests.get(f'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat}%2C{long}&radius=1500&type=restaurant&key={API_KEY}')
    return len(resp.json()['results'])

In [12]:
df['restaruant_count'] = df.apply(lambda x: get_restaruant_count(x.latitude, x.longitude), axis=1)

#### Squirrel Data

In [43]:
squirrel_df = pd.read_csv('../data/extra/2018_Central_Park_Squirrel_Census_-_Squirrel_Data.csv')

In [44]:
squirrel_df = squirrel_df[['X', 'Y', 'Shift', 'Age', 'Primary Fur Color', 'Location', 'Lat/Long']]

-------------------------------------

In [45]:
squirrel_df['neighborhood'] = squirrel_df.apply(lambda x: neighborhood_classifier(x.Y), axis=1)

In [48]:
type(squirrel_df.iloc[0].X)

numpy.float64

#### RideShare Data

In [101]:
rideshare = pd.read_csv('other-Lyft_B02510.csv')
rideshare.dropna(inplace=True)
rideshare.head(2)
rideshare = rideshare[['start_lat', 'start_lng']]

In [102]:
rideshare.head(2)

Unnamed: 0,start_lat,start_lng
5109,40.68379,-73.97592
5110,40.66873,-73.9328


In [98]:
rideshare = pd.to_numeric(rideshare.start_lat)

In [113]:
rideshare = rideshare[rideshare['start_lat'] > 40.574293]
rideshare = rideshare[rideshare['start_lat'] < 40.730201]

In [114]:
rideshare.describe()

Unnamed: 0,start_lat,start_lng
count,498.0,498.0
mean,40.699339,-73.960564
std,0.029769,0.056117
min,40.57552,-74.16172
25%,40.684508,-73.99585
50%,40.709525,-73.98076
75%,40.720818,-73.945943
max,40.7301,-73.74363


In [100]:
df.describe()

Unnamed: 0,latitude
count,70.0
mean,40.651089
std,0.039512
min,40.574293
25%,40.616786
50%,40.653845
75%,40.681634
max,40.730201


In [111]:
squirrel_df.describe()

Unnamed: 0,X,Y
count,3023.0,3023.0
mean,-73.967184,40.780853
std,0.007726,0.010285
min,-73.981159,40.764911
25%,-73.973102,40.771676
50%,-73.968594,40.778166
75%,-73.960189,40.791219
max,-73.949722,40.800119


In [115]:
rideshare.apply(lambda x: neighborhood_classifier(x.start_lat), axis=1)

5109          Cypress Hills
5110            Brownsville
5111            Cobble Hill
5113             North Side
5114        Paerdegat Basin
                ...        
267682    East Williamsburg
267683         Clinton Hill
267685    East Williamsburg
267690           North Side
267691           North Side
Length: 498, dtype: object