# Capstone Project Notebook

### This notebook will be the primary notebook in my Capstone project based on the Foursquare API

In [1]:
### Library Imports 
import pandas as pd
# Show every column when a wide DataFrame is displayed.
pd.set_option('display.max_columns',None)
# Uncomment to show every row as well (the option key is 'display.max_rows'):
#pd.set_option('display.max_rows',None)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from geopy.geocoders import Nominatim

import json 
from pandas.io.json import json_normalize
import folium

In [2]:
# Sanity check that the kernel is running.
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


### Data Gathering

The first step is to acquire all the relevant data for the study, starting with the GeoJSON data for the New York City boroughs and neighbourhoods. The data was acquired from https://geo.nyu.edu/catalog/nyu_2451_34572

The data was downloaded as a GeoJSON file and renamed to `nyu_geojson.json`.

In [28]:
# Load the New York neighbourhood GeoJSON. The encoding is given explicitly
# because JSON is UTF-8 while open() otherwise uses the platform default.
with open('nyu_geojson.json', encoding='utf-8') as json_data:
    total_newyork_data = json.load(json_data)

In [29]:
# Inspect the first feature to see how a single record is laid out.
total_newyork_data['features'][0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [30]:
# Keep only the list of feature records from the loaded GeoJSON.
total_neighbourhoods = total_newyork_data['features']

In [31]:
# Column layout of the per-neighbourhood table.
column_names = [
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Empty table with those columns, ready to receive the neighbourhood records.
neighbourhoods = pd.DataFrame(columns=column_names)

In [32]:
# Flatten every GeoJSON feature into one row of the neighbourhood table.
# The rows are collected in a list and the DataFrame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration. Building from scratch also means re-running this cell no
# longer duplicates rows.
neighbourhood_rows = []
for feature in total_neighbourhoods:
    properties = feature['properties']
    # Point coordinates are stored as [longitude, latitude].
    neighborhood_lon, neighborhood_lat = feature['geometry']['coordinates'][:2]
    neighbourhood_rows.append({'Borough': properties['borough'],
                               'Neighborhood': properties['name'],
                               'Latitude': neighborhood_lat,
                               'Longitude': neighborhood_lon})

# Passing the column list keeps the column order (and the columns themselves,
# if there are no features at all).
neighbourhoods = pd.DataFrame(neighbourhood_rows, columns=column_names)

In [33]:
# Preview the first rows of the neighbourhood table.
neighbourhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [34]:
# Report how many distinct boroughs and how many neighbourhood rows were loaded.
borough_count = len(neighbourhoods['Borough'].unique())
neighbourhood_count = neighbourhoods.shape[0]
summary_line = 'The dataframe has {} boroughs and {} neighborhoods.'.format(
    borough_count, neighbourhood_count
)
print(summary_line)

The dataframe has 5 boroughs and 306 neighborhoods.


Since I am only concerned with the Manhattan neighbourhoods, I will filter the dataframe accordingly. 

In [35]:
# Keep only the Manhattan rows and renumber them from zero.
is_manhattan = neighbourhoods['Borough'].eq('Manhattan')
manhattan_neighbourhoods = neighbourhoods.loc[is_manhattan].reset_index(drop=True)

In [36]:
# Display the Manhattan-only table.
manhattan_neighbourhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


Get the coordinates for Manhattan Island and use them to centre the map.

In [37]:
# Look up the coordinates of Manhattan; they are used to centre the map.
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute reads that follow.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [82]:
# Base map centred on the geocoded Manhattan coordinates.
manhattan_map = folium.Map( location=[latitude,longitude], zoom_start = 11)

In [83]:
# Render the map inline.
manhattan_map

In [84]:
# Load the NTA GeoJSON file. The encoding is given explicitly because JSON is
# UTF-8 while open() otherwise uses the platform default.
with open('nyu-NTA.json', encoding='utf-8') as json_data:
    data = json.load(json_data)
# Peek at the borough name of the first feature to confirm the field layout.
data['features'][0]['properties']['boroname']

'Bronx'

In [85]:
# Collect the features whose borough name is Manhattan.
ls = [
    feature
    for feature in data['features']
    if feature['properties']['boroname'] == 'Manhattan'
]

In [86]:
# Shallow copy of the loaded GeoJSON whose 'features' entry is replaced by the
# Manhattan-only list; the original `data` dict keeps its full feature list.
data_manhattan = dict(data, features=ls)

In [87]:
# Column layout of the Manhattan name/position table.
columns = [
    'Neighbourhood',
    'Latitude',
    'Longitude',
]
# Empty table with those columns.
manhattan_names = pd.DataFrame(columns=columns)

In [88]:
# Build one row per Manhattan feature: its name plus the midpoint of its
# bounding box as an approximate marker position.
# The rows are collected in a list and the DataFrame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration. Building from scratch also means re-running this cell no
# longer duplicates rows.
name_rows = []
for feature in data_manhattan['features']:
    properties = feature['properties']
    bbox = properties["bbox"]
    name_rows.append({
        'Neighbourhood': properties["ntaname"],
        # bbox entries 1 and 3 are averaged into the latitude,
        # entries 0 and 2 into the longitude.
        'Latitude': (bbox[1] + bbox[3]) / 2,
        'Longitude': (bbox[0] + bbox[2]) / 2,
    })

manhattan_names = pd.DataFrame(name_rows, columns=columns)

In [93]:
# Load the community-district GeoJSON. The encoding is given explicitly
# because JSON is UTF-8 while open() otherwise uses the platform default.
with open('nyccommunitydistricts.json', encoding='utf-8') as json_data:
    dat = json.load(json_data)
# NOTE(review): this echoes the entire structure, so the cell output is very long.
dat

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'id': 0,
   'properties': {'communityDistrict': 101,
    '@id': 'http://nyc.pediacities.com/Resource/CommunityDistrict/101'},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-74.01115034338935, 40.725777216880076],
      [-74.01081238260726, 40.72578980255575],
      [-73.99931241700145, 40.71755024177738],
      [-74.0005783921612, 40.71557090292421],
      [-74.0004546306596, 40.71436504759887],
      [-74.0009317433238, 40.71326325304528],
      [-74.00086781464309, 40.71156815886214],
      [-74.00050986097233, 40.710984270453366],
      [-74.0014714023208, 40.70974655432324],
      [-73.99919451174895, 40.70794737635146],
      [-74.00118685262828, 40.70685982577175],
      [-74.00098276757775, 40.706416712992926],
      [-74.00140666691954, 40.70617938802063],
      [-74.0005829704798, 40.70543393364888],
      [-74.00143661245443, 40.70487217770522],
      [-74.00206142563805, 40.70541700463557],
      [

In [92]:
 # Shade the Manhattan polygons in translucent green.
 # A plain GeoJson layer with a fixed style is used: no data is bound to the
 # shapes, and Map.choropleth() is deprecated and missing from recent folium
 # releases. The old call's unused legend_name and reset arguments are dropped.
folium.GeoJson(
    data_manhattan,
    style_function=lambda feature: {
        'fillColor': 'green',
        'fillOpacity': 0.2,
        # Black 1px outline, matching folium's choropleth defaults.
        'color': 'black',
        'weight': 1,
        'opacity': 0.2,
    },
).add_to(manhattan_map)

# Add one labelled circle marker per Manhattan neighbourhood.
# The loop variables are deliberately not called latitude/longitude: those
# names hold the Manhattan centre used to create the map, and reusing them
# here would overwrite it for any cell run afterwards.
for marker_lat, marker_lon, marker_name in zip(manhattan_names['Latitude'],
                                               manhattan_names['Longitude'],
                                               manhattan_names['Neighbourhood']):
    folium.CircleMarker(
        [marker_lat, marker_lon],
        radius=5,
        # parse_html=True makes the popup treat the name as text, not markup.
        popup=folium.Popup(marker_name, parse_html=True),
        fill=True,
        fill_color='blue',
        fill_opacity=0.2,
    ).add_to(manhattan_map)

# Display the map with the polygons and markers.
manhattan_map