In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [3]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [4]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [5]:
neighborhoods_data = newyork_data['features']

## Transform Data into Pandas Dataframe

In [6]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [7]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [8]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [9]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


### Use geopy library to get the latitude and longitude values of New York City.

In [10]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


##### Create a map of New York with neighborhoods superimposed on top.¶

In [11]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

## Explore the Manhattan  Neighborhood to answer client's question

In [12]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [13]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


In [14]:
# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

### Define Foursquare Credentials and Version

In [15]:
CLIENT_ID = 'ZUEECEU4REJMQLBB24SWVSY4WHD1HQGOTBAWWC50KCFKUZWB' # your Foursquare ID
CLIENT_SECRET = 'D5WHA5NLZZJSH55VWRKDPABNHH3HYXWJR0XML4DM1RMVNZZE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZUEECEU4REJMQLBB24SWVSY4WHD1HQGOTBAWWC50KCFKUZWB
CLIENT_SECRET:D5WHA5NLZZJSH55VWRKDPABNHH3HYXWJR0XML4DM1RMVNZZE


In [17]:
# function to repeat the exploring process to all the neighborhoods in Toronto
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=50, categoryIds=''):
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)

            if (categoryIds != ''):
                url = url + '&categoryId={}'
                url = url.format(categoryIds)

            # make the GET request
            response = requests.get(url).json()
            results = response["response"]['venues']
            #results = requests.get(url).json()["response"]['venues']
            
            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    except:
         pass

    return(nearby_venues)


In [19]:
# Food joints.
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 50 # define radius

manhattan_venues_food = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],categoryIds='4d4b7105d754a06374d81259')



In [20]:
# Checking the resulting dataframe
print(manhattan_venues_food.shape)
manhattan_venues_food.head()

(394, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Chinatown,40.715618,-73.994279,Time Café,40.715675,-73.994039,Coffee Shop
1,Chinatown,40.715618,-73.994279,Off The Bridge,40.715622,-73.994237,Bike Shop
2,Chinatown,40.715618,-73.994279,Popeyes Louisiana Kitchen,40.715875,-73.99502,Fried Chicken Joint
3,Chinatown,40.715618,-73.994279,Möge Tea 愿茶,40.715464,-73.993771,Bubble Tea Shop
4,Chinatown,40.715618,-73.994279,GC Egg Rolls House,40.715256,-73.994334,Bakery


In [21]:
# check how many venues were returned for each neighborhood
manhattan_venues_food.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,2,2,2,2,2,2
Carnegie Hill,17,17,17,17,17,17
Central Harlem,4,4,4,4,4,4
Chelsea,9,9,9,9,9,9
Chinatown,15,15,15,15,15,15
Civic Center,9,9,9,9,9,9
Clinton,5,5,5,5,5,5
East Harlem,5,5,5,5,5,5
East Village,22,22,22,22,22,22
Financial District,16,16,16,16,16,16


In [22]:
# function to add markers for given venues to map
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], df['Venue Longitude'], df['Neighborhood'], df['Venue'], df['Venue Category']):
        label = '{} ({}) - {}'.format(venue, venueCat, local)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

In [27]:
map_manhattan_food = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(manhattan_venues_food, 'red', map_manhattan_food)
map_manhattan_food

In [30]:
# Shops & Services.  checking for the number of shops within 50 radius in the manhattan neighborhood
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 50 # define radius

manhattan_venues_shop = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],categoryIds='4d4b7105d754a06378d81259')


In [31]:
manhattan_venues_shop.shape

(541, 7)

In [33]:
map_manhattan_shop = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(manhattan_venues_shop, 'red', map_manhattan_shop)
map_manhattan_shop

In [34]:
# Offices.
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 50 # define radius

manhattan_venues_col = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],categoryIds='4d4b7105d754a06372d81259')


In [35]:
manhattan_venues_col.shape

(86, 7)

In [36]:
map_manhattan_col = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(manhattan_venues_col, 'red', map_manhattan_col)
map_manhattan_col

In [37]:
def addColumn(startDf, columnTitle, dataDf):
    grouped = dataDf.groupby('Neighborhood').count()
    
    for n in startDf['Neighborhood']:
        try:
            startDf.loc[startDf['Neighborhood'] == n,columnTitle] = grouped.loc[n, 'Venue']
        except:
            startDf.loc[startDf['Neighborhood'] == n,columnTitle] = 0

In [38]:
man_data = manhattan_data.copy()

addColumn(man_data, 'Food Joints', manhattan_venues_food)
addColumn(man_data, 'Shops & Services', manhattan_venues_shop)
addColumn(man_data, 'Office', manhattan_venues_col)
man_data

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Food Joints,Shops & Services,Office
0,Manhattan,Marble Hill,40.876551,-73.91066,0.0,0.0,0.0
1,Manhattan,Chinatown,40.715618,-73.994279,15.0,16.0,0.0
2,Manhattan,Washington Heights,40.851903,-73.9369,0.0,1.0,0.0
3,Manhattan,Inwood,40.867684,-73.92121,23.0,25.0,0.0
4,Manhattan,Hamilton Heights,40.823604,-73.949688,2.0,2.0,0.0
5,Manhattan,Manhattanville,40.816934,-73.957385,1.0,0.0,1.0
6,Manhattan,Central Harlem,40.815976,-73.943211,4.0,12.0,1.0
7,Manhattan,East Harlem,40.792249,-73.944182,5.0,17.0,0.0
8,Manhattan,Upper East Side,40.775639,-73.960508,1.0,6.0,0.0
9,Manhattan,Yorkville,40.77593,-73.947118,7.0,13.0,0.0


In [48]:
# I will define some weights and give reasons
# negative weight, because Abeledata Restaurant and thus wants to avoid concurrence as much as possible
weight_food = -1

# positive weight, because shop owners and workers are good customers
weight_shop = 1

# positive weight because employees are even better customers
weight_office = 2

In [49]:
man_weight = man_data[['Neighborhood']].copy()

In [50]:
man_weight['Score'] = man_data['Food Joints'] * weight_food + man_data['Shops & Services'] * weight_shop + man_data['Office'] * weight_office
man_weight = man_weight.sort_values(by=['Score'], ascending=False)
man_weight

Unnamed: 0,Neighborhood,Score
15,Midtown,54.0
23,Soho,46.0
26,Morningside Heights,42.0
38,Flatiron,33.0
27,Gramercy,24.0
12,Upper West Side,16.0
32,Civic Center,14.0
7,East Harlem,12.0
29,Financial District,12.0
18,Greenwich Village,11.0


In [53]:
map_man_result = folium.Map(location=[latitude, longitude], zoom_start=14)

man_winner = manhattan_data[manhattan_data['Neighborhood'] == 'Midtown']

for lat, lng, local in zip(man_winner['Latitude'], man_winner['Longitude'], man_winner['Neighborhood']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_man_result) 

addToMap(manhattan_venues_food[manhattan_venues_food['Neighborhood'] == 'Midtown'], 'red', map_man_result)
addToMap(manhattan_venues_shop[manhattan_venues_shop['Neighborhood'] == 'Midtown'], 'green', map_man_result)
addToMap(manhattan_venues_col[manhattan_venues_col['Neighborhood'] == 'Midtown'], 'gold', map_man_result)

map_man_result

### Schools, offices and Public transportation facility around restaurant
The recommended restaurant location needs to have many Schools, offices and Public transportation venues nearby.
These data can be found by using FourSquare API to find these venues around the location. The radius of exploration distance is set to 500 meters, which is about 5 minutes walking distance.

Following type of venue category will be used to search

In [None]:
# Midtown is the best place to open the restaurant!