## Battle Of Neighborhoods - IBM Data Science Professioal Capstone
## Restaurant Location - Recommender System

### Introduction - Short story

A newbie in restaurants chain plans to open their branch in Manhattan, New York City and the vendor does not have any idea in picking an ideal location which is economic as well as profitable to them. So I being a data science practitioner would like to analyze the data and help them pick a right place

In [1]:
# Import all the necessary libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library

print('Libraries imported.')


Libraries imported.


### Download the NewYork City data from https://ibm.box.com/shared/static/fbpwbovar7lf8p5sgddm06cgipa2rxpe.json
### Save it as 'newyork.json'

In [2]:
with open('newyork.json') as json_data:
    newyorkCity_data = json.load(json_data)
newyorkCity_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [3]:
neighborhoods_data = newyorkCity_data['features']

### Transform the data

Load the data from dictionary into pandas dataframe. Also label the necessaru columns and append the data to the dataframe

In [4]:
# define the pandas dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# Assign columns to a empty dataframe
neighborhoods = pd.DataFrame(columns=column_names)

for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

### Filter the borough - Manhattan and preview the data

In [5]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


### Geocoder

Find the location details - latitude & longitude on Manhattan City using geocoder

In [6]:
address = 'Manhattan, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


### create map of Manhattan using latitude and longitude values

In [7]:

map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

### FourSquare API Credentials

In [8]:
CLIENT_ID = 'YUWOO1JLRIVU0R40QZ05IODMDMDD54DX3DVH5SEXLSZKU5F4' # your Foursquare ID
CLIENT_SECRET = 'KEHYSKHETV4KGPOBLXX3WGGDNR4LGPFKY1Z3WS2PAJPQWZQL' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT=50
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YUWOO1JLRIVU0R40QZ05IODMDMDD54DX3DVH5SEXLSZKU5F4
CLIENT_SECRET:KEHYSKHETV4KGPOBLXX3WGGDNR4LGPFKY1Z3WS2PAJPQWZQL


In [9]:
search_query = 'restaurant'
radius = 5000

In [10]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}\
&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,\
                                                                    latitude, longitude, VERSION, search_query, radius, LIMIT)

In [11]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cbd428edb04f530c54aa629'},
 'response': {'venues': [{'id': '4a897cb1f964a5201f0820e3',
    'name': '3 Guys Restaurant',
    'location': {'address': '49 E 96th St',
     'crossStreet': 'Madison Ave',
     'lat': 40.787442622504265,
     'lng': -73.95403610873488,
     'labeledLatLngs': [{'label': 'display',
       'lat': 40.787442622504265,
       'lng': -73.95403610873488}],
     'distance': 570,
     'postalCode': '10128',
     'cc': 'US',
     'city': 'New York',
     'state': 'NY',
     'country': 'United States',
     'formattedAddress': ['49 E 96th St (Madison Ave)',
      'New York, NY 10128',
      'United States']},
    'categories': [{'id': '4bf58dd8d48988d147941735',
      'name': 'Diner',
      'pluralName': 'Diners',
      'shortName': 'Diner',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/diner_',
       'suffix': '.png'},
      'primary': True}],
    'delivery': {'id': '278300',
     'url': 'https://www.seamles

In [12]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)

In [13]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
manhattan_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
manhattan_filtered['categories'] = manhattan_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
manhattan_filtered.columns = [column.split('.')[-1] for column in manhattan_filtered.columns]

manhattan_filtered.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,3 Guys Restaurant,Diner,49 E 96th St,US,New York,United States,Madison Ave,570,"[49 E 96th St (Madison Ave), New York, NY 1012...","[{'label': 'display', 'lat': 40.78744262250426...",40.787443,-73.954036,,10128,NY,4a897cb1f964a5201f0820e3
1,Gabriela's Restaurant & Tequila Bar,Mexican Restaurant,688 Columbus Ave,US,New York,United States,at 93rd St.,761,"[688 Columbus Ave (at 93rd St.), New York, NY ...","[{'label': 'display', 'lat': 40.79123991711048...",40.79124,-73.968735,,10025,NY,49f3ab02f964a520d16a1fe3
2,Carmine's Italian Restaurant,Italian Restaurant,2450 Broadway,US,New York,United States,btwn W 90th & W 91st,1198,"[2450 Broadway (btwn W 90th & W 91st), New Yor...","[{'label': 'display', 'lat': 40.7910963, 'lng'...",40.791096,-73.973991,,10024,NY,4a7778a1f964a5209be41fe3
3,Fred's Restaurant,American Restaurant,476 Amsterdam Ave.,US,New York,United States,at W 83rd St,1492,"[476 Amsterdam Ave. (at W 83rd St), New York, ...","[{'label': 'display', 'lat': 40.78565778922316...",40.785658,-73.976539,,10024,NY,44281118f964a520ba311fe3
4,Junior's Restaurant & Bakery,American Restaurant,1515 Broadway,US,New York,United States,at W 45th St,4188,"[1515 Broadway (at W 45th St), New York, NY 10...","[{'label': 'display', 'lat': 40.75834934354621...",40.758349,-73.986513,Theater District,10036,NY,462a6065f964a520d9451fe3


### Drop the irrelevant columns and preview the dataset

In [14]:
manhattan_new=manhattan_filtered.drop(['categories','address','cc','city','country','crossStreet','distance','formattedAddress','labeledLatLngs','neighborhood','state'], axis=1)
manhattan_new.head()

Unnamed: 0,name,lat,lng,postalCode,id
0,3 Guys Restaurant,40.787443,-73.954036,10128,4a897cb1f964a5201f0820e3
1,Gabriela's Restaurant & Tequila Bar,40.79124,-73.968735,10025,49f3ab02f964a520d16a1fe3
2,Carmine's Italian Restaurant,40.791096,-73.973991,10024,4a7778a1f964a5209be41fe3
3,Fred's Restaurant,40.785658,-73.976539,10024,44281118f964a520ba311fe3
4,Junior's Restaurant & Bakery,40.758349,-73.986513,10036,462a6065f964a520d9451fe3


### Understand and Visualize the data

In [15]:
manhattan_new.describe()

Unnamed: 0,lat,lng
count,50.0,50.0
mean,40.780852,-73.964312
std,0.017153,0.015166
min,40.745056,-73.988838
25%,40.766806,-73.977296
50%,40.78165,-73.965284
75%,40.792142,-73.951439
max,40.813187,-73.935859


In [16]:
import matplotlib.pyplot as plt

plt.subplot(1,2,1)
#dataframe_filtered1.plot.box()
manhattan_new["lat"].plot.hist()
plt.subplot(1,2,2)
manhattan_new["lng"].plot.hist()
plt.show()
#data not normalized

<Figure size 640x480 with 2 Axes>

In [17]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=14) # generate map

# add popular spots to the map as blue circle markers

for lat, lng, label in zip(manhattan_new['lat'], manhattan_new['lng'], manhattan_new['name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6,
        parse_html=False
        ).add_to(venues_map)


venues_map

In [18]:
manhattan_new.shape

(50, 5)

### Venues located in each postal code

In [19]:
manhattan_new.groupby('postalCode').count()

Unnamed: 0_level_0,name,lat,lng,id
postalCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10001,1,1,1,1
10017,2,2,2,2
10019,4,4,4,4
10021,1,1,1,1
10022,1,1,1,1
10023,1,1,1,1
10024,3,3,3,3
10025,7,7,7,7
10026,2,2,2,2
10027,2,2,2,2


### Analyze and transform the data using One Hot Encoding

In [20]:
manhattan_new_onehot = pd.get_dummies(manhattan_new[['name']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_new_onehot['postalCode'] = manhattan_new['postalCode'] 

# move name column to the first column
fixed_columns = [manhattan_new_onehot.columns[-1]] + list(manhattan_new_onehot.columns[:-1])
manhattan_new_onehot = manhattan_new_onehot[fixed_columns]
manhattan_new_onehot.head()

Unnamed: 0,postalCode,3 Guys Restaurant,525LEX Restaurant & Lounge,88 Chinese Restaurant,Acosta Deli - Restaurant,Agora Turkish Restaurant,Bellini Italian Restaurant & Brick Oven Pizza,Carmine's Italian Restaurant,Carmine’s Italian Restaurant,Chinatown East Restaurant,...,Sylvia's Restaurant,The Carlyle Restaurant,The Flame Restaurant,The New Amity Restaurant,The NoMad Restaurant,The View Restaurant & Lounge,Three Guys Restaurant,Tom's Restaurant,Westside Restaurant,Yuka Japanese Restaurant
0,10128,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,10025,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,10024,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,10024,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,10036,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
manhattan_new_onehot.shape

(50, 50)

### Group the venues data based on postal code

In [22]:
manhattan_new_grouped = manhattan_new_onehot.groupby('postalCode').mean().reset_index()
manhattan_new_grouped

Unnamed: 0,postalCode,3 Guys Restaurant,525LEX Restaurant & Lounge,88 Chinese Restaurant,Acosta Deli - Restaurant,Agora Turkish Restaurant,Bellini Italian Restaurant & Brick Oven Pizza,Carmine's Italian Restaurant,Carmine’s Italian Restaurant,Chinatown East Restaurant,...,Sylvia's Restaurant,The Carlyle Restaurant,The Flame Restaurant,The New Amity Restaurant,The NoMad Restaurant,The View Restaurant & Lounge,Three Guys Restaurant,Tom's Restaurant,Westside Restaurant,Yuka Japanese Restaurant
0,10001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,10017,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10021,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,10022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,10023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,10024,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,10025,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
8,10026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,10027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
manhattan_new_grouped.shape

(17, 50)

In [40]:
num_top_Restaurant = 10

for hood in manhattan_new_grouped['postalCode']:
    print("----"+hood+"----")
    temp = manhattan_new_grouped[manhattan_new_grouped['postalCode'] == hood].T.reset_index()
    temp.columns = ['Restaurant Name','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_Restaurant))
    print('\n')

----10001----
                        Restaurant Name  freq
0                  The NoMad Restaurant   1.0
1                     3 Guys Restaurant   0.0
2            Patsy's Italian Restaurant   0.0
3           Lolita's Mexican Restaurant   0.0
4                 Malecon Restaurant II   0.0
5                     Mama’s Restaurant   0.0
6                    Manna's Restaurant   0.0
7          Nick's Restaurant & Pizzeria   0.0
8          O'Donoghues Pub & Restaurant   0.0
9  P. J. Moran's Irish Pub & Restaurant   0.0


----10017----
                                 Restaurant Name  freq
0                     525LEX Restaurant & Lounge   0.5
1           P. J. Moran's Irish Pub & Restaurant   0.5
2                     Patsy's Italian Restaurant   0.0
3  Lips Drag Queen Show Palace, Restaurant & Bar   0.0
4                    Lolita's Mexican Restaurant   0.0
5                          Malecon Restaurant II   0.0
6                              Mama’s Restaurant   0.0
7                       

In [41]:
def return_most_common_venues(row, num_top_Restaurant):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_Restaurant]

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['postalCode']
for ind in np.arange(num_top_Restaurant):
    try:
        columns.append('{}{} Most Common Restaurant'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Restaurant'.format(ind+1))

# create a new dataframe
manhattan_new_sorted = pd.DataFrame(columns=columns)
manhattan_new_sorted['postalCode'] = manhattan_new_grouped['postalCode']

for ind in np.arange(manhattan_new_grouped.shape[0]):
    manhattan_new_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_new_grouped.iloc[ind, :], num_top_venues)

manhattan_new_sorted

Unnamed: 0,postalCode,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,10001,The NoMad Restaurant,Yuka Japanese Restaurant,Demarchelier Restaurant,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant,Genesis Bar & Restaurant
1,10017,525LEX Restaurant & Lounge,P. J. Moran's Irish Pub & Restaurant,Yuka Japanese Restaurant,El Ranchito Restaurant,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant
2,10019,The Flame Restaurant,SOUTHGATE Bar & Restaurant,Patsy's Italian Restaurant,Connolly's Pub & Restaurant,Yuka Japanese Restaurant,Demarchelier Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant
3,10021,Three Guys Restaurant,Yuka Japanese Restaurant,Demarchelier Restaurant,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant,Genesis Bar & Restaurant
4,10022,"Lips Drag Queen Show Palace, Restaurant & Bar",Yuka Japanese Restaurant,El Ranchito Restaurant,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant,Genesis Bar & Restaurant
5,10023,Westside Restaurant,El Ranchito Restaurant,Junior's Restaurant & Bakery,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant,Genesis Bar & Restaurant
6,10024,Bellini Italian Restaurant & Brick Oven Pizza,Carmine's Italian Restaurant,Fred's Restaurant,Yuka Japanese Restaurant,El Ranchito Restaurant,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant
7,10025,Kouzan Japanese Restaurant,El Ranchito Restaurant,Tom's Restaurant,Acosta Deli - Restaurant,Malecon Restaurant II,Gennaro Restaurant,Gabriela's Restaurant & Tequila Bar,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant
8,10026,Lolita's Mexican Restaurant,Chocolat Restaurant & Bar,Yuka Japanese Restaurant,El Ranchito Restaurant,Judy's Restaurant,Jacob Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant
9,10027,Jacob Restaurant,Sylvia's Restaurant,Yuka Japanese Restaurant,Demarchelier Restaurant,Judy's Restaurant,Indo Pak Halal Restaurant,Heidelberg Restaurant,Giovanna's Restaurant,Gennaro Restaurant,Genesis Bar & Restaurant


### Cluster the neighborhoods of Manhattan City

In [43]:
kclusters = 5


dataframe_grouped_clustering = manhattan_new_grouped.drop('postalCode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dataframe_grouped_clustering)

# check cluster labels generated for each row in the dataframe
labels=kmeans.labels_[0:19]
labels

array([0, 0, 0, 1, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0])

In [44]:
dataframe_grouped_clustering["Labels"] = labels
dataframe_grouped_clustering.head(5)

Unnamed: 0,3 Guys Restaurant,525LEX Restaurant & Lounge,88 Chinese Restaurant,Acosta Deli - Restaurant,Agora Turkish Restaurant,Bellini Italian Restaurant & Brick Oven Pizza,Carmine's Italian Restaurant,Carmine’s Italian Restaurant,Chinatown East Restaurant,Chocolat Restaurant & Bar,...,The Carlyle Restaurant,The Flame Restaurant,The New Amity Restaurant,The NoMad Restaurant,The View Restaurant & Lounge,Three Guys Restaurant,Tom's Restaurant,Westside Restaurant,Yuka Japanese Restaurant,Labels
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
1,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [45]:
dataframe_grouped_clustering.groupby('Labels').mean()

Unnamed: 0_level_0,3 Guys Restaurant,525LEX Restaurant & Lounge,88 Chinese Restaurant,Acosta Deli - Restaurant,Agora Turkish Restaurant,Bellini Italian Restaurant & Brick Oven Pizza,Carmine's Italian Restaurant,Carmine’s Italian Restaurant,Chinatown East Restaurant,Chocolat Restaurant & Bar,...,Sylvia's Restaurant,The Carlyle Restaurant,The Flame Restaurant,The New Amity Restaurant,The NoMad Restaurant,The View Restaurant & Lounge,Three Guys Restaurant,Tom's Restaurant,Westside Restaurant,Yuka Japanese Restaurant
Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.012821,0.038462,0.0,0.010989,0.012821,0.025641,0.025641,0.012821,0.012821,0.038462,...,0.038462,0.038462,0.019231,0.012821,0.076923,0.012821,0.0,0.010989,0.0,0.012821
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
#markers_colors = []
for lat, lng, postalCode, cluster in zip(manhattan_new['lat'], manhattan_new['lng'],\
                                                 manhattan_new['postalCode'],kmeans.labels_):
    label = folium.Popup(str(postalCode) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],fill=True,fill_color=rainbow[cluster-1],
        fill_opacity=0.6,
        parse_html=False).add_to(map_clusters)  
    
map_clusters

### We conclude that the postalCode area {10024, 10029, 10027} having moderate numbers of restaurants will be an ideal place to open a restaurant