In [230]:
#Opening French Restaurant in Toronto#
##This report will explore the Toronto area and see which Neighbourhood is more suitable for opening a French restaurant##

In [263]:
#Imports
from dotenv import load_dotenv
import os
import geocoder
from geopy.geocoders import Nominatim 
import pandas as pd
import numpy as np
import requests
import json
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer as TfV
from sklearn.preprocessing import StandardScaler
import folium
import random 
import matplotlib.pyplot as plt

In [20]:
#Getting Credentials for Foursquared
load_dotenv('.env')
CLIENT_ID=os.getenv('CLIENT_ID')
CLIENT_SECRET=os.getenv('CLIENT_SECRET')
VERSION=os.getenv('VERSION')

In [21]:
#Getting Toronto Coordinates
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [22]:
#setting up Foursqaured API Call url
LIMIT=1000
radius=1000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)

In [12]:
#Calling the API
request=requests.get(url).json()

In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
#Exploring
venues = request['response']['groups'][0]['items']
explore_venues = json_normalize(venues)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
explore_venues =explore_venues.loc[:, filtered_columns]
explore_venues['venue.categories'] = explore_venues.apply(get_category_type, axis=1)
explore_venues.columns = [col.split(".")[-1] for col in explore_venues.columns]
explore_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Indigo,Bookstore,43.653515,-79.380696
3,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
4,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641


In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [23]:
#Creating Dataframe
wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_page = requests.get(wiki)
wiki_raw = pd.read_html(wiki_page.content, header = 0)[0]
df = wiki_raw[wiki_raw.Neighbourhood != 'Not assigned']
df.reset_index(inplace = True)
geo= 'http://cocl.us/Geospatial_data'
df_geo = pd.read_csv(geo)
df_geo.head()
df_toronto=pd.merge(df,df_geo,on='Postal Code')
df_venues=getNearbyVenues(names=df_toronto['Neighbourhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude'])
df_venues.head()

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,TTC stop #8380,43.752672,-79.326351,Bus Stop
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [252]:
#Restaurant locations
df_restaurant=df_venues[df_venues['Venue Category'].str.contains('Restaurant')].reset_index(drop=True)
df_restaurant

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
1,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant
2,"Regent Park, Harbourfront",43.654260,-79.360636,Impact Kitchen,43.656369,-79.356980,Restaurant
3,"Regent Park, Harbourfront",43.654260,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant
4,"Regent Park, Harbourfront",43.654260,-79.360636,Izumi,43.649970,-79.360153,Asian Restaurant
...,...,...,...,...,...,...,...
480,Church and Wellesley,43.665860,-79.383160,Asahi Sushi,43.669874,-79.382943,Sushi Restaurant
481,Church and Wellesley,43.665860,-79.383160,McDonald's,43.668854,-79.385962,Fast Food Restaurant
482,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Chick-n-Joy,43.665181,-79.321403,Fast Food Restaurant
483,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,The Green Wood,43.664728,-79.324117,Restaurant


In [253]:
df_restaurant['Cuisine Style']=None
for i in range(df_restaurant.shape[0]):
    if len(df_restaurant['Venue Category'][i]) > 10:
        df_restaurant['Cuisine Style'][i]=df_restaurant['Venue Category'][i].replace('Restaurant','')
    else:
        df_restaurant['Cuisine Style'][i]='No Category'

df_restaurant.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Cuisine Style
0,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant,Portuguese
1,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant,French
2,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant,No Category
3,"Regent Park, Harbourfront",43.65426,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant,French
4,"Regent Park, Harbourfront",43.65426,-79.360636,Izumi,43.64997,-79.360153,Asian Restaurant,Asian


In [254]:
#Counting the French competitors
print('There are',df_restaurant[df_restaurant['Cuisine Style']=='French '].count()['Cuisine Style'],'French competitors')

There are 11 French competitors


In [255]:
#Color generator
def generate_colors(n): 
  rgb_values = [] 
  hex_values = [] 
  r = int(random.random() * 256) 
  g = int(random.random() * 256) 
  b = int(random.random() * 256) 
  step = 256 / n 
  for _ in range(n): 
    r += step 
    g += step 
    b += step 
    r = int(r) % 256 
    g = int(g) % 256 
    b = int(b) % 256 
    r_hex = hex(r)[2:] 
    g_hex = hex(g)[2:] 
    b_hex = hex(b)[2:] 
    hex_values.append('#' + r_hex + g_hex + b_hex) 
    rgb_values.append((r,g,b)) 
  return rgb_values, hex_values

In [256]:
#Generate colors for cuisine
rgb_values, hex_values=generate_colors(len(df_restaurant['Cuisine Style'].unique()))
color_dict={'Cuisine Style':[],'Cuisine Color':[]}
df_color=pd.DataFrame(color_dict)
df_color['Cuisine Style']=df_restaurant['Cuisine Style'].unique()
k=0
for style,color in zip(df_restaurant['Cuisine Style'].unique(),hex_values):
    df_color['Cuisine Style'][k]=style
    df_color['Cuisine Color'][k]=color
    k+=1
df_restaurant=pd.merge(df_restaurant,df_color,on='Cuisine Style')
df_restaurant.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Cuisine Style,Cuisine Color
0,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant,Portuguese,#dc2d51
1,Central Bay Street,43.657952,-79.387383,Nando's,43.661728,-79.386391,Portuguese Restaurant,Portuguese,#dc2d51
2,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant,French,#e13256
3,"Regent Park, Harbourfront",43.65426,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant,French,#e13256
4,St. James Town,43.651494,-79.375418,Biff's Bistro,43.647085,-79.376342,French Restaurant,French,#e13256


In [257]:
#Plotting the cuisine on Toronto Map
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, label, color, style in zip(df_restaurant['Neighborhood Latitude'],df_restaurant['Neighborhood Longitude'],df_restaurant['Neighbourhood'],df_restaurant['Cuisine Color'], df_restaurant['Cuisine Style']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup='Neighbourhoods: {}\nCuisine Style: {}'.format(label,style),
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)
map_Toronto

We can see that there are more restaurants near the Billy Bishop Toronto City Airport. Next we can see where the French competitors are.

In [258]:
#Plotting the competitors
df_french=df_restaurant[df_restaurant['Cuisine Style']=='French ']
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, label, color, style in zip(df_french['Neighborhood Latitude'],df_french['Neighborhood Longitude'],df_french['Neighbourhood'],df_french['Cuisine Color'], df_french['Cuisine Style']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup='Neighbourhoods: {}\nCuisine Style: {}'.format(label,style),
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)
map_Toronto

As we can see, most French restaurants are crowded towards the airport. with only 1 French restarant in Victoria Village which is further away. Next we can see the most popular cuisine style in different Neighbourhoods.

In [259]:
cuisine_onehot=pd.get_dummies(df_restaurant[['Cuisine Style']], prefix="", prefix_sep="")
cuisine_onehot.insert(loc=0,column='Neighbourhood',value=df_restaurant['Neighbourhood'])
cuisine_onehot.head()

Unnamed: 0,Neighbourhood,American,Asian,Belgian,Brazilian,Cajun / Creole,Caribbean,Chinese,Colombian,Comfort Food,...,Portuguese,Ramen,Seafood,Sushi,Taiwanese,Thai,Theme,Tibetan,Vegetarian / Vegan,Vietnamese
0,Victoria Village,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,Central Bay Street,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,St. James Town,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [260]:
cuisine_grouped=cuisine_onehot.groupby('Neighbourhood').mean().reset_index()
cuisine_grouped.head()

Unnamed: 0,Neighbourhood,American,Asian,Belgian,Brazilian,Cajun / Creole,Caribbean,Chinese,Colombian,Comfort Food,...,Portuguese,Ramen,Seafood,Sushi,Taiwanese,Thai,Theme,Tibetan,Vegetarian / Vegan,Vietnamese
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bedford Park, Lawrence Manor East",0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,...,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,...,0.0,0.0,0.181818,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0


In [261]:
#Defining function to return most common cuisine
def return_most_common_cuisine(row, num_top_cuisine):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_cuisine]

In [262]:
num_top_cuisine = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top cuisine
columns = ['Neighbourhood']
for ind in np.arange(num_top_cuisine):
    try:
        columns.append('{}{} Most Common Cuisine'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Cuisine'.format(ind+1))

# create a new dataframe
neighborhoods_cuisine_sorted = pd.DataFrame(columns=columns)
neighborhoods_cuisine_sorted['Neighbourhood'] = cuisine_grouped['Neighbourhood']

for ind in np.arange(cuisine_grouped.shape[0]):
    neighborhoods_cuisine_sorted.iloc[ind, 1:] = return_most_common_cuisine(cuisine_grouped.iloc[ind, :], num_top_cuisine)

neighborhoods_cuisine_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Cuisine,2nd Most Common Cuisine,3rd Most Common Cuisine,4th Most Common Cuisine,5th Most Common Cuisine,6th Most Common Cuisine,7th Most Common Cuisine,8th Most Common Cuisine,9th Most Common Cuisine,10th Most Common Cuisine
0,Agincourt,Latin American,Vietnamese,Hakka,Gluten-free,German,French,Filipino,Fast Food,Falafel,Ethiopian
1,"Bathurst Manor, Wilson Heights, Downsview North",Sushi,No Category,Middle Eastern,Vietnamese,Doner,German,French,Filipino,Fast Food,Falafel
2,Bayview Village,Japanese,Chinese,Vietnamese,Dumpling,Gluten-free,German,French,Filipino,Fast Food,Falafel
3,"Bedford Park, Lawrence Manor East",Italian,Indian,No Category,Comfort Food,Fast Food,Greek,American,Thai,Sushi,Seafood
4,Berczy Park,Seafood,No Category,Greek,Vegetarian / Vegan,Thai,French,Japanese,Comfort Food,Eastern European,Dumpling


In [264]:
#Cluster model
clusters = KMeans(n_clusters=5, random_state=0).fit(cuisine_grouped.drop('Neighbourhood',axis=1))
df_cluster=pd.DataFrame(cuisine_grouped['Neighbourhood'])
df_cluster['Cluster Label']=clusters.labels_
df_restaurant=pd.merge(df_restaurant,df_cluster,on='Neighbourhood')
df_restaurant.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Cuisine Style,Cuisine Color,Cluster Label
0,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant,Portuguese,#dc2d51,0
1,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant,French,#e13256,0
2,Central Bay Street,43.657952,-79.387383,Nando's,43.661728,-79.386391,Portuguese Restaurant,Portuguese,#dc2d51,0
3,Central Bay Street,43.657952,-79.387383,Midi Bistro,43.655871,-79.392091,French Restaurant,French,#e13256,0
4,Central Bay Street,43.657952,-79.387383,Teriyaki Experience,43.659884,-79.387879,Restaurant,No Category,#e6375b,0


In [266]:
#Coloring
k=0
df_restaurant['Colors']=None
for clus in df_restaurant['Cluster Label']:
    if clus == 0:
        df_restaurant['Colors'].loc[k]='#818281'
    elif clus == 1:
        df_restaurant['Colors'].loc[k]='#094BF3'
    elif clus == 2:
        df_restaurant['Colors'].loc[k]='#F32509'
    elif clus == 3:
        df_restaurant['Colors'].loc[k]='#EFEE0C'
    else:
        df_restaurant['Colors'].loc[k]='#18E002'
    k+=1
df_restaurant.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Cuisine Style,Cuisine Color,Cluster Label,Colors
0,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant,Portuguese,#dc2d51,0,#818281
1,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant,French,#e13256,0,#818281
2,Central Bay Street,43.657952,-79.387383,Nando's,43.661728,-79.386391,Portuguese Restaurant,Portuguese,#dc2d51,0,#818281
3,Central Bay Street,43.657952,-79.387383,Midi Bistro,43.655871,-79.392091,French Restaurant,French,#e13256,0,#818281
4,Central Bay Street,43.657952,-79.387383,Teriyaki Experience,43.659884,-79.387879,Restaurant,No Category,#e6375b,0,#818281


In [267]:
#Plotting Map
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, label, color, style in zip(df_restaurant['Neighborhood Latitude'],df_restaurant['Neighborhood Longitude'],df_restaurant['Neighbourhood'],df_restaurant['Colors'], df_restaurant['Cuisine Style']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup='Neighbourhoods: {}\nCuisine Style: {}'.format(label,style),
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)
map_Toronto

In [297]:
#Clustered dataframe with most common cuisine
cluster_df=pd.merge(neighborhoods_cuisine_sorted,df_restaurant[['Neighbourhood','Cluster Label']],on='Neighbourhood')
cluster_df.drop_duplicates(inplace=True)
cluster_df.head()

Unnamed: 0,Neighbourhood,1st Most Common Cuisine,2nd Most Common Cuisine,3rd Most Common Cuisine,4th Most Common Cuisine,5th Most Common Cuisine,6th Most Common Cuisine,7th Most Common Cuisine,8th Most Common Cuisine,9th Most Common Cuisine,10th Most Common Cuisine,Cluster Label
0,Agincourt,Latin American,Vietnamese,Hakka,Gluten-free,German,French,Filipino,Fast Food,Falafel,Ethiopian,0
1,"Bathurst Manor, Wilson Heights, Downsview North",Sushi,No Category,Middle Eastern,Vietnamese,Doner,German,French,Filipino,Fast Food,Falafel,3
4,Bayview Village,Japanese,Chinese,Vietnamese,Dumpling,Gluten-free,German,French,Filipino,Fast Food,Falafel,2
6,"Bedford Park, Lawrence Manor East",Italian,Indian,No Category,Comfort Food,Fast Food,Greek,American,Thai,Sushi,Seafood,0
16,Berczy Park,Seafood,No Category,Greek,Vegetarian / Vegan,Thai,French,Japanese,Comfort Food,Eastern European,Dumpling,0


In [305]:
#Cluster 1 analysis
cluster_1=cluster_df[cluster_df['Cluster Label']==0].reset_index(drop=True)
print('Cluster 1 contains',len(cluster_1['Neighbourhood'].unique()),'neighbourhoods.')
print('Most common cuisines are:')
for k in cluster_1['1st Most Common Cuisine'].unique():
    print(k)

Cluster 1 contains 32 neighbourhoods.
Most common cuisines are:
Latin American 
Italian 
Seafood 
American 
Thai 
No Category
Sushi 
Indian 
Vegetarian / Vegan 
Vietnamese 
Fast Food 
Greek 
French 
Ramen 
Korean BBQ 


In [307]:
#Cluster 2 analysis
cluster_2=cluster_df[cluster_df['Cluster Label']==1].reset_index(drop=True)
print('Cluster 2 contains',len(cluster_2['Neighbourhood'].unique()),'neighbourhoods.')
print('Most common cuisines are:')
for k in cluster_2['1st Most Common Cuisine'].unique():
    print(k)

Cluster 2 contains 5 neighbourhoods.
Most common cuisines are:
Fast Food 


In [308]:
#Cluster 3 analysis
cluster_3=cluster_df[cluster_df['Cluster Label']==2].reset_index(drop=True)
print('Cluster 3 contains',len(cluster_3['Neighbourhood'].unique()),'neighbourhoods.')
print('Most common cuisines are:')
for k in cluster_3['1st Most Common Cuisine'].unique():
    print(k)

Cluster 3 contains 3 neighbourhoods.
Most common cuisines are:
Japanese 
Chinese 


In [309]:
#Cluster 4 analysis
cluster_4=cluster_df[cluster_df['Cluster Label']==3].reset_index(drop=True)
print('Cluster 4 contains',len(cluster_4['Neighbourhood'].unique()),'neighbourhoods.')
print('Most common cuisines are:')
for k in cluster_4['1st Most Common Cuisine'].unique():
    print(k)

Cluster 4 contains 18 neighbourhoods.
Most common cuisines are:
Sushi 
Italian 
Fast Food 
Japanese 
Mexican 
Caribbean 
No Category
Asian 


In [310]:
#Cluster 5 analysis
cluster_5=cluster_df[cluster_df['Cluster Label']==2].reset_index(drop=True)
print('Cluster 5 contains',len(cluster_5['Neighbourhood'].unique()),'neighbourhoods.')
print('Most common cuisines are:')
for k in cluster_5['1st Most Common Cuisine'].unique():
    print(k)

Cluster 5 contains 3 neighbourhoods.
Most common cuisines are:
Japanese 
Chinese 


As a result, we can see that French restaurant is not common in any clusters. Cluster 1 has the most number of Neighbourhoods hence has the largest cuisine variety. Cluster 4 has 18 Neighbourhoods, and moderate cuisine variety. Other clusters has very little cuisine variety and fewer Neighbourhoods.
If the restaurant opens near restaurants that belong to cluster 1, we expect the competition to be larger. If we open the restaurant near clusters 2,3,5, we expect low competition, but as theses Neighbourhoods are not accustomed to a large variety of cuisine, we expect introducing a new type of cuisine which is uncommon will be difficult.
Hence, opening a restaurant that is close to cluster 4 restaurants may be the better choice, since people are more accustomed to cuisine variety, and the competition is less fierce.