# The Battle of Neighborhoods

### 1. Setup dependencies

In [1]:
import pandas as pd
import numpy as np
import requests
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


### 2. Read neighborhood data into dataframe, and then filter and transform records per specifications

In [2]:
# Read HTML content
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0][1:]

# Rename columns
df.rename(columns={0:'PostalCode',1:'Borough',2:'Neighborhood'},inplace=True)

# Filter dataframe: drop rows with Borough as 'Not assigned'
df.drop(df[df.Borough == 'Not assigned'].index, inplace=True)

# Combine neigborhoods that have the same PostalCode and Borough
gdf = df.groupby(['PostalCode','Borough']).agg(lambda col: ', '.join(col)).reset_index()

# Assign Borough value to Neighborhood that are 'Not assigned'
gdf.Neighborhood = gdf.Borough.where(gdf.Neighborhood == 'Not assigned',gdf.Neighborhood)

print('Dataframe dimensions: ',gdf.shape)

gdf.head()

Dataframe dimensions:  (103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### 3. Fetch geocode file and load into dataframe

In [3]:
geocodes = pd.read_csv('http://cocl.us/Geospatial_data')
geocodes.rename(columns={'Postal Code': 'PostalCode'},inplace=True)
geocodes.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### 4. Merge neighborhood and geocode dataframes

In [4]:
neighborhoods = gdf.merge(geocodes, how='left', on=['PostalCode'])

print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

neighborhoods.head()

The dataframe has 11 boroughs and 103 neighborhoods.


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### 5. Get coordinates of Toronto

In [5]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### 6. Create map and add markers for neighborhoods

In [6]:
# create map of city using latitude and longitude values
map_city = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_city)  
    
map_city

### 7. Fetch Venue information from Foursquare
#### a) Define Foursquare Credentials and Version

In [7]:
CLIENT_ID = 'OZDKETLIZRVL1KFSUHPXRLG23WHUBHPIYXPXT3MKUXT0HLYA' # your Foursquare ID
CLIENT_SECRET = 'NELHDD0MILQIPTWV1TXPSCXGZAV5VSO3OIDYHI2I0MPJXP1T' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OZDKETLIZRVL1KFSUHPXRLG23WHUBHPIYXPXT3MKUXT0HLYA
CLIENT_SECRET:NELHDD0MILQIPTWV1TXPSCXGZAV5VSO3OIDYHI2I0MPJXP1T


#### b) Create function to explore venues based on neighborhood

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, section='topPicks', LIMIT=50):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&section={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            section, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['id'], 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue ID',               
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### c) Now execute the function to get the list of venues for each neighborhood and create dataframe called city_venues

In [9]:
# Create dataframe
city_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                 latitudes=neighborhoods['Latitude'],
                                 longitudes=neighborhoods['Longitude']
                                )

city_venues.sort_values(by=['Neighborhood'],inplace=True)

print('Pulled {} venues in {} neighborhoods.'.format(
    city_venues.shape[0],
    len(city_venues['Neighborhood'].unique())
))

city_venues.head()

Pulled 1360 venues in 98 neighborhoods.


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue ID,Venue,Venue Latitude,Venue Longitude,Venue Category
698,"Adelaide, King, Richmond",43.650571,-79.384568,4ad788c8f964a520e40b21e3,Apple Eaton Centre,43.652823,-79.380615,Electronics Store
709,"Adelaide, King, Richmond",43.650571,-79.384568,4ad8a5a7f964a5203c1321e3,Little India Restaurant,43.650319,-79.388998,Indian Restaurant
708,"Adelaide, King, Richmond",43.650571,-79.384568,4b68aed1f964a520de862be3,The Rex Hotel Jazz & Blues Bar,43.650505,-79.388577,Jazz Club
707,"Adelaide, King, Richmond",43.650571,-79.384568,549dd93f498eec33464a5fd2,Queen St. Warehouse,43.650117,-79.390316,Bar
706,"Adelaide, King, Richmond",43.650571,-79.384568,4b2a6eb8f964a52012a924e3,Indigo,43.653515,-79.380696,Bookstore


In [10]:
# OPTIONAL
# Export to CSV file
city_venues.to_csv('city_top_venues.csv')

# Read from CSV file
city_venues = pd.read_csv('city_top_venues.csv',index_col=0)

#### d) Let's check how many venues were returned for each neighborhood

In [11]:
city_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue ID,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Adelaide, King, Richmond",50,50,50,50,50,50,50
Agincourt,4,4,4,4,4,4,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",9,9,9,9,9,9,9
"Alderwood, Long Branch",7,7,7,7,7,7,7
"Bathurst Manor, Downsview North, Wilson Heights",8,8,8,8,8,8,8
Bayview Village,3,3,3,3,3,3,3
"Bedford Park, Lawrence Manor East",14,14,14,14,14,14,14
Berczy Park,42,42,42,42,42,42,42
"Birch Cliff, Cliffside West",2,2,2,2,2,2,2


#### e) Let's find out how many categories should be curated from all the returned venues

In [12]:
print('There are {} uniques categories.'.format(len(city_venues['Venue Category'].unique())))

#len(city_venues.groupby('Venue Category',as_index=False).filter(lambda x: len(x) > 10).reset_index(drop=True).index)

There are 199 uniques categories.


### 2. Analyze Each Neighborhood
#### a) One hot encode the data

In [13]:
# one hot encoding
city_onehot = pd.get_dummies(city_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
city_onehot['Neighborhood'] = city_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [city_onehot.columns[-1]] + list(city_onehot.columns[:-1])
city_onehot = city_onehot[fixed_columns]

print('Dimensions of one hot encoded dataframe: {}'.format(city_onehot.shape))

city_grouped = city_onehot.groupby('Neighborhood').mean().reset_index()

print('Dimensions of one hot encoded dataframe grouped by Neighborhood: {}'.format(city_grouped.shape))

Dimensions of one hot encoded dataframe: (1360, 200)
Dimensions of one hot encoded dataframe grouped by Neighborhood: (98, 200)


#### b) Let's print each neighborhood along with the top 5 most common venues

In [14]:
num_top_venues = 5

for hood in city_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = city_grouped[city_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0          Bar  0.08
1         Café  0.04
2  Pizza Place  0.04
3  Coffee Shop  0.04
4         Park  0.04


----Agincourt----
               venue  freq
0             Lounge  0.25
1     Sandwich Place  0.25
2       Skating Rink  0.25
3     Breakfast Spot  0.25
4  Accessories Store  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                      venue  freq
0                      Park  0.67
1                Playground  0.33
2         Accessories Store  0.00
3                 Nightclub  0.00
4  Mediterranean Restaurant  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.22
1  Fast Food Restaurant  0.11
2           Pizza Place  0.11
3              Pharmacy  0.11
4            Beer Store  0.11


----Alderwood, Long Branch----
          venue  freq
0          Pool  0.14
1          

                venue  freq
0      Discount Store  0.50
1          Hobby Shop  0.25
2  Chinese Restaurant  0.25
3          Poke Place  0.00
4               Plaza  0.00


----East Toronto----
               venue  freq
0  Convenience Store  0.33
1        Coffee Shop  0.33
2               Park  0.33
3  Accessories Store  0.00
4    Organic Grocery  0.00


----Emery, Humberlea----
                        venue  freq
0  Construction & Landscaping  0.33
1              Baseball Field  0.33
2      Furniture / Home Store  0.33
3           Accessories Store  0.00
4                   Nightclub  0.00


----Fairview, Henry Farm, Oriole----
              venue  freq
0    Clothing Store  0.23
1     Women's Store  0.05
2            Bakery  0.05
3  Toy / Game Store  0.05
4        Food Court  0.05


----First Canadian Place, Underground city----
         venue  freq
0         Café  0.08
1  Coffee Shop  0.06
2          Bar  0.06
3   Restaurant  0.06
4          Gym  0.04


----Flemingdon Park, Don Mills S

                      venue  freq
0         Convenience Store   0.5
1                Playground   0.5
2         Accessories Store   0.0
3                 Nightclub   0.0
4  Mediterranean Restaurant   0.0


----St. James Town----
         venue  freq
0         Café  0.08
1   Restaurant  0.08
2  Coffee Shop  0.06
3     Beer Bar  0.04
4       Bakery  0.04


----Stn A PO Boxes 25 The Esplanade----
                venue  freq
0          Restaurant  0.08
1                Café  0.04
2              Bakery  0.04
3      Farmers Market  0.04
4  Seafood Restaurant  0.04


----Studio District----
                 venue  freq
0                 Café  0.12
1  American Restaurant  0.06
2            Gastropub  0.06
3          Coffee Shop  0.06
4               Bakery  0.06


----The Annex, North Midtown, Yorkville----
          venue  freq
0          Café  0.16
1   Pizza Place  0.11
2   Coffee Shop  0.11
3  Burger Joint  0.05
4   Cheese Shop  0.05


----The Beaches----
               venue  freq
0       

#### c) Let's put that into a pandas dataframe
First, let's write a function to sort the venues in descending order.

In [15]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [16]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = city_grouped['Neighborhood']

for ind in np.arange(city_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Bar,Coffee Shop,Concert Hall,Park,Pizza Place,Thai Restaurant,Breakfast Spot,Gym,Café,American Restaurant
1,Agincourt,Lounge,Skating Rink,Sandwich Place,Breakfast Spot,Doner Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Fast Food Restaurant,Beer Store,Fried Chicken Joint,Sandwich Place,Coffee Shop,Pharmacy,Pizza Place,Ethiopian Restaurant,Electronics Store
4,"Alderwood, Long Branch",Pool,Skating Rink,Pizza Place,Pub,Pharmacy,Dance Studio,Gym,Gay Bar,Eastern European Restaurant,Drugstore


### 3. Cluster Neighborhoods
Run k-means to cluster the neighborhood into 5 clusters.

In [17]:
# set number of clusters
kclusters = 5

city_grouped_clustering = city_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(city_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [18]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

city_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
city_merged = city_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# drop rows with no assigned clusters
city_merged.dropna(inplace=True)

city_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,1.0,Print Shop,Fast Food Restaurant,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,1.0,History Museum,Bar,Yoga Studio,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Breakfast Spot,Pizza Place,Electronics Store,Mexican Restaurant,Yoga Studio,Doner Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Insurance Office,Korean Restaurant,Doner Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Caribbean Restaurant,Thai Restaurant,Yoga Studio,Donut Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant


Finally, let's visualize the resulting clusters

In [19]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(city_merged['Latitude'], city_merged['Longitude'], city_merged['Neighborhood'], city_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    cluster = int(cluster)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### 4. Examine Clusters

#### Cluster 1

In [20]:
city_merged.loc[city_merged['Cluster Labels'] == 0, city_merged.columns[[1] + [2] + list(range(5, city_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,Park,Playground,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
23,North York,York Mills West,0.0,Electronics Store,Park,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant
30,North York,"CFB Toronto, Downsview East",0.0,Park,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
40,East York,East Toronto,0.0,Convenience Store,Coffee Shop,Park,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
44,Central Toronto,Lawrence Park,0.0,Park,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
50,Downtown Toronto,Rosedale,0.0,Park,Playground,Trail,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
74,York,Caledonia-Fairbanks,0.0,Park,Market,Women's Store,Doner Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
98,York,Weston,0.0,Convenience Store,Park,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
100,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",0.0,Pizza Place,Park,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 2

In [21]:
city_merged.loc[city_merged['Cluster Labels'] == 1, city_merged.columns[[1] + [2] + list(range(5, city_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,"Rouge, Malvern",1.0,Print Shop,Fast Food Restaurant,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore
1,Scarborough,"Highland Creek, Rouge Hill, Port Union",1.0,History Museum,Bar,Yoga Studio,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store
2,Scarborough,"Guildwood, Morningside, West Hill",1.0,Breakfast Spot,Pizza Place,Electronics Store,Mexican Restaurant,Yoga Studio,Doner Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant
3,Scarborough,Woburn,1.0,Coffee Shop,Insurance Office,Korean Restaurant,Doner Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
4,Scarborough,Cedarbrae,1.0,Hakka Restaurant,Athletics & Sports,Bakery,Caribbean Restaurant,Thai Restaurant,Yoga Studio,Donut Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant
6,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",1.0,Discount Store,Chinese Restaurant,Hobby Shop,Doner Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
7,Scarborough,"Clairlea, Golden Mile, Oakridge",1.0,Bakery,Soccer Field,Park,Yoga Studio,Doner Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
9,Scarborough,"Birch Cliff, Cliffside West",1.0,Skating Rink,Café,Yoga Studio,Doner Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
10,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",1.0,Indian Restaurant,Brewery,Latin American Restaurant,Vietnamese Restaurant,Pet Store,Chinese Restaurant,Doner Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant
11,Scarborough,"Maryvale, Wexford",1.0,Shopping Mall,Middle Eastern Restaurant,Auto Garage,Bakery,Breakfast Spot,Yoga Studio,Drugstore,Fast Food Restaurant,Farmers Market,Falafel Restaurant


#### Cluster 3

In [22]:
city_merged.loc[city_merged['Cluster Labels'] == 2, city_merged.columns[[1] + [2] + list(range(5, city_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,Scarborough Village,2.0,Convenience Store,Playground,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
48,Central Toronto,"Moore Park, Summerhill East",2.0,Playground,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
73,York,Humewood-Cedarvale,2.0,Playground,Trail,Yoga Studio,Dog Run,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 4

In [23]:
city_merged.loc[city_merged['Cluster Labels'] == 3, city_merged.columns[[1] + [2] + list(range(5, city_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",3.0,American Restaurant,Yoga Studio,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 5 

In [24]:
city_merged.loc[city_merged['Cluster Labels'] == 4, city_merged.columns[[1] + [2] + list(range(5, city_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So...",4.0,Baseball Field,Yoga Studio,Doner Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Find number of venues in each cluster

In [25]:
print(city_venues.merge(city_merged,left_on=['Neighborhood'],right_on=['Neighborhood']).groupby('Cluster Labels')['Cluster Labels'].count())

Cluster Labels
0.0      22
1.0    1331
2.0       5
3.0       1
4.0       1
Name: Cluster Labels, dtype: int64


In [28]:
city_merged.loc[city_merged['Cluster Labels'] == 1].groupby('1st Most Common Venue').count()

Unnamed: 0_level_0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1st Most Common Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Asian Restaurant,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Bakery,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Bar,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Breakfast Spot,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Burger Joint,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Café,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12
Caribbean Restaurant,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Chinese Restaurant,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Clothing Store,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
Coffee Shop,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5


In [None]:
venue_details = getVenueDetails(city_venues['Venue ID'][0])
venue_details

#### c) Create function to fetch venue details based on Foursquare VENUE_ID

In [None]:
def getVenueDetails(venue_id):
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}'.format(
        venue_id,
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION)
    
    # make the GET request
    results = requests.get(url).json()["response"]['venue']
    
    return(results)

In [None]:
venue_details['rating']