# Exploring and Clustring Neighborhoods in Toronto Assignment week 3

In [42]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


### Scraping data using panda

In [43]:
#importing data
#!conda install -c conda-forge lxml --yes # uncomment this line if you haven't completed the Foursquare API lab

raw_dt = pd.read_html(r'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')


### Creating a dataframe from scraped data

In [44]:
neighbor = pd.DataFrame(raw_dt[0])
neighbor.head()


Unnamed: 0.1,Unnamed: 0,Postal Code,Borough,Neighborhood
0,0,M1A,Not assigned,Not assigned
1,1,M2A,Not assigned,Not assigned
2,2,M3A,North York,Parkwoods
3,3,M4A,North York,Victoria Village
4,4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Data Wrangling

In [45]:
#Data Wrangling
neighbor = neighbor[neighbor.Borough != 'Not assigned']
#neighbor = neighbor[neighbor.Neighborhood != 'Not assigned']
neighbor = neighbor.reset_index(drop = True)
neighbor.drop('Unnamed: 0', axis = 1, inplace = True)
neighbor

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### cleaning dataframe and adding lat , long columns

In [46]:
lst = []
for pc,bo,neigh in zip(neighbor.iloc[:,0],neighbor.Borough,neighbor.Neighborhood):
    if ',' in neigh:
        var = neigh.split(', ')
        lst.append({'Postal Code' : pc,'Borough' : bo, 'Neighborhood' : var[0]})
    else:
        lst.append({'Postal Code' : pc,'Borough' : bo, 'Neighborhood' : neigh})

neihgbor = pd.DataFrame(lst)
neighbor['lat'] = ''
neighbor['log'] = ''
neighbor

Unnamed: 0,Postal Code,Borough,Neighborhood,lat,log
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",,
3,M6A,North York,"Lawrence Manor, Lawrence Heights",,
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",,
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",,
6,M1B,Scarborough,"Malvern, Rouge",,
7,M3B,North York,Don Mills,,
8,M4B,East York,"Parkview Hill, Woodbine Gardens",,
9,M5B,Downtown Toronto,"Garden District, Ryerson",,


### Getting lat, long values using geolocator and adding those to dataframe

In [47]:
geolocator = Nominatim(user_agent="toranto_explorer")
print('running')
lst = []
count =  0
for PS,neighborhood, Borough,  in zip(neighbor.iloc[:,0],neihgbor.iloc[:,2],neighbor.iloc[:,1]):
    
    address = str(neighborhood) + ', '+ str(Borough) + ', Toronto, Ontario'
    #print(address)
    location = geolocator.geocode(address)
    try:
        latitude = location.latitude
        longitude = location.longitude
    except:
        latitude = None
        longitude = None
    count += 1
    lst.append({'Postal Code' : PS,'Borough' : Borough, 'Neighborhood' : neighborhood, 'lat' : latitude, 'log' : longitude})
neighbor = pd.DataFrame(lst)
print(count)
neighbor

running
103


Unnamed: 0,Postal Code,Borough,Neighborhood,lat,log
0,M3A,North York,Parkwoods,43.7588,-79.320197
1,M4A,North York,Victoria Village,43.732658,-79.311189
2,M5A,Downtown Toronto,Regent Park,,
3,M6A,North York,Lawrence Manor,43.722079,-79.437507
4,M7A,Downtown Toronto,Queen's Park,43.663217,-79.38629
5,M9A,Etobicoke,Islington Avenue,43.679484,-79.538909
6,M1B,Scarborough,Malvern,43.809196,-79.221701
7,M3B,North York,Don Mills,43.775347,-79.345944
8,M4B,East York,Parkview Hill,,
9,M5B,Downtown Toronto,Garden District,43.660483,-79.383602


### Sorting data according to postal code

In [48]:
neighbor_sorted = neighbor.sort_values('Postal Code',ascending = True)
neighbor_sorted = neighbor_sorted.reset_index(drop= True)
lst = neighbor_sorted[neighbor_sorted['lat'].isnull()].index.tolist()
neighbor_sorted


Unnamed: 0,Postal Code,Borough,Neighborhood,lat,log
0,M1B,Scarborough,Malvern,43.809196,-79.221701
1,M1C,Scarborough,Rouge Hill,43.780271,-79.130499
2,M1E,Scarborough,Guildwood,43.755225,-79.198229
3,M1G,Scarborough,Woburn,43.759824,-79.225291
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692
5,M1J,Scarborough,Scarborough Village,43.743742,-79.211632
6,M1K,Scarborough,Kennedy Park,43.724878,-79.253969
7,M1L,Scarborough,Golden Mile,43.727841,-79.287622
8,M1M,Scarborough,Cliffside,43.71117,-79.248177
9,M1N,Scarborough,Birch Cliff,43.691805,-79.264494


### Importing the given geospatial data file and sorting it

In [49]:
#using given csv file
neighbor_cord = pd.read_csv(r'Geospatial_Coordinates.csv')

neighbor_cord.sort_values('Postal Code',ascending = True)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### filling the None values in the dataframe from the data given by geospatial file

In [50]:
for i in lst:
    neighbor_sorted.iloc[i,3] = neighbor_cord.iloc[i,1]
    neighbor_sorted.iloc[i,4] = neighbor_cord.iloc[i,2]
toronto_neighbor = neighbor_sorted
toronto_neighbor

Unnamed: 0,Postal Code,Borough,Neighborhood,lat,log
0,M1B,Scarborough,Malvern,43.809196,-79.221701
1,M1C,Scarborough,Rouge Hill,43.780271,-79.130499
2,M1E,Scarborough,Guildwood,43.755225,-79.198229
3,M1G,Scarborough,Woburn,43.759824,-79.225291
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692
5,M1J,Scarborough,Scarborough Village,43.743742,-79.211632
6,M1K,Scarborough,Kennedy Park,43.724878,-79.253969
7,M1L,Scarborough,Golden Mile,43.727841,-79.287622
8,M1M,Scarborough,Cliffside,43.71117,-79.248177
9,M1N,Scarborough,Birch Cliff,43.691805,-79.264494


## Now we will Cluster the data

### We will use Foursquare API to get the nearby venues for the neighborhoods

In [51]:
# @hidden_cell

CLIENT_ID = '3TU4HP3LD4GXS13D210AWADDJ3I1NFBXH4MTQT1GPIUTVEUE' # your Foursquare ID
CLIENT_SECRET = '1ERY51VKPEHLJHL43IRSSEVVSWKTGEG0DDPUHUWWAIFBA2GY' # your Foursquare Secret
VERSION = '20200704' # Foursquare API version


### Now lets get top 100 in the radius of 500 meter venues for the first neighborhood in the dataframe

In [52]:
#define
RADIUS = 500
LIMIT = 100

#getting neghborhood data
neighborhood_name = toronto_neighbor.loc[0,'Neighborhood']
neighborhood_lat = toronto_neighbor.loc[0,'lat']
neighborhood_log = toronto_neighbor.loc[0,'log']

print(neighborhood_name,neighborhood_lat,neighborhood_log)

Malvern 43.8091955 -79.2217008


In [53]:
# foursquare url

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_lat, 
    neighborhood_log, 
    RADIUS, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=3TU4HP3LD4GXS13D210AWADDJ3I1NFBXH4MTQT1GPIUTVEUE&client_secret=1ERY51VKPEHLJHL43IRSSEVVSWKTGEG0DDPUHUWWAIFBA2GY&v=20200704&ll=43.8091955,-79.2217008&radius=500&limit=100'

In [54]:
import requests
#import json
result = requests.get(url).json()

In [55]:
print(result.keys())

dict_keys(['meta', 'response'])


In [56]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [57]:
venues = result['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

#filtering columns
filtered_columns = ['venue.name','venue.categories','venue.location.lat','venue.location.lng']
nearby_venues = nearby_venues[filtered_columns]

#get catogries name
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type,axis = 1)

nearby_venues.columns = [item.split('.')[1] for item in nearby_venues]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,location,location.1
0,Shoppers Drug Mart,Pharmacy,43.80961,-79.222729
1,Subway,Sandwich Place,43.806961,-79.221476
2,Pizza Hut,Pizza Place,43.808326,-79.220616
3,Malvern Arena,Skating Rink,43.808594,-79.216634
4,Pizza Pizza,Pizza Place,43.806613,-79.221243


### Total venues returned for one location

In [58]:
print('{} total nearby venues returned '.format(nearby_venues.shape[0]))

11 total nearby venues returned 


In [59]:
def get_nearby_venues(names,latitudes,longitudes,RADIUS = 500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            RADIUS, 
            LIMIT)
            
        # make the GET request
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        except:
            continue
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

    

In [60]:
toronto = get_nearby_venues(names = toronto_neighbor['Neighborhood'],
                                 latitudes = toronto_neighbor['lat'],
                                 longitudes = toronto_neighbor['log'])


Malvern
Rouge Hill
Guildwood
Woburn
Cedarbrae
Scarborough Village
Kennedy Park
Golden Mile
Cliffside
Birch Cliff
Dorset Park
Wexford
Agincourt
Clarks Corners
Milliken
Steeles West
Upper Rouge
Hillcrest Village
Fairview
Bayview Village
York Mills
Willowdale
Willowdale
York Mills West
Willowdale
Parkwoods
Don Mills
Don Mills
Bathurst Manor
Northwood Park
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West
India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park
Summerhill West
Rosedale
St. James Town
Church and Wellesley
Regent Park
Garden District
St. James Town
Berczy Park
Central Bay Street
Richmond
Harbourfront East
Toronto Dominion Centre
Commerce Court
Bedford Park
Roselawn
Forest Hill North & West
The Annex
University of Toronto
Kensington Market
CN Tower
Stn A PO Boxes
First Canadian Place
Lawrence Manor
Glencairn
Humewood-Cedarvale
C

#### Lets check the size of data

In [61]:
print(toronto.shape)
toronto.head()

(2966, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Malvern,43.809196,-79.221701,Shoppers Drug Mart,43.80961,-79.222729,Pharmacy
1,Malvern,43.809196,-79.221701,Subway,43.806961,-79.221476,Sandwich Place
2,Malvern,43.809196,-79.221701,Pizza Hut,43.808326,-79.220616,Pizza Place
3,Malvern,43.809196,-79.221701,Malvern Arena,43.808594,-79.216634,Skating Rink
4,Malvern,43.809196,-79.221701,Pizza Pizza,43.806613,-79.221243,Pizza Place


#### Lets check how many venues were returned for each neighborhood

In [62]:
toronto.groupby(['Neighborhood']).count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,14,14,14,14,14,14
Alderwood,7,7,7,7,7,7
Bathurst Manor,7,7,7,7,7,7
Bayview Village,14,14,14,14,14,14
Bedford Park,2,2,2,2,2,2
Berczy Park,57,57,57,57,57,57
Birch Cliff,5,5,5,5,5,5
Brockton,17,17,17,17,17,17
Business reply mail Processing Centre,16,16,16,16,16,16
CN Tower,16,16,16,16,16,16


#### Lets check how many unique venues can be curated from all the venues

In [63]:
print('There are {} uniques categories.'.format(len(toronto['Venue Category'].unique())))

There are 285 uniques categories.


## Analyze each Neighborhood

In [64]:
#one hot encoding
toronto_onehot = pd.get_dummies(toronto[['Venue Category']],prefix ='', prefix_sep = '')

#adding Neighborhood to the one hot data frame
toronto_onehot['Neighborhood'] = toronto['Neighborhood']

#moving neighborhood column to first position
columns_set = ['Neighborhood'] + [item for item in toronto_onehot.columns if item != 'Neighborhood' ]
toronto_onehot = toronto_onehot[columns_set]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Big Box Store,Bike Shop,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Costume Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Court,Food Service,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hong Kong Restaurant,Hospital,Hotel,Hotel Bar,Housing Development,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Lake,Laser Tag,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Luggage Store,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Outdoor Supply Store,Paintball Field,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pie Shop,Pilates Studio,Pizza Place,Plane,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soup Place,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Tree,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### New dataframe size

In [65]:
toronto_onehot.shape

(2966, 285)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [66]:
toronto_grouped = toronto_onehot.groupby(['Neighborhood']).mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Big Box Store,Bike Shop,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Costume Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Court,Food Service,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hong Kong Restaurant,Hospital,Hotel,Hotel Bar,Housing Development,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Lake,Laser Tag,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Luggage Store,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Outdoor Supply Store,Paintball Field,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pie Shop,Pilates Studio,Pizza Place,Plane,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soup Place,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Tree,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.214286,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0
1,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bathurst Manor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bedford Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
toronto_grouped.shape

(94, 285)

### Lets print top 5 venues for each neighborhood

In [68]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood'][:100]:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                  venue  freq
0    Chinese Restaurant  0.21
1         Train Station  0.07
2   Rental Car Location  0.07
3  Cantonese Restaurant  0.07
4                Bakery  0.07


----Alderwood----
            venue  freq
0     Pizza Place  0.29
1  Sandwich Place  0.14
2            Pool  0.14
3     Coffee Shop  0.14
4             Pub  0.14


----Bathurst Manor----
                venue  freq
0         Pizza Place  0.29
1          Bagel Shop  0.29
2              Bakery  0.14
3  Mexican Restaurant  0.14
4       Grocery Store  0.14


----Bayview Village----
                  venue  freq
0                  Bank  0.14
1       Bubble Tea Shop  0.07
2        Sandwich Place  0.07
3           Gas Station  0.07
4  Fast Food Restaurant  0.07


----Bedford Park----
                        venue  freq
0  Construction & Landscaping   0.5
1        Gym / Fitness Center   0.5
2           Accessories Store   0.0
3         Monument / Landmark   0.0
4                 Music Venue   0.0


#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [69]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [88]:
num_top_venues = 10

indicators = ['st','nd','rd']

columns = ['Neighborhood']

for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common place'.format(ind+1,indicators[ind]))
    except:
        columns.append('{}th Most Common place'.format(ind+1))

toronto_venues_sorted = pd.DataFrame(columns = columns)
toronto_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    toronto_venues_sorted.iloc[ind,1:] = return_most_common_venues(toronto_grouped.iloc[ind,:],num_top_venues)

toronto_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
0,Agincourt,Chinese Restaurant,Train Station,Bakery,Korean Restaurant,Shopping Mall,Vietnamese Restaurant,Hong Kong Restaurant,Cantonese Restaurant,Coffee Shop,Food Court
1,Alderwood,Pizza Place,Coffee Shop,Sandwich Place,Gym,Pub,Pool,Yoga Studio,Dog Run,Discount Store,Distribution Center
2,Bathurst Manor,Pizza Place,Bagel Shop,Grocery Store,Mexican Restaurant,Bakery,Farm,Farmers Market,Falafel Restaurant,Event Space,Electronics Store
3,Bayview Village,Bank,Fast Food Restaurant,Persian Restaurant,Outdoor Supply Store,Breakfast Spot,Pizza Place,Metro Station,Fish Market,Bubble Tea Shop,Burger Joint
4,Bedford Park,Gym / Fitness Center,Construction & Landscaping,Electronics Store,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant


## Clustering Neighborhoods 

Run k_means to cluster neighborhooda into 6 clusters

In [89]:
k_clusters = 5

toronto_clustered = toronto_grouped.drop('Neighborhood',1)

Kmeans = KMeans(n_clusters = k_clusters,random_state = 0).fit(toronto_clustered)

Kmeans.labels_[:10]

array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [90]:
#adding cluster labels to
toronto_venues_sorted.insert(0, 'Cluster Labels', Kmeans.labels_)
toronto_venues_sorted.head()

toronto_merged = toronto_neighbor

toronto_merged = toronto_merged.join(toronto_venues_sorted.set_index('Neighborhood'),on = 'Neighborhood')
toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,lat,log,Cluster Labels,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
0,M1B,Scarborough,Malvern,43.809196,-79.221701,0.0,Pharmacy,Fast Food Restaurant,Pizza Place,Grocery Store,Bubble Tea Shop,Sandwich Place,Skating Rink,Park,Concert Hall,Distribution Center
1,M1C,Scarborough,Rouge Hill,43.780271,-79.130499,4.0,Bus Line,Train Station,Yoga Studio,Discount Store,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,M1E,Scarborough,Guildwood,43.755225,-79.198229,4.0,Train Station,Baseball Field,Storage Facility,Yoga Studio,Dumpling Restaurant,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
3,M1G,Scarborough,Woburn,43.759824,-79.225291,0.0,Fast Food Restaurant,Discount Store,Bank,Coffee Shop,Pharmacy,Toy / Game Store,Paper / Office Supplies Store,Sandwich Place,Big Box Store,Beer Store
4,M1H,Scarborough,Cedarbrae,43.756467,-79.226692,0.0,Fast Food Restaurant,Coffee Shop,Grocery Store,Sandwich Place,Bar,Beer Store,Big Box Store,Shopping Mall,Liquor Store,Electronics Store


In [91]:
toronto_merged1 = toronto_merged.dropna()
toronto_merged1 = toronto_merged1.reset_index(drop = True)
toronto_merged1['Cluster Labels'] = toronto_merged1['Cluster Labels'].astype(int)

## Finally, lets visualize the clusters on Map

In [94]:
#getting toronto center latitude and longitude value
tor = geolocator.geocode("Toronto, Ontario, CA")
latitude = tor.latitude
longitude = tor.longitude

print(toronto_merged1.shape)
print(latitude,longitude)

(102, 16)
43.6534817 -79.3839347


In [105]:
#creating map instance
toronto_map_cluster1 = folium.Map(location = [latitude,longitude],zoom_start = 11)

#set colors for clusters
x = np.arange(k_clusters)
ys = [i + x + (i*x)**2 for i in range(k_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#adding marakers on the map
markers_color = []
for lat, lng, poi,cluster in zip(toronto_merged1['lat'],toronto_merged1['log'],toronto_merged1['Neighborhood'],toronto_merged1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster '+str(cluster),parse_html = True)
    folium.CircleMarker(
    location = [lat,lng],
    radius = 5,
    popup = label,
    color = rainbow[cluster-1],
    fill = True,
    fill_color = rainbow[cluster-1],
    fill_opacity = 0.7).add_to(toronto_map_cluster1)

toronto_map_cluster1

### Examining Clusters

#### Cluster 1

In [99]:
toronto_merged1.loc[toronto_merged1['Cluster Labels'] == 0, toronto_merged1.columns[[1] + list(range(5, toronto_merged1.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
0,Scarborough,0,Pharmacy,Fast Food Restaurant,Pizza Place,Grocery Store,Bubble Tea Shop,Sandwich Place,Skating Rink,Park,Concert Hall,Distribution Center
3,Scarborough,0,Fast Food Restaurant,Discount Store,Bank,Coffee Shop,Pharmacy,Toy / Game Store,Paper / Office Supplies Store,Sandwich Place,Big Box Store,Beer Store
4,Scarborough,0,Fast Food Restaurant,Coffee Shop,Grocery Store,Sandwich Place,Bar,Beer Store,Big Box Store,Shopping Mall,Liquor Store,Electronics Store
6,Scarborough,0,Fast Food Restaurant,Grocery Store,Asian Restaurant,Chinese Restaurant,Yoga Studio,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop
8,Scarborough,0,Pharmacy,Snack Place,Coffee Shop,Pizza Place,Breakfast Spot,Grocery Store,Pub,Sandwich Place,Dog Run,Discount Store
17,North York,0,Shopping Mall,Coffee Shop,Bank,Restaurant,Korean Restaurant,Sandwich Place,Pizza Place,Ice Cream Shop,Grocery Store,Pharmacy
18,North York,0,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Yoga Studio,Dumpling Restaurant,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant
19,North York,0,Bank,Fast Food Restaurant,Persian Restaurant,Outdoor Supply Store,Breakfast Spot,Pizza Place,Metro Station,Fish Market,Bubble Tea Shop,Burger Joint
28,North York,0,Pizza Place,Bagel Shop,Grocery Store,Mexican Restaurant,Bakery,Farm,Farmers Market,Falafel Restaurant,Event Space,Electronics Store
35,East York,0,Pizza Place,Bank,Pharmacy,Breakfast Spot,Gastropub,Gym / Fitness Center,Fast Food Restaurant,Athletics & Sports,Café,Intersection


#### Cluster 2

In [100]:
toronto_merged1.loc[toronto_merged1['Cluster Labels'] == 1, toronto_merged1.columns[[1] + list(range(5, toronto_merged1.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
5,Scarborough,1,Coffee Shop,Fast Food Restaurant,Chinese Restaurant,Pharmacy,Discount Store,Shopping Mall,Gym,Pub,Distribution Center,Dive Bar
7,Scarborough,1,Clothing Store,Sandwich Place,Furniture / Home Store,Kids Store,Hardware Store,Fast Food Restaurant,Shopping Plaza,Women's Store,Japanese Restaurant,Burrito Place
9,Scarborough,1,College Stadium,Skating Rink,General Entertainment,Farm,Café,Doner Restaurant,Distribution Center,Dive Bar,Doctor's Office,Dog Run
10,Scarborough,1,Accessories Store,Plaza,Fast Food Restaurant,Beer Store,Electronics Store,Chinese Restaurant,Asian Restaurant,Clothing Store,Indian Restaurant,Bowling Alley
11,Scarborough,1,Middle Eastern Restaurant,Grocery Store,Pizza Place,Coffee Shop,Asian Restaurant,Burger Joint,Smoke Shop,Fish Market,Restaurant,Rental Car Location
12,Scarborough,1,Chinese Restaurant,Train Station,Bakery,Korean Restaurant,Shopping Mall,Vietnamese Restaurant,Hong Kong Restaurant,Cantonese Restaurant,Coffee Shop,Food Court
13,Scarborough,1,Park,Convenience Store,Caribbean Restaurant,Gas Station,Yoga Studio,Dumpling Restaurant,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant
14,Scarborough,1,Chinese Restaurant,Japanese Restaurant,Bakery,Miscellaneous Shop,Asian Restaurant,Noodle House,Taiwanese Restaurant,Bank,Dessert Shop,Juice Bar
20,North York,1,Coffee Shop,Gym,Park,Thai Restaurant,Sandwich Place,Burrito Place,Food Court,Bus Station,Restaurant,French Restaurant
21,North York,1,Coffee Shop,Japanese Restaurant,Bank,Grocery Store,Sandwich Place,Gas Station,Korean Restaurant,Pharmacy,Fried Chicken Joint,Restaurant


#### Cluster 3

In [101]:
toronto_merged1.loc[toronto_merged1['Cluster Labels'] == 2, toronto_merged1.columns[[1] + list(range(5, toronto_merged1.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
15,Scarborough,2,Health & Beauty Service,Playground,Yoga Studio,Dumpling Restaurant,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop


#### Cluster 4

In [102]:
toronto_merged1.loc[toronto_merged1['Cluster Labels'] == 3, toronto_merged1.columns[[1] + list(range(5, toronto_merged1.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
16,Scarborough,3,Fast Food Restaurant,Park,Yoga Studio,Dumpling Restaurant,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop
29,North York,3,Park,Baseball Field,Yoga Studio,Eastern European Restaurant,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Electronics Store
94,Etobicoke,3,Park,Yoga Studio,Dumpling Restaurant,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
99,Etobicoke,3,Park,Yoga Studio,Dumpling Restaurant,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
100,Etobicoke,3,Park,Caribbean Restaurant,Baseball Field,Yoga Studio,Dumpling Restaurant,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop


#### Cluster 5

In [103]:
toronto_merged1.loc[toronto_merged1['Cluster Labels'] == 4, toronto_merged1.columns[[1] + list(range(5, toronto_merged1.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common place,2nd Most Common place,3rd Most Common place,4th Most Common place,5th Most Common place,6th Most Common place,7th Most Common place,8th Most Common place,9th Most Common place,10th Most Common place
1,Scarborough,4,Bus Line,Train Station,Yoga Studio,Discount Store,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,Scarborough,4,Train Station,Baseball Field,Storage Facility,Yoga Studio,Dumpling Restaurant,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant


### Thankyou for viewing this jupyter notebook.
### Made by Rahul Bansal for Applied Data Science Capstone course provided by IBM through Coursera