In [7]:
#Web scraping tutorial
# https://simpleanalytical.com/how-to-web-scrape-wikipedia-python-urllib-beautiful-soup-pandas
    
# import the library we use to open URLs
import urllib.request

# specify which URL/web page we are going to be scraping
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# open the url using urllib.request and put the HTML into the page variable
page = urllib.request.urlopen(url)

# import the BeautifulSoup library so we can parse HTML and XML documents
from bs4 import BeautifulSoup

# parse the HTML from our URL into the BeautifulSoup parse tree format
soup = BeautifulSoup(page, "lxml")

# use the 'find_all' function to bring back all instances of the 'table' tag in the HTML and store in 'all_tables' variable
all_tables=soup.find_all("table")
# all_tables

right_table=soup.find('table', class_='wikitable sortable')
# right_table

#Import the data from the table 
A=[]
B=[]
C=[]

for row in right_table.findAll('tr'):   #tr tag specifies a row in HTML 
    cells=row.findAll('td')             #td tag defines the cell of the table
    if len(cells) == 3 and cells[1].find(text = True) != 'Not assigned':
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

# print(A)
# print(B)
# print(C)

import pandas as pd
import numpy as np

#Dataframe consists of three columns
df = pd.DataFrame(A, columns = ['Postal Code'])
df['Borough'] = B
df['Neighborhood'] = C
df.shape #(180,3)

(180, 3)

In [8]:
#Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df = df.replace('Not assigned',np.nan, regex=True)
df.dropna(subset=["Borough"], axis=0, inplace=True)
df = df.reset_index(drop=True)

#If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

#Checking to see if there are any with Neighborhood as 'Not assigned'; there are zero records
df[df['Neighborhood'] == 'Not Assigned'] 
df['Neighborhood'].isnull().sum()

#If there would be any Neighborhood with 'Not assigned', then replace with Borough value
df['Neighborhood']=df['Neighborhood'].replace('Not assigned', df['Borough'])

# df.head()
df.shape

(103, 3)

In [11]:
import pandas as pd
postal_code = pd.read_csv("C:/Users/siris/Downloads/Geospatial_Coordinates.csv")
postal_code.head()

!pip install geocoder

#Using the Geocoder Python package
import geocoder

#define the geocoder function
def get_geocoder(postal_code_from_df):
    lat_lng_coords = None  # initialize your variable to None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
          g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code_from_df))
          lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    return latitude, longitude

# create dataframe with latitude and longitude
post_TorontoNeigh = df
post_TorontoNeigh['Latitude'], post_TorontoNeigh['Longitude'] = zip(*post_TorontoNeigh['Postal Code'].apply(get_geocoder))
post_TorontoNeigh.head()



Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.31189
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939


In [12]:
#Working with Borough containing 'Toronto'
Toronto = post_TorontoNeigh[post_TorontoNeigh['Borough'].str.contains('Toronto', regex=False)]
Toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529
15,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
19,M4E,East Toronto,The Beaches,43.678148,-79.295349


In [1]:
# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 

address = 'Downtown Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of DowntownToronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of DowntownToronto are 43.6541737, -79.38081164513409.


In [4]:
!conda install -c conda-forge folium=0.5.0 --yes 

import folium # map rendering library

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\siris\Anaconda3

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         713 KB

The following NEW packages will be INSTALLED:

  altair             conda-forge/noarch::altair-4.1.0-py_1
  branca             conda-forge/noarch::branca-0.4.1-py_0
  folium             conda-

In [13]:
# create map of Manhattan using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [14]:
# Define Foursquare Credentials and Version
CLIENT_ID = 'REX4ZUBTSNHVOUKTOVTXCOMWMXNSX0SKERFHZ5KDRGG5VB4Z' 
CLIENT_SECRET = 'JIQF10SJ3HXWOUQL4SBUVU2OWPOZW3SK5W1OETUUZFGABERL' 
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: REX4ZUBTSNHVOUKTOVTXCOMWMXNSX0SKERFHZ5KDRGG5VB4Z
CLIENT_SECRET:JIQF10SJ3HXWOUQL4SBUVU2OWPOZW3SK5W1OETUUZFGABERL


In [33]:
#Get the neighborhood name

# Toronto.drop('index', axis=1, inplace=True)
Toronto.loc[3, 'Neighborhood']

'St. James Town\n'

In [34]:
# Get the neighborhood's latitude and longitude values

neighborhood_latitude = Toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Toronto.loc[3, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of St. James Town
 are 43.65096410900003, -79.35304116399999.


In [35]:
# Now, let's get the top 100 venues that are in St. James Town within a radius of 500 meters.
# First, let's create the GET request URL. Name your URL url.


LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=REX4ZUBTSNHVOUKTOVTXCOMWMXNSX0SKERFHZ5KDRGG5VB4Z&client_secret=JIQF10SJ3HXWOUQL4SBUVU2OWPOZW3SK5W1OETUUZFGABERL&v=20180605&ll=43.65096410900003,-79.35304116399999&radius=500&limit=100'

In [37]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Send the GET request and examine the resutls
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ee99e80d72a4a1a20026af9'},
 'response': {'headerLocation': 'Downtown Toronto',
  'headerFullLocation': 'Downtown Toronto, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 27,
  'suggestedBounds': {'ne': {'lat': 43.65546411350003,
    'lng': -79.34683350482591},
   'sw': {'lat': 43.646464104500026, 'lng': -79.35924882317407}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '57e0745a498ea809dbf75f68',
       'name': 'Souk Tabule',
       'location': {'address': '494 Front St E',
        'crossStreet': 'at Bayview',
        'lat': 43.65375556880743,
        'lng': -79.35439006096168,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65375556880743,
          'lng': -79.35439006096168}],
      

In [38]:
# From the Foursquare lab in the previous module, we know that all the information is in the items key. 
# Before we proceed, let's borrow the get_category_type function from the Foursquare lab.

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [39]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Souk Tabule,Mediterranean Restaurant,43.653756,-79.35439
1,Young Centre for the Performing Arts,Performing Arts Venue,43.650825,-79.357593
2,Cluny Bistro & Boulangerie,French Restaurant,43.650565,-79.357843
3,BATLgrounds,Athletics & Sports,43.647088,-79.351306
4,SOMA chocolatemaker,Chocolate Shop,43.650622,-79.358127


In [40]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

27 venues were returned by Foursquare.


# Explore Neighborhoods in Toronto

In [42]:
# Let's create a function to repeat the same process to all the neighborhoods in Toronto

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [43]:
#  code to run the above function on each neighborhood and create a new dataframe called Toronto_venues

Toronto_venues = getNearbyVenues(names=Toronto['Neighborhood'],
                                   latitudes=Toronto['Latitude'],
                                   longitudes=Toronto['Longitude']
                                  )

Regent Park, Harbourfront

Queen's Park, Ontario Provincial Government

Garden District, Ryerson

St. James Town

The Beaches

Berczy Park

Central Bay Street

Christie

Richmond, Adelaide, King

Dufferin, Dovercourt Village

Harbourfront East, Union Station, Toronto Islands

Little Portugal, Trinity

The Danforth West, Riverdale

Toronto Dominion Centre, Design Exchange

Brockton, Parkdale Village, Exhibition Place

India Bazaar, The Beaches West

Commerce Court, Victoria Hotel

Studio District

Lawrence Park

Roselawn

Davisville North

Forest Hill North & West, Forest Hill Road Park

High Park, The Junction South

North Toronto West,  Lawrence Park

The Annex, North Midtown, Yorkville

Parkdale, Roncesvalles

Davisville

University of Toronto, Harbord

Runnymede, Swansea

Moore Park, Summerhill East

Kensington Market, Chinatown, Grange Park

Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park

CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay

In [44]:
# Let's check the size of the resulting dataframe
print(Toronto_venues.shape)
Toronto_venues.head()

(1595, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.650964,-79.353041,Souk Tabule,43.653756,-79.35439,Mediterranean Restaurant
1,"Regent Park, Harbourfront",43.650964,-79.353041,Young Centre for the Performing Arts,43.650825,-79.357593,Performing Arts Venue
2,"Regent Park, Harbourfront",43.650964,-79.353041,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant
3,"Regent Park, Harbourfront",43.650964,-79.353041,BATLgrounds,43.647088,-79.351306,Athletics & Sports
4,"Regent Park, Harbourfront",43.650964,-79.353041,SOMA chocolatemaker,43.650622,-79.358127,Chocolate Shop


In [45]:
# Let's check how many venues were returned for each neighborhood
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,66,66,66,66,66,66
"Brockton, Parkdale Village, Exhibition Place",44,44,44,44,44,44
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",100,100,100,100,100,100
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",65,65,65,65,65,65
Central Bay Street,56,56,56,56,56,56
Christie,12,12,12,12,12,12
Church and Wellesley,87,87,87,87,87,87
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,27,27,27,27,27,27
Davisville North,5,5,5,5,5,5


In [46]:
# Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 230 uniques categories.


# Analyze Each Neighborhood

In [48]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
Toronto_onehot.shape

(1595, 230)

In [50]:
# Let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Berczy Park,0.015152,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.022727,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.017857,0.017857,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.011494,0.0,0.011494,0.011494,0.0,0.0,0.011494,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0
7,"Commerce Court, Victoria Hotel",0.01,0.0,0.0,0.04,0.01,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
Toronto_grouped.shape

(39, 230)

In [52]:
# Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park
----
                venue  freq
0         Coffee Shop  0.11
1          Restaurant  0.03
2              Bakery  0.03
3        Cocktail Bar  0.03
4  Seafood Restaurant  0.03


----Brockton, Parkdale Village, Exhibition Place
----
                    venue  freq
0             Coffee Shop  0.07
1                    Café  0.07
2             Pizza Place  0.05
3  Thrift / Vintage Store  0.05
4                   Diner  0.05


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto
----
              venue  freq
0       Coffee Shop  0.07
1             Hotel  0.05
2        Restaurant  0.03
3  Asian Restaurant  0.03
4              Café  0.03


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
----
                  venue  freq
0                  Café  0.06
1           Coffee Shop  0.06
2                  Park  0.05
3  Gym / Fitness Center  0.05
4     French Restaurant  0.05


----Central

In [53]:
# Let's put that into a pandas dataframe
# First, let's write a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [54]:
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Bakery,Cheese Shop,Breakfast Spot,Lounge,Restaurant,Cocktail Bar,Café,Beer Bar,Seafood Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Gift Shop,Diner,Pizza Place,Thrift / Vintage Store,Japanese Restaurant,Brewery,French Restaurant,Indian Restaurant
2,"Business reply mail Processing Centre, South C...",Coffee Shop,Hotel,Taco Place,Café,Japanese Restaurant,Restaurant,Asian Restaurant,Mediterranean Restaurant,Steakhouse,Sushi Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Café,Coffee Shop,Park,Gym / Fitness Center,French Restaurant,Italian Restaurant,Bar,Speakeasy,Restaurant,Lounge
4,Central Bay Street,Coffee Shop,Sandwich Place,Middle Eastern Restaurant,Clothing Store,Plaza,Bubble Tea Shop,Chinese Restaurant,Japanese Restaurant,Spa,Fast Food Restaurant


 # Cluster Neighborhoods

In [70]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 10

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([6, 6, 6, 6, 6, 5, 6, 6, 6, 8])

In [73]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'ClusterLabels', kmeans.labels_)

TorontoMerged = Toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
TorontoMerged = TorontoMerged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

TorontoMerged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041,6,0,Pub,Café,Trail,Athletics & Sports,Cosmetics Shop,Thai Restaurant,Mediterranean Restaurant,Distribution Center,Mexican Restaurant,Food Truck
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939,6,0,Coffee Shop,Sushi Restaurant,Café,Yoga Studio,College Auditorium,Middle Eastern Restaurant,Distribution Center,Fried Chicken Joint,Italian Restaurant,Discount Store
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529,6,0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Bar,Cosmetics Shop,Hotel,Café,Italian Restaurant,Movie Theater,Bakery
3,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554,6,0,Coffee Shop,Café,Cocktail Bar,American Restaurant,Cosmetics Shop,Gastropub,Gym,Restaurant,Clothing Store,Hotel
4,M4E,East Toronto,The Beaches,43.678148,-79.295349,6,0,Health Food Store,Trail,Pub,Church,Diner,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store


In [75]:
# let's visualize the resulting clusters

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(TorontoMerged['Latitude'], TorontoMerged['Longitude'], TorontoMerged['Neighborhood'], TorontoMerged['ClusterLabels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

#### Cluster 1

In [76]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 0, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,0,1,Garden,Gym Pool,Playground,Park,Women's Store,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store,Eastern European Restaurant


In [77]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 1, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,1,2,Park,Women's Store,Discount Store,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store,Eastern European Restaurant


In [78]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 2, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,East Toronto,2,0,Government Building,Night Market,Baseball Field,Women's Store,Discount Store,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School


In [79]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 3, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,West Toronto,3,0,Park,Metro Station,Gas Station,Women's Store,Discount Store,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store


In [80]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 4, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,4,4,Health & Beauty Service,IT Services,Diner,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store,Eastern European Restaurant


In [81]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 5, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Downtown Toronto,5,0,Grocery Store,Café,Park,Playground,Candy Store,Baby Store,Coffee Shop,Athletics & Sports,Eastern European Restaurant,Discount Store
9,West Toronto,5,0,Park,Grocery Store,Pharmacy,Café,Bus Line,Furniture / Home Store,Bank,Bakery,Athletics & Sports,Brazilian Restaurant
12,East Toronto,5,0,Park,Bus Line,Grocery Store,Business Service,Discount Store,Women's Store,Distribution Center,Farm,Falafel Restaurant,Ethiopian Restaurant
33,Downtown Toronto,5,1,Park,Playground,Grocery Store,Candy Store,Women's Store,Diner,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store


In [82]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 6, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,6,0,Pub,Café,Trail,Athletics & Sports,Cosmetics Shop,Thai Restaurant,Mediterranean Restaurant,Distribution Center,Mexican Restaurant,Food Truck
1,Downtown Toronto,6,0,Coffee Shop,Sushi Restaurant,Café,Yoga Studio,College Auditorium,Middle Eastern Restaurant,Distribution Center,Fried Chicken Joint,Italian Restaurant,Discount Store
2,Downtown Toronto,6,0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Bar,Cosmetics Shop,Hotel,Café,Italian Restaurant,Movie Theater,Bakery
3,Downtown Toronto,6,0,Coffee Shop,Café,Cocktail Bar,American Restaurant,Cosmetics Shop,Gastropub,Gym,Restaurant,Clothing Store,Hotel
4,East Toronto,6,0,Health Food Store,Trail,Pub,Church,Diner,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store
5,Downtown Toronto,6,0,Coffee Shop,Bakery,Cheese Shop,Breakfast Spot,Lounge,Restaurant,Cocktail Bar,Café,Beer Bar,Seafood Restaurant
6,Downtown Toronto,6,0,Coffee Shop,Sandwich Place,Middle Eastern Restaurant,Clothing Store,Plaza,Bubble Tea Shop,Chinese Restaurant,Japanese Restaurant,Spa,Fast Food Restaurant
8,Downtown Toronto,6,0,Coffee Shop,Café,Restaurant,Clothing Store,Hotel,Sushi Restaurant,Deli / Bodega,Thai Restaurant,Salad Place,Gym
11,West Toronto,6,0,Cocktail Bar,Coffee Shop,Bar,Wine Bar,Restaurant,Asian Restaurant,Yoga Studio,Miscellaneous Shop,Seafood Restaurant,Record Shop
13,Downtown Toronto,6,0,Coffee Shop,Hotel,Café,Restaurant,Japanese Restaurant,Italian Restaurant,American Restaurant,Seafood Restaurant,Salad Place,Bakery


In [83]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 7, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,7,0,Harbor / Marina,Theme Park,Farm,Park,Diner,Falafel Restaurant,Ethiopian Restaurant,Elementary School,Electronics Store,Eastern European Restaurant


In [84]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 8, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Central Toronto,8,0,Breakfast Spot,Park,Gym,Food & Drink Shop,Department Store,Women's Store,Distribution Center,Farm,Falafel Restaurant,Ethiopian Restaurant


In [85]:
TorontoMerged.loc[TorontoMerged['ClusterLabels'] == 9, TorontoMerged.columns[[1] + list(range(5, TorontoMerged.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,9,3,Bus Line,Bus Stop,Swim School,Women's Store,Fish & Chips Shop,Farmers Market,Farm,Falafel Restaurant,Ethiopian Restaurant,Elementary School
