In [21]:
import pandas as pd
#libraries needed for webscrapping
import requests
from bs4 import BeautifulSoup

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_data = requests.get(url)


In [22]:
soup = BeautifulSoup(html_data.content, 'html.parser')

In [23]:
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"219b0089-02a0-4c0a-8975-c018874a2587","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":969510799,"wgRevisionId":969510799,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toron

In [24]:
#getting table from html
table = soup.findAll('table', {'class': 'sortable'})
print(len(table))
print()
print(table[0].prettify)

1

<bound method Tag.prettify of <table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned


In [25]:
#converting html into dataframe

table_rows = table[0].find_all('tr')
df = []

for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text for tr in td]
    df.append(row)
columns = ['Postal Code', 'Borough', 'Neighborhood']   
data = pd.DataFrame(df, columns=columns)
data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,,,
1,M1A\n,Not assigned\n,Not assigned\n
2,M2A\n,Not assigned\n,Not assigned\n
3,M3A\n,North York\n,Parkwoods\n
4,M4A\n,North York\n,Victoria Village\n


In [26]:
#removing '\n'
data = data.replace('\n', '', regex=True)
data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [27]:
#removing top row as it is None
data=data.reindex(data.index.drop(0)).reset_index(drop=True)
data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [28]:
#replacing 'not assigned' to nan, then removing nan
import numpy as np
data_new = data.replace('Not assigned', np.nan)
data_new.dropna(subset=['Borough'],axis=0, inplace=True)
data_new.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [29]:
#1st part assignment final
data_new.shape

(103, 3)

In [36]:
#getting geo data from csv file
geo_data = pd.read_csv('http://cocl.us/Geospatial_data')
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [37]:
# groupping two tables into one, 2nd part assignment final
df_new = data_new.merge(geo_data, left_on='Postal Code', right_on='Postal Code')
df_new.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [38]:
#getting boroughs containing word 'Toronto'
df_toronto = df_new[df_new['Borough'].astype(str).str.contains('Toronto')]
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [41]:
from geopy.geocoders import Nominatim 
!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.4.1               |             py_0          26 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    ------------------------------------------------------------
                       

In [43]:
#using geolocator library to get toronto coordinates
address = 'Toronto, ON, Canada'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [44]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [51]:
#choosing Downtown Toronto borough for further analysis
down_toronto = df_toronto[df_toronto['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
down_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [52]:
address = 'Downtown Toronto, Toronto, ON'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6563221, -79.3809161.


In [53]:
# create map of Downtown Toronto using latitude and longitude values
map_down = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(down_toronto['Latitude'], down_toronto['Longitude'], down_toronto['Borough'], down_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_down)  
    
map_down

In [54]:
CLIENT_ID = 'IIBLLQDI4NCI1BARSXHCFSTYEBPBAQWRJ1IZDPX5G5E2GBBU' # your Foursquare ID
CLIENT_SECRET = 'ZGJRL4X5CAXQ2QTNOFGUAHHXJST5R1HCCPY1EATTPYCINXNA' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IIBLLQDI4NCI1BARSXHCFSTYEBPBAQWRJ1IZDPX5G5E2GBBU
CLIENT_SECRET:ZGJRL4X5CAXQ2QTNOFGUAHHXJST5R1HCCPY1EATTPYCINXNA


In [57]:
#getting one of the neighborhoods in Downtown Toronto
down_toronto.loc[3, 'Neighborhood']

'St. James Town'

In [59]:
neighborhood_latitude = down_toronto.loc[3, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = down_toronto.loc[3, 'Longitude'] # neighborhood longitude value

neighborhood_name = down_toronto.loc[3, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of St. James Town are 43.6514939, -79.3754179.


In [60]:
# 20 venues in StJames town neighborhood within 200 m. radius
Limit = 20
radius = 200
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID,
                                                                                                                           CLIENT_SECRET,
                                                                                                                           VERSION, neighborhood_latitude,
                                                                                                                           neighborhood_longitude, radius, Limit)
url




'https://api.foursquare.com/v2/venues/explore?&client_id=IIBLLQDI4NCI1BARSXHCFSTYEBPBAQWRJ1IZDPX5G5E2GBBU&client_secret=ZGJRL4X5CAXQ2QTNOFGUAHHXJST5R1HCCPY1EATTPYCINXNA&v=20180605&ll=43.6514939,-79.3754179&radius=200&limit=20'

In [61]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f1e94b851d07f34bcaeadf5'},
 'response': {'headerLocation': 'St. Lawrence',
  'headerFullLocation': 'St. Lawrence, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 7,
  'suggestedBounds': {'ne': {'lat': 43.6532939018, 'lng': -79.37293481442669},
   'sw': {'lat': 43.6496938982, 'lng': -79.37790098557332}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '574ad72238fa943556d93b8e',
       'name': 'Gyu-Kaku Japanese BBQ',
       'location': {'address': '81 Church St',
        'crossStreet': 'at Adelaide St E',
        'lat': 43.651422275497914,
        'lng': -79.37504693687086,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.651422275497914,
          'lng': -79.37504693687086}],
        'dis

In [62]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [64]:
# extracting venue information

from pandas.io.json import json_normalize
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Gyu-Kaku Japanese BBQ,Japanese Restaurant,43.651422,-79.375047
1,Crepe TO,Creperie,43.650063,-79.374587
2,Terroni,Italian Restaurant,43.650927,-79.375602
3,Versus Coffee,Coffee Shop,43.651213,-79.375236
4,Pearl Diver,Gastropub,43.651481,-79.3736


In [65]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

7 venues were returned by Foursquare.


In [66]:
def getNearbyVenues(names, latitudes, longitudes, radius=200):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            Limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [67]:
# venues for all neighborhoods in Downtown Toronto

downtown_venues = getNearbyVenues(names=down_toronto['Neighborhood'],
                                   latitudes=down_toronto['Latitude'],
                                   longitudes=down_toronto['Longitude']
                                  )



Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [68]:
print('Downtown venues:', downtown_venues.shape)

Downtown venues: (230, 7)


In [71]:
downtown_venues.groupby('Neighborhood').count()
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 91 uniques categories.


In [72]:
# creating dataframe with all venue category types

down_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")
down_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 
fixed_columns = [down_onehot.columns[-1]] + list(down_onehot.columns[:-1])
down_onehot = down_onehot[fixed_columns]
down_onehot.head()

Unnamed: 0,Wine Bar,Adult Boutique,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Beer Bar,...,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [75]:
# analyzing each neighborhood 

downtown_group = down_onehot.groupby('Neighborhood').mean().reset_index()
downtown_group

Unnamed: 0,Neighborhood,Wine Bar,Adult Boutique,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,...,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Church and Wellesley,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.05
3,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0
4,"First Canadian Place, Underground city",0.0,0.0,0.05,0.05,0.0,0.0,0.05,0.0,0.0,...,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Garden District, Ryerson",0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,...,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0
6,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,...,0.0,0.055556,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0
7,"Kensington Market, Chinatown, Grange Park",0.05,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
8,"Queen's Park, Ontario Provincial Government",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,"Regent Park, Harbourfront",0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [79]:
#function returning most common venues

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [80]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_group['Neighborhood']

for ind in np.arange(downtown_group.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_group.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"CN Tower, King and Spadina, Railway Lands, Har...",Performing Arts Venue,Vietnamese Restaurant,Coffee Shop,College Gym,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Creperie,Deli / Bodega
1,Central Bay Street,Coffee Shop,Sandwich Place,Smoothie Shop,Pharmacy,Café,Middle Eastern Restaurant,Bookstore,Italian Restaurant,Diner,Dessert Shop
2,Church and Wellesley,Burger Joint,Vietnamese Restaurant,Salon / Barbershop,Mexican Restaurant,Martial Arts Dojo,Japanese Restaurant,Italian Restaurant,Bubble Tea Shop,Breakfast Spot,Poke Place
3,"Commerce Court, Victoria Hotel",Café,Coffee Shop,Deli / Bodega,Restaurant,Hotel,Pub,Bookstore,Sandwich Place,Gym,Japanese Restaurant
4,"First Canadian Place, Underground city",Coffee Shop,Gym,Café,Restaurant,General Travel,Salad Place,Gluten-free Restaurant,Gym / Fitness Center,Deli / Bodega,Bookstore


In [81]:
# K means clustering

from sklearn.cluster import KMeans
kclusters = 3

downtown_clustering = downtown_group.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 1, 1, 1, 1, 1, 1, 2, 1], dtype=int32)

In [82]:
# add clustering labels

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_merged = down_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head() 

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Breakfast Spot,Park,Furniture / Home Store,Bakery,Gym / Fitness Center,History Museum,Spa,Sandwich Place,Coffee Shop,Vietnamese Restaurant
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2.0,Thai Restaurant,Vietnamese Restaurant,Dessert Shop,Coffee Shop,College Gym,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Creperie
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1.0,Coffee Shop,Café,Music Venue,Plaza,Ramen Restaurant,Pizza Place,Burger Joint,Burrito Place,Diner,Sporting Goods Shop
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1.0,Gastropub,Camera Store,Poke Place,Creperie,Italian Restaurant,Japanese Restaurant,Coffee Shop,Deli / Bodega,College Gym,College Rec Center
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,,,,,,,,,,,


In [85]:
downtown_merged.drop('Postal Code', 1) #dropping postal code column

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Breakfast Spot,Park,Furniture / Home Store,Bakery,Gym / Fitness Center,History Museum,Spa,Sandwich Place,Coffee Shop,Vietnamese Restaurant
1,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2.0,Thai Restaurant,Vietnamese Restaurant,Dessert Shop,Coffee Shop,College Gym,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Creperie
2,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1.0,Coffee Shop,Café,Music Venue,Plaza,Ramen Restaurant,Pizza Place,Burger Joint,Burrito Place,Diner,Sporting Goods Shop
3,Downtown Toronto,St. James Town,43.651494,-79.375418,1.0,Gastropub,Camera Store,Poke Place,Creperie,Italian Restaurant,Japanese Restaurant,Coffee Shop,Deli / Bodega,College Gym,College Rec Center
4,Downtown Toronto,Berczy Park,43.644771,-79.373306,,,,,,,,,,,
5,Downtown Toronto,Central Bay Street,43.657952,-79.387383,1.0,Coffee Shop,Sandwich Place,Smoothie Shop,Pharmacy,Café,Middle Eastern Restaurant,Bookstore,Italian Restaurant,Diner,Dessert Shop
6,Downtown Toronto,Christie,43.669542,-79.422564,,,,,,,,,,,
7,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,1.0,Coffee Shop,Steakhouse,Asian Restaurant,General Travel,Greek Restaurant,Opera House,Colombian Restaurant,Japanese Restaurant,Concert Hall,Hotel
8,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,1.0,Coffee Shop,Bank,New American Restaurant,Fried Chicken Joint,Bubble Tea Shop,Pizza Place,Plaza,Deli / Bodega,Gym,Bar
9,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,1.0,Coffee Shop,Restaurant,Deli / Bodega,Hotel,Pub,Bookstore,Café,Sandwich Place,Gym,Japanese Restaurant


In [86]:
downtown_merged.dropna(axis=0, inplace=True) #excluding any NA values

In [88]:
downtown_merged.shape

(16, 16)

In [91]:
#visualization of Downtown Toronto neighborhood in terms of clusters

import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters