## Neighborhoods in Vancouver : Data Preparing

### Import required Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import math

## Fetch Data
Fetch data of Postal code of Canada with code M

In [2]:
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_V'
response = requests.get(URL)
soup = BeautifulSoup(response.text,'html.parser')
table = soup.find('table').tbody

In [3]:
table

<tbody><tr>
<td valign="top" width="11.1%"><b>V1A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Kimberley,_British_Columbia" title="Kimberley, British Columbia">Kimberley</a></span>
</td>
<td valign="top" width="11.1%"><b>V2A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Penticton" title="Penticton">Penticton</a></span>
</td>
<td valign="top" width="11.1%"><b>V3A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Langley,_British_Columbia_(district_municipality)" title="Langley, British Columbia (district municipality)">Langley Township</a><br/>(Langley City)</span>
</td>
<td valign="top" width="11.1%"><b>V4A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Surrey,_British_Columbia" title="Surrey, British Columbia">Surrey</a><br/>Southwest</span>
</td>
<td valign="top" width="11.1%"><b>V5A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Burnaby" ti

Get rows in the table

In [4]:
rows = table.find_all('tr')
rows

[<tr>
 <td valign="top" width="11.1%"><b>V1A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Kimberley,_British_Columbia" title="Kimberley, British Columbia">Kimberley</a></span>
 </td>
 <td valign="top" width="11.1%"><b>V2A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Penticton" title="Penticton">Penticton</a></span>
 </td>
 <td valign="top" width="11.1%"><b>V3A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Langley,_British_Columbia_(district_municipality)" title="Langley, British Columbia (district municipality)">Langley Township</a><br/>(Langley City)</span>
 </td>
 <td valign="top" width="11.1%"><b>V4A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Surrey,_British_Columbia" title="Surrey, British Columbia">Surrey</a><br/>Southwest</span>
 </td>
 <td valign="top" width="11.1%"><b>V5A</b><br/><span style="font-size: smaller; line-height: 125%;"><a href="/wiki/Burnaby"

Get List of Columns in the table

In [5]:
len(rows)

20

## Clean and Prep Data

Clean and fetch relevant and required data like:
1. Ignore cells with a borough that is Not assigned
2. If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
3. If more than one neighborhood can exist in one postal code area then combined them into one row with the neighborhoods separated with a comma.

In [6]:
data = [] #(code, name, area)
for i in range(0,len(rows)):
    tds = rows[i].find_all('td')
    for td in tds:
        a = td.b.text
        temp = td.span.text.split('(')
        b = temp[0]
        if b.lower() == 'vancouver':
            c = temp[1].replace(' / ', ',').replace(')', '')   
            split_c = c.split(',')
            for temp_c in split_c:
                data.append((a, b, temp_c))
                print('(', a, ', ', b, ', ', temp_c, ')')

( V6A ,  Vancouver ,  Strathcona )
( V6A ,  Vancouver ,  Chinatown )
( V6A ,  Vancouver ,  Downtown Eastside )
( V6B ,  Vancouver ,  NE Downtown )
( V6B ,  Vancouver ,  Gastown )
( V6B ,  Vancouver ,  Harbour Centre )
( V6B ,  Vancouver ,  International Village )
( V6B ,  Vancouver ,  Victory Square )
( V6B ,  Vancouver ,  Yaletown )
( V6C ,  Vancouver ,  Waterfront )
( V6C ,  Vancouver ,  Coal Harbour )
( V6C ,  Vancouver ,  Canada Place )
( V6E ,  Vancouver ,  SE West End )
( V6E ,  Vancouver ,  Davie Village )
( V6G ,  Vancouver ,  NW West End )
( V6G ,  Vancouver ,  Stanley Park )
( V6H ,  Vancouver ,  West Fairview )
( V6H ,  Vancouver ,  Granville Island )
( V6H ,  Vancouver ,  NE Shaughnessy )
( V6J ,  Vancouver ,  NW Shaughnessy )
( V6J ,  Vancouver ,  East Kitsilano )
( V6J ,  Vancouver ,  Quilchena )
( V5K ,  Vancouver ,  North Hastings-Sunrise )
( V6K ,  Vancouver ,  Central Kitsilano )
( V6K ,  Vancouver ,  Greektown )
( V5L ,  Vancouver ,  North Grandview-Woodland )
( V6L 

In [7]:
len(data)

70

In [8]:
data[:3]

[('V6A', 'Vancouver', 'Strathcona'),
 ('V6A', 'Vancouver', 'Chinatown'),
 ('V6A', 'Vancouver', 'Downtown Eastside')]

Create DataFrame of the list created.

In [9]:
columns = ['Code', 'Name', 'Area']

In [10]:
postal_df = pd.DataFrame(data, columns=columns)
postal_df.head()

Unnamed: 0,Code,Name,Area
0,V6A,Vancouver,Strathcona
1,V6A,Vancouver,Chinatown
2,V6A,Vancouver,Downtown Eastside
3,V6B,Vancouver,NE Downtown
4,V6B,Vancouver,Gastown


Print the shape of DataFrame

In [11]:
postal_df.shape

(70, 3)

## Add coordinates for postal code

Download .csv file consisting of Geospatial Data for Postal Code

In [12]:
import numpy as np

In [13]:
from geopy.geocoders import Nominatim

In [14]:
lat_list = []
lon_list = []
for area in postal_df.Area.values:
    address = '{}, Vancouver,British Columbia, Canada'.format(area)

    geolocator = Nominatim(user_agent="BC_explorer", timeout=10)
    location = geolocator.geocode(address)
    if location != None:
        latitude = location.latitude
        longitude = location.longitude
        lat_list.append(latitude)
        lon_list.append(longitude)
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
    else:
        print('No location data on {}', address)
        lat_list.append(np.nan)
        lon_list.append(np.nan)

The geograpical coordinate of Strathcona, Vancouver,British Columbia, Canada are 49.279554, -123.0899788.
The geograpical coordinate of Chinatown, Vancouver,British Columbia, Canada are 49.2799809, -123.10408941422125.
The geograpical coordinate of Downtown Eastside, Vancouver,British Columbia, Canada are 49.2823992, -123.0994578.
The geograpical coordinate of NE Downtown, Vancouver,British Columbia, Canada are 49.283393, -123.1174563.
The geograpical coordinate of Gastown, Vancouver,British Columbia, Canada are 49.2836567, -123.1062358.
The geograpical coordinate of Harbour Centre, Vancouver,British Columbia, Canada are 49.28476745, -123.11206428918614.
The geograpical coordinate of International Village, Vancouver,British Columbia, Canada are 49.28021995, -123.10669595178601.
The geograpical coordinate of Victory Square, Vancouver,British Columbia, Canada are 49.2823247, -123.11012964839475.
The geograpical coordinate of Yaletown, Vancouver,British Columbia, Canada are 49.2763217, -1

In [15]:
postal_df['Latitude'] = lat_list
postal_df['Longitude'] = lon_list
postal_df

Unnamed: 0,Code,Name,Area,Latitude,Longitude
0,V6A,Vancouver,Strathcona,49.279554,-123.089979
1,V6A,Vancouver,Chinatown,49.279981,-123.104089
2,V6A,Vancouver,Downtown Eastside,49.282399,-123.099458
3,V6B,Vancouver,NE Downtown,49.283393,-123.117456
4,V6B,Vancouver,Gastown,49.283657,-123.106236
...,...,...,...,...,...
65,V5Y,Vancouver,West Riley Park-Little Mountain,,
66,V7Y,Vancouver,Pacific Centre,49.253241,-123.235331
67,V5Z,Vancouver,East Fairview,49.264113,-123.126835
68,V5Z,Vancouver,South Cambie,49.246685,-123.120915


In [16]:
postal_df.dropna(inplace=True)
postal_df

Unnamed: 0,Code,Name,Area,Latitude,Longitude
0,V6A,Vancouver,Strathcona,49.279554,-123.089979
1,V6A,Vancouver,Chinatown,49.279981,-123.104089
2,V6A,Vancouver,Downtown Eastside,49.282399,-123.099458
3,V6B,Vancouver,NE Downtown,49.283393,-123.117456
4,V6B,Vancouver,Gastown,49.283657,-123.106236
...,...,...,...,...,...
64,V5Y,Vancouver,West Mount Pleasant,49.263330,-123.096588
66,V7Y,Vancouver,Pacific Centre,49.253241,-123.235331
67,V5Z,Vancouver,East Fairview,49.264113,-123.126835
68,V5Z,Vancouver,South Cambie,49.246685,-123.120915


In [17]:
postal_df = postal_df.groupby(['Latitude', 'Longitude']).agg(min)
postal_df.reset_index(inplace=True)
postal_df

Unnamed: 0,Latitude,Longitude,Code,Name,Area
0,49.209223,-123.13615,V5X,Vancouver,East Marpole
1,49.218416,-123.073287,V5P,Vancouver,Victoria-Fraserview
2,49.219593,-123.090239,V5W,Vancouver,North Sunset
3,49.224274,-123.04625,V5S,Vancouver,Killarney
4,49.230829,-123.131134,V5W,Vancouver,NE Oakridge
5,49.2346,-123.183397,V6N,Vancouver,Musqueam
6,49.234673,-123.155389,V6M,Vancouver,NE Kerrisdale
7,49.240968,-123.167001,V6L,Vancouver,NW Arbutus Ridge
8,49.242024,-123.057679,V5M,Vancouver,North Renfrew-Collingwood
9,49.243838,-123.149094,V6J,Vancouver,Quilchena


In [18]:
areas = postal_df.Area.values
new_areas = []
words = ['NE ', 'NW ', 'SE ', 'SW ', 'North ', 'South ', 'East ', 'West ']
for i in range(len(areas)-1):
    for w in words:
        if areas[i].find(w)!=-1:
            areas[i] = areas[i].replace(w, '')
postal_df

Unnamed: 0,Latitude,Longitude,Code,Name,Area
0,49.209223,-123.13615,V5X,Vancouver,Marpole
1,49.218416,-123.073287,V5P,Vancouver,Victoria-Fraserview
2,49.219593,-123.090239,V5W,Vancouver,Sunset
3,49.224274,-123.04625,V5S,Vancouver,Killarney
4,49.230829,-123.131134,V5W,Vancouver,Oakridge
5,49.2346,-123.183397,V6N,Vancouver,Musqueam
6,49.234673,-123.155389,V6M,Vancouver,Kerrisdale
7,49.240968,-123.167001,V6L,Vancouver,Arbutus Ridge
8,49.242024,-123.057679,V5M,Vancouver,Renfrew-Collingwood
9,49.243838,-123.149094,V6J,Vancouver,Quilchena


## Neighborhoods in Vancouver : Clustering and Segmentation

### Import Libraries

In [19]:
import numpy as np # library to handle data in a vectorized manner

# Matplotlib and associated plotting modules
import branca.colormap as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# !pip install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


###  Getting Vancouver's Coordinates using geocode

In [20]:
address = 'Vancouver, British Columbia'

geolocator = Nominatim(user_agent="va_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Vancouver are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Vancouver are 49.2608724, -123.1139529.


###  Ploting all Postal Area in Vancouver

In [21]:
# create map of Vancouver using latitude and longitude values
map_vancouver = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, area in zip(postal_df['Latitude'], postal_df['Longitude'], postal_df['Area']):
    label = '{}'.format(area)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_vancouver)  
    
map_vancouver

In [22]:
CLIENT_ID = 'GKHFHCRG2K0F4B0OP4FHLLGKYA4PT2LVWMBL21GISWBJZFW0' # your Foursquare ID
CLIENT_SECRET = 'BLBVBLLKZQQ0L0B10T0MDX1AE1AN0XY1NQAMOF2MM2YIWWFO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GKHFHCRG2K0F4B0OP4FHLLGKYA4PT2LVWMBL21GISWBJZFW0
CLIENT_SECRET:BLBVBLLKZQQ0L0B10T0MDX1AE1AN0XY1NQAMOF2MM2YIWWFO


### Funtion to get nearby venues in radius of 500 meters if coordinates are given.

In [23]:
def getNearbyVenues(area, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for area, lat, lng in zip(area, latitudes, longitudes):
        print(area, 'Vancouver')
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&intent={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            'browse', 
            radius, 
            750)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        for v in results:
            v_lat = v['venue']['location']['lat']
            v_lng = v['venue']['location']['lng']
            if v['venue']['categories'][0]['name'].lower().find('restaurant') != -1:
                distance = math.sqrt((v_lat-lat)**2 + (v_lng-lng)**2)
                venues_list.append([(
                    area, 
                    lat, 
                    lng, 
                    v['venue']['name'],
                    v['venue']['id'],
                    v_lat, 
                    v_lng,  
                    distance, 
                    v['venue']['categories'][0]['name'])])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Area', 
                             'Area Latitude', 
                             'Area Longitude', 
                             'Venue', 
                             'Venue Id',
                             'Venue Latitude', 
                             'Venue Longitude', 
                             'Distance',
                             'Venue Category']
    
    return(nearby_venues)

### Using the above function creating dataframe for our borough in borough_df datasets and exploring the dataframe generated.

In [24]:
area_venues = getNearbyVenues(area=postal_df['Area'], 
                                 latitudes=postal_df['Latitude'], 
                                 longitudes=postal_df['Longitude']
                                  )

Marpole Vancouver
Victoria-Fraserview Vancouver
Sunset Vancouver
Killarney Vancouver
Oakridge Vancouver
Musqueam Vancouver
Kerrisdale Vancouver
Arbutus Ridge Vancouver
Renfrew-Collingwood Vancouver
Quilchena Vancouver
Cambie Vancouver
Kensington-Cedar Cottage Vancouver
Shaughnessy Vancouver
University Endowment Lands Vancouver
Pacific Centre Vancouver
Dunbar-Southlands Vancouver
UBC Vancouver
Grandview-Woodland Vancouver
Mount Pleasant Vancouver
Fairview Vancouver
Point Grey Vancouver
Central Kitsilano Vancouver
Grandview-Woodland Vancouver
Granville Island Vancouver
Jericho Vancouver
Yaletown Vancouver
Hastings-Sunrise Vancouver
Strathcona Vancouver
Chinatown Vancouver
International Village Vancouver
Davie Village Vancouver
Victory Square Vancouver
Downtown Eastside Vancouver
Downtown Vancouver
Gastown Vancouver
End Vancouver
Harbour Centre Vancouver
Waterfront Vancouver
Bentall Centre Vancouver
Canada Place Vancouver
Coal Harbour Vancouver
Stanley Park Vancouver


In [25]:
area_venues.shape

(824, 9)

In [26]:
area_venues.head()

Unnamed: 0,Area,Area Latitude,Area Longitude,Venue,Venue Id,Venue Latitude,Venue Longitude,Distance,Venue Category
0,Marpole,49.209223,-123.13615,Talay Thai Restaurant,4b2b066ef964a520cbb324e3,49.210817,-123.14056,0.00469,Thai Restaurant
1,Marpole,49.209223,-123.13615,Cafe de l'Orangerie,4c58ebbbd3aee21e052d6755,49.204258,-123.135332,0.005032,Japanese Restaurant
2,Marpole,49.209223,-123.13615,Red Star Seafood Restaurant 鴻星海鮮酒家,4adb4d6ff964a520b52521e3,49.210971,-123.140405,0.0046,Chinese Restaurant
3,Marpole,49.209223,-123.13615,Milltown Bar & Grill,5369c455498e0ee8dbe60b8f,49.202374,-123.144331,0.01067,Restaurant
4,Marpole,49.209223,-123.13615,Penang Delight Cafe,50fa18ede4b0b707cb12eb7e,49.204133,-123.135335,0.005155,Malay Restaurant


In [27]:
area_venues.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 824 entries, 0 to 823
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             824 non-null    object 
 1   Area Latitude    824 non-null    float64
 2   Area Longitude   824 non-null    float64
 3   Venue            824 non-null    object 
 4   Venue Id         824 non-null    object 
 5   Venue Latitude   824 non-null    float64
 6   Venue Longitude  824 non-null    float64
 7   Distance         824 non-null    float64
 8   Venue Category   824 non-null    object 
dtypes: float64(5), object(4)
memory usage: 58.1+ KB


In [28]:
area_venues['Venue Id'].value_counts()

55831809498eb7238a4bd305    10
4aa8005bf964a520a24e20e3    10
4aa7fe08f964a520914e20e3    10
551045fe498e2de04c7cc1b7    10
4abea310f964a520ff8e20e3    10
                            ..
5a878c33f8cbd46ab822ef69     1
4f8f7bd8e4b06b50aef2fd3f     1
4b7df028f964a5200adb2fe3     1
5075d46ee4b0c63048e3740e     1
51638b4ce4b09338e17bc2f7     1
Name: Venue Id, Length: 489, dtype: int64

In [29]:
area_venues['Venue Id'].nunique()

489

In [30]:
grouped_area = area_venues.groupby('Venue Id')
for name, group in grouped_area:
    print(name)
    print(group)
    print(type(group))

4aa17172f964a520904020e3
               Area  Area Latitude  Area Longitude         Venue  \
417        Yaletown      49.276322     -123.120956  JOEY Burrard   
541   Davie Village      49.281803     -123.133288  JOEY Burrard   
611        Downtown      49.283393     -123.117456  JOEY Burrard   
678             End      49.284131     -123.131795  JOEY Burrard   
745  Bentall Centre      49.286234     -123.119028  JOEY Burrard   
814    Coal Harbour      49.290375     -123.129281  JOEY Burrard   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
417  4aa17172f964a520904020e3       49.282864      -123.123495  0.007018   
541  4aa17172f964a520904020e3       49.282864      -123.123495  0.009850   
611  4aa17172f964a520904020e3       49.282864      -123.123495  0.006062   
678  4aa17172f964a520904020e3       49.282864      -123.123495  0.008396   
745  4aa17172f964a520904020e3       49.282864      -123.123495  0.005595   
814  4aa17172f964a520904020e3       49.282

278  Vegetarian / Vegan Restaurant  
<class 'pandas.core.frame.DataFrame'>
4aa9b6fbf964a520fe5420e3
                   Area  Area Latitude  Area Longitude                Venue  \
343  Grandview-Woodland      49.270559     -123.067942  Fets Whisky Kitchen   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
343  4aa9b6fbf964a520fe5420e3       49.273546      -123.069408  0.003328   

                Venue Category  
343  Cajun / Creole Restaurant  
<class 'pandas.core.frame.DataFrame'>
4aaa8fadf964a520695620e3
           Area  Area Latitude  Area Longitude       Venue  \
452  Strathcona      49.279554     -123.089979  Campagnolo   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
452  4aaa8fadf964a520695620e3        49.27538      -123.099862  0.010729   

         Venue Category  
452  Italian Restaurant  
<class 'pandas.core.frame.DataFrame'>
4aaaa2c7f964a520be5620e3
         Area  Area Latitude  Area Longitude               Venue  

       Area  Area Latitude  Area Longitude      Venue  \
123  Cambie      49.246685     -123.120915  Corner 23   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
123  4ac7bf8ff964a52081b920e3       49.250037      -123.115207   0.00662   

         Venue Category  
123  Chinese Restaurant  
<class 'pandas.core.frame.DataFrame'>
4ac90a50f964a52040be20e3
                         Area  Area Latitude  Area Longitude     Venue  \
151  Kensington-Cedar Cottage      49.247632     -123.084207  Pho Long   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
151  4ac90a50f964a52040be20e3       49.254995      -123.090001  0.009369   

            Venue Category  
151  Vietnamese Restaurant  
<class 'pandas.core.frame.DataFrame'>
4ac940fcf964a52066bf20e3
             Area  Area Latitude  Area Longitude          Venue  \
803  Coal Harbour      49.290375     -123.129281  Simba's Grill   

                     Venue Id  Venue Latitude  Venue Longit

               Area  Area Latitude  Area Longitude   Venue  \
237  Mount Pleasant       49.26333     -123.096588  Fassil   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
237  4b4e9e48f964a520bbf226e3       49.262422      -123.088276  0.008362   

           Venue Category  
237  Ethiopian Restaurant  
<class 'pandas.core.frame.DataFrame'>
4b50e7b9f964a520fb3727e3
                         Area  Area Latitude  Area Longitude  \
140  Kensington-Cedar Cottage      49.247632     -123.084207   

                    Venue                  Venue Id  Venue Latitude  \
140  Pho Quyen Restaurant  4b50e7b9f964a520fb3727e3       49.249865   

     Venue Longitude  Distance         Venue Category  
140      -123.089969   0.00618  Vietnamese Restaurant  
<class 'pandas.core.frame.DataFrame'>
4b513a38f964a5200b4827e3
                      Area  Area Latitude  Area Longitude    Venue  \
442             Strathcona      49.279554     -123.089979  Bao Bei   
463             

             Area  Area Latitude  Area Longitude  \
64     Kerrisdale      49.234673     -123.155389   
75  Arbutus Ridge      49.240968     -123.167001   

                           Venue                  Venue Id  Venue Latitude  \
64  Minerva's Pizza & Steakhouse  4be8bec4d837c9b6f7c2a506       49.234674   
75  Minerva's Pizza & Steakhouse  4be8bec4d837c9b6f7c2a506       49.234674   

    Venue Longitude  Distance      Venue Category  
64      -123.161774  0.006384  Italian Restaurant  
75      -123.161774  0.008181  Italian Restaurant  
<class 'pandas.core.frame.DataFrame'>
4bedd431e24d20a107117214
                   Area  Area Latitude  Area Longitude        Venue  \
28  Victoria-Fraserview      49.218416     -123.073287  La Tandoori   

                    Venue Id  Venue Latitude  Venue Longitude  Distance  \
28  4bedd431e24d20a107117214       49.225374      -123.065148  0.010708   

       Venue Category  
28  Indian Restaurant  
<class 'pandas.core.frame.DataFrame'>
4bef19822

435      -123.045439  0.008835  Chinese Restaurant  
<class 'pandas.core.frame.DataFrame'>
4d7423fe1a8aa35def520ca7
             Area  Area Latitude  Area Longitude  \
820  Coal Harbour      49.290375     -123.129281   

                                      Venue                  Venue Id  \
820  Pho Express Ankor - Delightful Cuisine  4d7423fe1a8aa35def520ca7   

     Venue Latitude  Venue Longitude  Distance    Venue Category  
820       49.291036      -123.134715  0.005474  Asian Restaurant  
<class 'pandas.core.frame.DataFrame'>
4d7be6f3645ea35deab33bf8
                   Area  Area Latitude  Area Longitude   Venue  \
93  Renfrew-Collingwood      49.242024     -123.057679  Hoi An   

                    Venue Id  Venue Latitude  Venue Longitude  Distance  \
93  4d7be6f3645ea35deab33bf8       49.238918      -123.065225   0.00816   

           Venue Category  
93  Vietnamese Restaurant  
<class 'pandas.core.frame.DataFrame'>
4d7c25a45a396dcb4d306ffb
         Area  Area Latitude  Ar

       Area  Area Latitude  Area Longitude  \
11  Marpole      49.209223      -123.13615   

                                  Venue                  Venue Id  \
11  Tai Tung Chinese Seafood Restaurant  4fde7e51e4b0e07803bad368   

    Venue Latitude  Venue Longitude  Distance      Venue Category  
11       49.212857       -123.14026  0.005486  Chinese Restaurant  
<class 'pandas.core.frame.DataFrame'>
4fe22f8fe4b0dd0924ff9469
             Area  Area Latitude  Area Longitude                        Venue  \
67     Kerrisdale      49.234673     -123.155389  Sofra Mediterannean Kitchen   
76  Arbutus Ridge      49.240968     -123.167001  Sofra Mediterannean Kitchen   

                    Venue Id  Venue Latitude  Venue Longitude  Distance  \
67  4fe22f8fe4b0dd0924ff9469       49.234728      -123.159181  0.003792   
76  4fe22f8fe4b0dd0924ff9469       49.234728      -123.159181  0.010004   

              Venue Category  
67  Mediterranean Restaurant  
76  Mediterranean Restaurant  
<class

               Area  Area Latitude  Area Longitude   Venue  \
720  Harbour Centre      49.284767     -123.112064  Gringo   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
720  5256672f11d2f8b0572a3da1       49.283076      -123.105229  0.007041   

         Venue Category  
720  Mexican Restaurant  
<class 'pandas.core.frame.DataFrame'>
5256c3bf498e4a720085f2cf
                   Area  Area Latitude  Area Longitude  \
355  Grandview-Woodland      49.270559     -123.067942   

                      Venue                  Venue Id  Venue Latitude  \
355  Sula Indian Restaurant  5256c3bf498e4a720085f2cf       49.274458   

     Venue Longitude  Distance     Venue Category  
355      -123.069494  0.004197  Indian Restaurant  
<class 'pandas.core.frame.DataFrame'>
528d86b211d248b375d095be
                      Area  Area Latitude  Area Longitude          Venue  \
441             Strathcona      49.279554     -123.089979  Ask for Luigi   
476              Chinato

786  Ramen Restaurant  
<class 'pandas.core.frame.DataFrame'>
566393f6498e5dafb969ccd5
                   Area  Area Latitude  Area Longitude    Venue  \
198  Grandview-Woodland      49.262493     -123.056896  Cabrito   
349  Grandview-Woodland      49.270559     -123.067942  Cabrito   

                     Venue Id  Venue Latitude  Venue Longitude  Distance  \
198  566393f6498e5dafb969ccd5       49.264432      -123.069678  0.012928   
349  566393f6498e5dafb969ccd5       49.264432      -123.069678  0.006368   

       Venue Category  
198  Tapas Restaurant  
349  Tapas Restaurant  
<class 'pandas.core.frame.DataFrame'>
566b3d6f498e58e746ff3b9e
       Area  Area Latitude  Area Longitude                 Venue  \
17  Marpole      49.209223      -123.13615  G8 Taiwanese Kitchen   

                    Venue Id  Venue Latitude  Venue Longitude  Distance  \
17  566b3d6f498e58e746ff3b9e       49.210562      -123.130201  0.006097   

          Venue Category  
17  Taiwanese Restaurant  
<clas

In [31]:
area_venues = area_venues.groupby('Venue Id').agg(min)
area_venues.reset_index(inplace=True)
area_venues

Unnamed: 0,Venue Id,Area,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Venue Category
0,4aa17172f964a520904020e3,Bentall Centre,49.276322,-123.133288,JOEY Burrard,49.282864,-123.123495,0.005595,New American Restaurant
1,4aa6a84cf964a520834a20e3,Mount Pleasant,49.263330,-123.096588,Martini's,49.263088,-123.108217,0.011631,Italian Restaurant
2,4aa6ad08f964a5209c4a20e3,Mount Pleasant,49.263330,-123.096588,Nirvana Indian Restaruant,49.264336,-123.100818,0.004348,Indian Restaurant
3,4aa6f0caf964a5208e4b20e3,Mount Pleasant,49.263330,-123.096588,Congee Noodle House 粥麵館 (Congee Noodle House),49.263029,-123.102105,0.005525,Chinese Restaurant
4,4aa7350bf964a520464c20e3,Grandview-Woodland,49.270559,-123.067942,Havana,49.273685,-123.069480,0.003484,Cuban Restaurant
...,...,...,...,...,...,...,...,...,...
484,5b2ef843b77c77002c371ede,Grandview-Woodland,49.270559,-123.067942,Pepino’s,49.279134,-123.070567,0.008968,Italian Restaurant
485,5b65213e005ac1002c47548d,Davie Village,49.281803,-123.133288,Mumbai Local,49.280910,-123.132244,0.001374,Indian Restaurant
486,5bba5b613149b9002c1e480b,Davie Village,49.281803,-123.133288,Horin Ramen + Sake,49.286252,-123.127441,0.007347,Ramen Restaurant
487,5c1c5efb56c89f002c116191,Victoria-Fraserview,49.218416,-123.073287,Weirdo Cafe,49.225729,-123.066254,0.010146,Restaurant


In [32]:
area_venues

Unnamed: 0,Venue Id,Area,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Venue Category
0,4aa17172f964a520904020e3,Bentall Centre,49.276322,-123.133288,JOEY Burrard,49.282864,-123.123495,0.005595,New American Restaurant
1,4aa6a84cf964a520834a20e3,Mount Pleasant,49.263330,-123.096588,Martini's,49.263088,-123.108217,0.011631,Italian Restaurant
2,4aa6ad08f964a5209c4a20e3,Mount Pleasant,49.263330,-123.096588,Nirvana Indian Restaruant,49.264336,-123.100818,0.004348,Indian Restaurant
3,4aa6f0caf964a5208e4b20e3,Mount Pleasant,49.263330,-123.096588,Congee Noodle House 粥麵館 (Congee Noodle House),49.263029,-123.102105,0.005525,Chinese Restaurant
4,4aa7350bf964a520464c20e3,Grandview-Woodland,49.270559,-123.067942,Havana,49.273685,-123.069480,0.003484,Cuban Restaurant
...,...,...,...,...,...,...,...,...,...
484,5b2ef843b77c77002c371ede,Grandview-Woodland,49.270559,-123.067942,Pepino’s,49.279134,-123.070567,0.008968,Italian Restaurant
485,5b65213e005ac1002c47548d,Davie Village,49.281803,-123.133288,Mumbai Local,49.280910,-123.132244,0.001374,Indian Restaurant
486,5bba5b613149b9002c1e480b,Davie Village,49.281803,-123.133288,Horin Ramen + Sake,49.286252,-123.127441,0.007347,Ramen Restaurant
487,5c1c5efb56c89f002c116191,Victoria-Fraserview,49.218416,-123.073287,Weirdo Cafe,49.225729,-123.066254,0.010146,Restaurant


import pickle

pickling_on = open("restaurant_data_new.pickle","wb")
pickle.dump(area_venues, pickling_on)
pickling_on.close()

In [33]:
area_venues.groupby('Area').count()

Unnamed: 0_level_0,Venue Id,Area Latitude,Area Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Venue Category
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arbutus Ridge,7,7,7,7,7,7,7,7
Bentall Centre,24,24,24,24,24,24,24,24
Cambie,11,11,11,11,11,11,11,11
Canada Place,7,7,7,7,7,7,7,7
Central Kitsilano,25,25,25,25,25,25,25,25
Chinatown,18,18,18,18,18,18,18,18
Coal Harbour,25,25,25,25,25,25,25,25
Davie Village,21,21,21,21,21,21,21,21
Downtown,1,1,1,1,1,1,1,1
Downtown Eastside,1,1,1,1,1,1,1,1


In [34]:
print('Number of unique venues in data : ', area_venues['Venue Category'].nunique())

Number of unique venues in data :  51


In [35]:
area_venues['Venue Category'].unique()

array(['New American Restaurant', 'Italian Restaurant',
       'Indian Restaurant', 'Chinese Restaurant', 'Cuban Restaurant',
       'Falafel Restaurant', 'Japanese Restaurant', 'Seafood Restaurant',
       'Malay Restaurant', 'Asian Restaurant', 'French Restaurant',
       'Vegetarian / Vegan Restaurant', 'Sushi Restaurant',
       'Thai Restaurant', 'Fast Food Restaurant',
       'Molecular Gastronomy Restaurant', 'Middle Eastern Restaurant',
       'Belgian Restaurant', 'Tapas Restaurant', 'American Restaurant',
       'Mexican Restaurant', 'Greek Restaurant',
       'Cajun / Creole Restaurant', 'Vietnamese Restaurant',
       'Ethiopian Restaurant', 'Restaurant', 'Latin American Restaurant',
       'Gluten-free Restaurant', 'African Restaurant',
       'Korean Restaurant', 'Eastern European Restaurant',
       'Portuguese Restaurant', 'Mediterranean Restaurant',
       'Caribbean Restaurant', 'Dim Sum Restaurant',
       'South American Restaurant', 'Ramen Restaurant',
       'Szec

### Transforming dataset using oneshot encoding for K-Means clustering.

In [36]:
vancouver_onehot = pd.get_dummies(area_venues['Venue Category'], prefix='', prefix_sep='')
vancouver_onehot['Area'] = area_venues['Area']
columns_list = [vancouver_onehot.columns[-1]] + list(vancouver_onehot.columns[:-1])
vancouver_onehot = vancouver_onehot[columns_list]
vancouver_onehot.head()

Unnamed: 0,Area,African Restaurant,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,...,South American Restaurant,South Indian Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Bentall Centre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Mount Pleasant,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Mount Pleasant,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Mount Pleasant,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,Grandview-Woodland,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
vancouver_onehot.shape

(489, 52)

In [38]:
vancouver_grouped_sum = vancouver_onehot.groupby('Area').sum().reset_index()
vancouver_grouped_sum.head()

Unnamed: 0,Area,African Restaurant,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,...,South American Restaurant,South Indian Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Arbutus Ridge,0,0,1,0,0,0,0,0,0,...,0,0,0,4,0,0,0,0,0,0
1,Bentall Centre,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,2,1
2,Cambie,0,0,0,0,0,0,1,0,2,...,0,0,0,2,0,0,0,0,1,2
3,Canada Place,0,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,0
4,Central Kitsilano,0,1,1,0,0,0,0,0,0,...,0,0,0,2,0,0,0,2,3,1


In [39]:
vancouver_grouped_sum.shape

(35, 52)

In [40]:
vancouver_grouped_mean = vancouver_onehot.groupby('Area').mean().reset_index()
vancouver_grouped_mean.head()

Unnamed: 0,Area,African Restaurant,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,...,South American Restaurant,South Indian Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Arbutus Ridge,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.571429,0.0,0.0,0.0,0.0,0.0,0.0
1,Bentall Centre,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.083333,0.041667
2,Cambie,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.181818,...,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.090909,0.181818
3,Canada Place,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0
4,Central Kitsilano,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.08,0.12,0.04


In [41]:
vancouver_grouped_mean.shape

(35, 52)

### Select top ten categories of venues from each postal code.

In [42]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [43]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Area']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
vancouver_venues_sorted = pd.DataFrame(columns=columns)
vancouver_venues_sorted['Area'] = vancouver_grouped_mean['Area']

for ind in np.arange(vancouver_grouped_mean.shape[0]):
    vancouver_venues_sorted.iloc[ind, 1:] = return_most_common_venues(vancouver_grouped_mean.iloc[ind, :], num_top_venues)

vancouver_venues_sorted.head()

Unnamed: 0,Area,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Arbutus Ridge,Sushi Restaurant,Asian Restaurant,Mediterranean Restaurant,Italian Restaurant,Vietnamese Restaurant,Ethiopian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant
1,Bentall Centre,Japanese Restaurant,Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant,New American Restaurant,Seafood Restaurant,Falafel Restaurant,Ramen Restaurant,Lebanese Restaurant,Vietnamese Restaurant
2,Cambie,Vietnamese Restaurant,Sushi Restaurant,Chinese Restaurant,Vegetarian / Vegan Restaurant,Greek Restaurant,Malay Restaurant,Cantonese Restaurant,Seafood Restaurant,Ethiopian Restaurant,Gluten-free Restaurant
3,Canada Place,Vegetarian / Vegan Restaurant,American Restaurant,Tapas Restaurant,Asian Restaurant,Belgian Restaurant,German Restaurant,French Restaurant,Ethiopian Restaurant,Indian Restaurant,Hawaiian Restaurant
4,Central Kitsilano,Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Sushi Restaurant,French Restaurant,Vietnamese Restaurant,Indian Restaurant,Italian Restaurant,Falafel Restaurant


In [44]:
vancouver_grouped_clustering = vancouver_grouped_mean.drop('Area', 1)

In [45]:
vancouver_grouped_clustering.head()

Unnamed: 0,African Restaurant,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Comfort Food Restaurant,...,South American Restaurant,South Indian Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.571429,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.083333,0.041667
2,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.181818,0.0,...,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.090909,0.181818
3,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0
4,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.08,0.12,0.04


In [46]:
vancouver_grouped_clustering.head()

Unnamed: 0,African Restaurant,American Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Comfort Food Restaurant,...,South American Restaurant,South Indian Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.571429,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.083333,0.041667
2,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.181818,0.0,...,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.090909,0.181818
3,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0
4,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.08,0.12,0.04


### K-Means Clustering

In [155]:
# set number of clusters
kclusters = 9

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters).fit(vancouver_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 8, 1, 8, 8, 8, 8, 8, 0, 5], dtype=int32)

### Add labes generate to the dataframe and join this dataframe to geospatial dataframe of borough.

In [156]:
if 'Cluster Labels' in vancouver_venues_sorted.columns:
    vancouver_venues_sorted.drop(columns=['Cluster Labels'], inplace=True)

In [157]:
# add clustering labels

vancouver_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

vancouver_merged = postal_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
vancouver_merged = vancouver_merged.join(vancouver_venues_sorted.set_index('Area'), on='Area')

vancouver_merged.head() # check the last columns!

Unnamed: 0,Latitude,Longitude,Code,Name,Area,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,49.209223,-123.13615,V5X,Vancouver,Marpole,1.0,Vietnamese Restaurant,Chinese Restaurant,Japanese Restaurant,Sushi Restaurant,Malay Restaurant,Falafel Restaurant,Dim Sum Restaurant,Restaurant,Indian Restaurant,Taiwanese Restaurant
1,49.218416,-123.073287,V5P,Vancouver,Victoria-Fraserview,1.0,Indian Restaurant,Sushi Restaurant,Middle Eastern Restaurant,Chinese Restaurant,Vietnamese Restaurant,Restaurant,Asian Restaurant,Falafel Restaurant,Hawaiian Restaurant,Greek Restaurant
2,49.219593,-123.090239,V5W,Vancouver,Sunset,1.0,Indian Restaurant,Chinese Restaurant,Restaurant,Vietnamese Restaurant,Cantonese Restaurant,Dim Sum Restaurant,Fast Food Restaurant,Japanese Restaurant,Korean Restaurant,South American Restaurant
3,49.224274,-123.04625,V5S,Vancouver,Killarney,1.0,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Italian Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Asian Restaurant,Falafel Restaurant,Indian Restaurant
4,49.230829,-123.131134,V5W,Vancouver,Oakridge,1.0,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Chinese Restaurant,Cantonese Restaurant,Restaurant,Asian Restaurant,Brazilian Restaurant,Falafel Restaurant,Hawaiian Restaurant


In [158]:
is_NaN = vancouver_merged.isnull()
row_has_NaN = is_NaN. any(axis=1)
rows_with_NaN = vancouver_merged[row_has_NaN]
rows_with_NaN

Unnamed: 0,Latitude,Longitude,Code,Name,Area,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,49.25216,-123.231426,V6S,Vancouver,University Endowment Lands,,,,,,,,,,,
24,49.274102,-123.196874,V6R,Vancouver,Jericho,,,,,,,,,,,
29,49.28022,-123.106696,V6B,Vancouver,International Village,,,,,,,,,,,
31,49.282325,-123.11013,V6B,Vancouver,Victory Square,,,,,,,,,,,
34,49.283657,-123.106236,V6B,Vancouver,Gastown,,,,,,,,,,,
37,49.28595,-123.111279,V6C,Vancouver,Waterfront,,,,,,,,,,,


In [159]:
vancouver_merged.dropna(inplace=True)

In [160]:
vancouver_merged = vancouver_merged.astype({'Cluster Labels': 'int32'})

In [161]:
vancouver_merged.head()

Unnamed: 0,Latitude,Longitude,Code,Name,Area,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,49.209223,-123.13615,V5X,Vancouver,Marpole,1,Vietnamese Restaurant,Chinese Restaurant,Japanese Restaurant,Sushi Restaurant,Malay Restaurant,Falafel Restaurant,Dim Sum Restaurant,Restaurant,Indian Restaurant,Taiwanese Restaurant
1,49.218416,-123.073287,V5P,Vancouver,Victoria-Fraserview,1,Indian Restaurant,Sushi Restaurant,Middle Eastern Restaurant,Chinese Restaurant,Vietnamese Restaurant,Restaurant,Asian Restaurant,Falafel Restaurant,Hawaiian Restaurant,Greek Restaurant
2,49.219593,-123.090239,V5W,Vancouver,Sunset,1,Indian Restaurant,Chinese Restaurant,Restaurant,Vietnamese Restaurant,Cantonese Restaurant,Dim Sum Restaurant,Fast Food Restaurant,Japanese Restaurant,Korean Restaurant,South American Restaurant
3,49.224274,-123.04625,V5S,Vancouver,Killarney,1,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Italian Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Asian Restaurant,Falafel Restaurant,Indian Restaurant
4,49.230829,-123.131134,V5W,Vancouver,Oakridge,1,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Chinese Restaurant,Cantonese Restaurant,Restaurant,Asian Restaurant,Brazilian Restaurant,Falafel Restaurant,Hawaiian Restaurant


### Ploting the Clusters

In [162]:
colors_array = cm.linear.Set1_09.scale(0, 10).to_step(10)
colors_array

In [163]:
colors_array.rgb_hex_str(2) #'#e41a1c'

'#4a73a7'

In [164]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12, control_scale=True, prefer_canvas=True)

# set color scheme for the clusters
# x = np.arange(kclusters)
# ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.linear.Set1_09.scale(0, 10).to_step(10)
map_clusters.add_child(colors_array)
# rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(vancouver_merged['Latitude'], vancouver_merged['Longitude'], vancouver_merged['Area'], vancouver_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster + 1), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=colors_array.rgb_hex_str(cluster+1),
        fill=True,
        fill_color=colors_array.rgb_hex_str(cluster+1),
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [165]:
def get_venue_counts(cluster):
    dict_cluster = {}
    
    for cat in area_venues['Venue Category'].unique():
        dict_cluster[cat] = 0
        
    for area in cluster.index:
        for key in cluster.loc[area][1:]:
            if key in dict_cluster.keys():
                dict_cluster[key] = dict_cluster[key] + vancouver_grouped_sum[vancouver_grouped_sum['Area'] == area][key].values[0]
                
    df_cluster = pd.DataFrame.from_dict(dict_cluster, orient='index', columns=['Counts'])
    df_cluster.sort_values(by='Counts', ascending=False, inplace=True)
    return df_cluster

Cluster 1

In [166]:
cluster1 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 0, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster1.set_index('Area', inplace=True)
cluster1.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Downtown,0,Restaurant,Japanese Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant,Fast Food Restaurant


In [167]:
len(cluster1)

1

In [168]:
df_cluster1 = get_venue_counts(cluster1)
df_cluster1.head()

Unnamed: 0,Counts
Restaurant,1
Filipino Restaurant,0
African Restaurant,0
Korean Restaurant,0
Eastern European Restaurant,0


Cluster 2

In [169]:
cluster2 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 1, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster2.set_index('Area', inplace=True)
cluster2.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Marpole,1,Vietnamese Restaurant,Chinese Restaurant,Japanese Restaurant,Sushi Restaurant,Malay Restaurant,Falafel Restaurant,Dim Sum Restaurant,Restaurant,Indian Restaurant,Taiwanese Restaurant
Victoria-Fraserview,1,Indian Restaurant,Sushi Restaurant,Middle Eastern Restaurant,Chinese Restaurant,Vietnamese Restaurant,Restaurant,Asian Restaurant,Falafel Restaurant,Hawaiian Restaurant,Greek Restaurant
Sunset,1,Indian Restaurant,Chinese Restaurant,Restaurant,Vietnamese Restaurant,Cantonese Restaurant,Dim Sum Restaurant,Fast Food Restaurant,Japanese Restaurant,Korean Restaurant,South American Restaurant
Killarney,1,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Italian Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Asian Restaurant,Falafel Restaurant,Indian Restaurant
Oakridge,1,Vietnamese Restaurant,Sushi Restaurant,Fast Food Restaurant,Chinese Restaurant,Cantonese Restaurant,Restaurant,Asian Restaurant,Brazilian Restaurant,Falafel Restaurant,Hawaiian Restaurant


In [170]:
len(cluster2)

14

In [171]:
df_cluster2 = get_venue_counts(cluster2)
df_cluster2.head()

Unnamed: 0,Counts
Vietnamese Restaurant,38
Chinese Restaurant,34
Japanese Restaurant,12
Sushi Restaurant,12
Indian Restaurant,12


Cluster 3

In [172]:
cluster3 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 2, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster3.set_index('Area', inplace=True)
cluster3.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Quilchena,2,Fast Food Restaurant,Vietnamese Restaurant,Eastern European Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant
Stanley Park,2,Fast Food Restaurant,Vietnamese Restaurant,Eastern European Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant


In [173]:
len(cluster3)

2

In [174]:
df_cluster3 = get_venue_counts(cluster3)
df_cluster3.head()

Unnamed: 0,Counts
Fast Food Restaurant,3
New American Restaurant,0
Latin American Restaurant,0
African Restaurant,0
Korean Restaurant,0


Cluster 4

In [175]:
cluster4 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 3, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster4.set_index('Area', inplace=True)
cluster4.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Arbutus Ridge,3,Sushi Restaurant,Asian Restaurant,Mediterranean Restaurant,Italian Restaurant,Vietnamese Restaurant,Ethiopian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant
Pacific Centre,3,Restaurant,Sushi Restaurant,Dim Sum Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant,Fast Food Restaurant
Dunbar-Southlands,3,Sushi Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Mexican Restaurant,Restaurant,Ethiopian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant


In [176]:
len(cluster4)

3

In [177]:
df_cluster4 = get_venue_counts(cluster4)
df_cluster4.head()

Unnamed: 0,Counts
Sushi Restaurant,8
Restaurant,2
Indian Restaurant,2
Italian Restaurant,2
Mediterranean Restaurant,1


Cluster 5

In [178]:
cluster5 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 4, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster5.set_index('Area', inplace=True)
cluster5.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Harbour Centre,4,Mexican Restaurant,Vietnamese Restaurant,Japanese Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant


In [179]:
df_cluster5 = get_venue_counts(cluster5)
df_cluster5.head()

Unnamed: 0,Counts
Mexican Restaurant,1
New American Restaurant,0
Latin American Restaurant,0
African Restaurant,0
Korean Restaurant,0


In [180]:
cluster6 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 5, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster6.set_index('Area', inplace=True)
cluster6.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Downtown Eastside,5,Asian Restaurant,Vietnamese Restaurant,Eastern European Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant


In [181]:
df_cluster6 = get_venue_counts(cluster6)
df_cluster6.head()

Unnamed: 0,Counts
Asian Restaurant,1
New American Restaurant,0
Filipino Restaurant,0
African Restaurant,0
Korean Restaurant,0


In [182]:
cluster7 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 6, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster7.set_index('Area', inplace=True)
cluster7.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Musqueam,6,Japanese Restaurant,Vietnamese Restaurant,Eastern European Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant


In [183]:
df_cluster7 = get_venue_counts(cluster7)
df_cluster7.head()

Unnamed: 0,Counts
Japanese Restaurant,1
New American Restaurant,0
Filipino Restaurant,0
African Restaurant,0
Korean Restaurant,0


In [184]:
cluster8 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 7, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster8.set_index('Area', inplace=True)
cluster8.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
End,7,Hawaiian Restaurant,Malay Restaurant,Vietnamese Restaurant,Japanese Restaurant,Indian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant,French Restaurant,Filipino Restaurant


In [185]:
df_cluster8 = get_venue_counts(cluster8)
df_cluster8.head()

Unnamed: 0,Counts
Hawaiian Restaurant,1
Malay Restaurant,1
Filipino Restaurant,0
African Restaurant,0
Korean Restaurant,0


In [186]:
cluster9 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 8, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster9.set_index('Area', inplace=True)
cluster9.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Shaughnessy,8,Korean Restaurant,Cajun / Creole Restaurant,Restaurant,Asian Restaurant,Ethiopian Restaurant,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,Gluten-free Restaurant,German Restaurant
UBC,8,Japanese Restaurant,Asian Restaurant,Fast Food Restaurant,Italian Restaurant,Falafel Restaurant,Indian Restaurant,Sushi Restaurant,Cantonese Restaurant,Chinese Restaurant,Hawaiian Restaurant
Fairview,8,Japanese Restaurant,Chinese Restaurant,Indian Restaurant,Asian Restaurant,Sushi Restaurant,Restaurant,Vietnamese Restaurant,Molecular Gastronomy Restaurant,Falafel Restaurant,Vegetarian / Vegan Restaurant
Point Grey,8,Sushi Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Middle Eastern Restaurant,Greek Restaurant,Malay Restaurant,Falafel Restaurant,Fast Food Restaurant,Thai Restaurant,Italian Restaurant
Central Kitsilano,8,Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Sushi Restaurant,French Restaurant,Vietnamese Restaurant,Indian Restaurant,Italian Restaurant,Falafel Restaurant


In [187]:
df_cluster9 = get_venue_counts(cluster9)
df_cluster9.head()

Unnamed: 0,Counts
Japanese Restaurant,29
Restaurant,15
Sushi Restaurant,15
Seafood Restaurant,12
Vegetarian / Vegan Restaurant,11


In [188]:
cluster10 = vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 9, vancouver_merged.columns[[4] + list(range(5, vancouver_merged.shape[1]))]]
cluster10.set_index('Area', inplace=True)
cluster10.head()

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [189]:
df_cluster10 = get_venue_counts(cluster10)
df_cluster10.head()

Unnamed: 0,Counts
New American Restaurant,0
Filipino Restaurant,0
African Restaurant,0
Korean Restaurant,0
Eastern European Restaurant,0


In [190]:
for i in vancouver_grouped_sum.index:
    cat = return_most_common_venues(vancouver_grouped_sum.loc[i], 5)
    val = vancouver_grouped_sum.loc[i][cat]
    freq = vancouver_grouped_sum.loc[i][cat]/vancouver_grouped_sum.loc[i][1:].sum(axis=0)
    print('Area : ', vancouver_grouped_sum.loc[i]['Area'])
    df = pd.DataFrame(columns=['Restaurant', 'Count', 'Frequency'])
    df['Restaurant'] = cat
    df['Count'] = val.values
    df['Frequency'] = freq.values
    print(df)
    print("==============================================================")
    

Area :  Arbutus Ridge
                 Restaurant Count Frequency
0          Sushi Restaurant     4  0.571429
1          Asian Restaurant     1  0.142857
2  Mediterranean Restaurant     1  0.142857
3        Italian Restaurant     1  0.142857
4     Vietnamese Restaurant     0         0
Area :  Bentall Centre
                      Restaurant Count  Frequency
0            Japanese Restaurant     4   0.166667
1                     Restaurant     3      0.125
2             Italian Restaurant     3      0.125
3  Vegetarian / Vegan Restaurant     2  0.0833333
4        New American Restaurant     2  0.0833333
Area :  Cambie
                      Restaurant Count  Frequency
0          Vietnamese Restaurant     2   0.181818
1               Sushi Restaurant     2   0.181818
2             Chinese Restaurant     2   0.181818
3  Vegetarian / Vegan Restaurant     1  0.0909091
4               Greek Restaurant     1  0.0909091
Area :  Canada Place
                      Restaurant Count Frequency
0  Veg