In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

#import beautifulsoup4
!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.2 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.21.0-py_0

The following packages will be UPDATED:

  openssl                                 1.1.1f-h516909a_0 --> 1.1.1g-h51

#### get the Toronto neighborhood data from Wikipedia page

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
print(response.status_code)

200


### Get the table

In [3]:
page = response.text
html = BeautifulSoup(page)
table = html.find('table',"wikitable sortable")

### Get the header of the table
##### column name as columns

In [4]:
columns = []
for column in table.find_all('th'):
    columns.append(column.get_text().split('\n')[0])
print(columns)    

['Postal code', 'Borough', 'Neighborhood']


#### create the data frame table as df

In [5]:

df = pd.DataFrame([],columns = columns)

## the criteria
#### The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood  ##checked
#### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
#### More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.
#### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
#### Clean your Notebook and add Markdown cells to explain your work and any assumptions you are making.
#### In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe.

## My definition
#### create col_0,1,2 list to store the cell data and assign it in the df table

In [6]:

cols_0 = []
cols_1 = []
cols_2 = []
for row in table.find_all('tr')[1:]: # the real data is begin from 1
    col = row.find_all('td') #get the each row data
    if col[1].get_text() == 'Not assigned\n': # just store the assigned data
        continue
    
    col_0 = col[0].get_text().split('\n')[0] # clean the cell data, get rid of the \n in the cell data and add it in col list
    col_1 = col[1].get_text().split('\n')[0]
    if col[2].get_text() == '\n':
        col_2 = col[1].get_text().split('\n')[0] # if no neighborhood ,give the value of Borough
    else:
        col_2 = col[2].get_text().split('\n')[0].replace('/',',') #reshape the multi neighborhood 
    cols_0.append(col_0)
    cols_1.append(col_1)
    cols_2.append(col_2)

## Add the column data into the df table

In [7]:
df.iloc[:,0] = cols_0
df.iloc[:,1] = cols_1
df.iloc[:,2] = cols_2
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [8]:
df.shape

(103, 3)

#### Get the location information from the URL

In [9]:
geo_df = pd.read_csv('http://cocl.us/Geospatial_data')
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merge the data with Toronto Data

In [10]:
df['Latitude'] = [0]*len(df.iloc[:,0])
df['Longitude'] = [0]*len(df.iloc[:,0])

In [11]:
for i, code in enumerate(df.iloc[:,0]):
    for j,p_code in enumerate(geo_df.iloc[:,0]):
        if p_code == code:
            df.iloc[i,3] = geo_df.iloc[j,1]
            df.iloc[i,4] = geo_df.iloc[j,2]
            
df.head()           

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494


#### Filter the data, which contains Toronto in Borough and as the new_df

In [12]:
new_df = df.iloc[['Toronto' in Borough for Borough in df['Borough']]]

In [13]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(new_df['Borough'].unique()),
        new_df.shape[0]
    )
)

The dataframe has 4 boroughs and 39 neighborhoods.


#### draw Toronto map

In [14]:
address = 'Toronto'

geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of  Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of  Toronto are 43.6534817, -79.3839347.


In [15]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(new_df['Latitude'], new_df['Longitude'], new_df['Borough'], new_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [16]:
new_df

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin , Dovercourt Village",43.669005,-79.442259


## Because the new_df['Neighborhood']may contain some neighborhood, so split this columns,which has more than one values

In [17]:
new_df = new_df.drop('Neighborhood', axis=1).join(new_df['Neighborhood'].str.split(',', expand=True).stack().reset_index(level=1, drop=True).rename('Neighborhood'))

In [18]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(new_df['Borough'].unique()),
        new_df.shape[0]
    )
)

The dataframe has 4 boroughs and 75 neighborhoods.


In [19]:
new_df['Borough'].unique()

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

#### Create 4 Borough dataset

In [20]:
Downtown_data = new_df[new_df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
Downtown_data.head()
East_data = new_df[new_df['Borough'] == 'East Toronto'].reset_index(drop=True)
East_data.head()
West_data = new_df[new_df['Borough'] == 'West Toronto'].reset_index(drop=True)
West_data.head()
Central_data = new_df[new_df['Borough'] == 'Central Toronto'].reset_index(drop=True)
Central_data.head()

Unnamed: 0,Postal code,Borough,Latitude,Longitude,Neighborhood
0,M4N,Central Toronto,43.72802,-79.38879,Lawrence Park
1,M5N,Central Toronto,43.711695,-79.416936,Roselawn
2,M4P,Central Toronto,43.712751,-79.390197,Davisville North
3,M5P,Central Toronto,43.696948,-79.411307,Forest Hill North & West
4,M4R,Central Toronto,43.715383,-79.405678,North Toronto West


###  Get each latitude and longitude for 4 borough 

In [21]:
address = 'Downtown Toronto, Toronto'

geolocator = Nominatim(user_agent="TO_explorer")
location_1 = geolocator.geocode(address)
latitude_1 = location_1.latitude
longitude_1 = location_1.longitude
print('The geograpical coordinate of Downtown Toronto, Toronto are {}, {}.'.format(latitude_1, longitude_1))

address = 'East Toronto, Toronto'

geolocator = Nominatim(user_agent="TO_explorer")
location_2 = geolocator.geocode(address)
latitude_2 = location_2.latitude
longitude_2 = location_2.longitude
print('The geograpical coordinate of East Toronto, Toronto are {}, {}.'.format(latitude_2, longitude_2))

address = 'West Toronto, Toronto'

geolocator = Nominatim(user_agent="TO_explorer")
location_3 = geolocator.geocode(address)
latitude_3 = location_3.latitude
longitude_3 = location_3.longitude
print('The geograpical coordinate of West Toronto, Toronto are {}, {}.'.format(latitude_3, longitude_3))

address = 'Central Toronto, Toronto'

geolocator = Nominatim(user_agent="TO_explorer")
location_4 = geolocator.geocode(address)
latitude_4 = location_4.latitude
longitude_4 = location_4.longitude
print('The geograpical coordinate of Central Toronto, Toronto are {}, {}.'.format(latitude_4, longitude_4))

The geograpical coordinate of Downtown Toronto, Toronto are 43.6541737, -79.38081164513409.
The geograpical coordinate of East Toronto, Toronto are 43.6534817, -79.3839347.
The geograpical coordinate of West Toronto, Toronto are 43.6534817, -79.3839347.
The geograpical coordinate of Central Toronto, Toronto are 43.6534817, -79.3839347.


In [22]:
# create map of Downtown Toronto using latitude and longitude values
map_Downtown = folium.Map(location=[latitude_1, longitude_1], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Downtown_data['Latitude'], Downtown_data['Longitude'], Downtown_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Downtown)  
    
map_Downtown

In [23]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Get the venues name and categories

### Give the Credential of foursquare

In [28]:
#insert the real client_id and client_secret
CLIENT_ID = 'XXXXX' # your Foursquare ID
CLIENT_SECRET = 'XXX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [31]:
LIMIT = 100
radius = 500


In [32]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### get all the nearby Venues for each neighborhood

In [33]:


Downtown_venues = getNearbyVenues(names=Downtown_data['Neighborhood'],
                                   latitudes=Downtown_data['Latitude'],
                                   longitudes=Downtown_data['Longitude']
                                  )


East_venues = getNearbyVenues(names=East_data['Neighborhood'],
                                   latitudes=East_data['Latitude'],
                                   longitudes=East_data['Longitude']
                                  )


West_venues = getNearbyVenues(names=West_data['Neighborhood'],
                                   latitudes=West_data['Latitude'],
                                   longitudes=West_data['Longitude']
                                  )


Central_venues = getNearbyVenues(names=Central_data['Neighborhood'],
                                   latitudes=Central_data['Latitude'],
                                   longitudes=Central_data['Longitude']
                                  )

Regent Park 
 Harbourfront
Queen's Park 
 Ontario Provincial Government
Garden District
 Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond 
 Adelaide 
 King
Harbourfront East 
 Union Station 
 Toronto Islands
Toronto Dominion Centre 
 Design Exchange
Commerce Court 
 Victoria Hotel
University of Toronto 
 Harbord
Kensington Market 
 Chinatown 
 Grange Park
CN Tower 
 King and Spadina 
 Railway Lands 
 Harbourfront West 
 Bathurst Quay 
 South Niagara 
 Island airport
Rosedale
Stn A PO Boxes
St. James Town 
 Cabbagetown
First Canadian Place 
 Underground city
Church and Wellesley
The Beaches
The Danforth West 
 Riverdale
India Bazaar 
 The Beaches West
Studio District
Business reply mail Processing CentrE
Dufferin 
 Dovercourt Village
Little Portugal 
 Trinity
Brockton 
 Parkdale Village 
 Exhibition Place
High Park 
 The Junction South
Parkdale 
 Roncesvalles
Runnymede 
 Swansea
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
North Toronto West
Th

In [34]:
print(Downtown_venues.shape)
Downtown_venues.head()

(2393, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Regent Park,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Regent Park,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Regent Park,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,Regent Park,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,Regent Park,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [35]:
Downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,94,94,94,94,94,94
Bathurst Quay,17,17,17,17,17,17
Cabbagetown,46,46,46,46,46,46
Chinatown,55,55,55,55,55,55
Design Exchange,100,100,100,100,100,100
Grange Park,55,55,55,55,55,55
Harbord,36,36,36,36,36,36
Harbourfront,48,48,48,48,48,48
Harbourfront West,17,17,17,17,17,17
Island airport,17,17,17,17,17,17


In [36]:
print('There are {} uniques categories.'.format(len(Downtown_venues['Venue Category'].unique())))

There are 204 uniques categories.


### Transforming data, according Venues categories


In [37]:
# one hot encoding
Downtown_onehot = pd.get_dummies(Downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Downtown_onehot['Neighborhood'] = Downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Downtown_onehot.columns[-1]] + list(Downtown_onehot.columns[:-1])
Downtown_onehot = Downtown_onehot[fixed_columns]


# one hot encoding
East_onehot = pd.get_dummies(East_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
East_onehot['Neighborhood'] = East_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [East_onehot.columns[-1]] + list(East_onehot.columns[:-1])
East_onehot = East_onehot[fixed_columns]



# one hot encoding
West_onehot = pd.get_dummies(West_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
West_onehot['Neighborhood'] = West_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [West_onehot.columns[-1]] + list(West_onehot.columns[:-1])
West_onehot = West_onehot[fixed_columns]



# one hot encoding
Central_onehot = pd.get_dummies(Central_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Central_onehot['Neighborhood'] = Central_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Central_onehot.columns[-1]] + list(Central_onehot.columns[:-1])
Central_onehot = Central_onehot[fixed_columns]

Central_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bank,Bar,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Construction & Landscaping,Cosmetics Shop,Dance Studio,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gourmet Shop,Greek Restaurant,Gym,Gym / Fitness Center,Health & Beauty Service,History Museum,Hotel,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Light Rail Station,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Park,Pharmacy,Pizza Place,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,Lawrence Park,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Roselawn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
Central_onehot.shape

(227, 65)

### Group the data according the neighborhood 

In [39]:
Downtown_grouped = Downtown_onehot.groupby('Neighborhood').mean().reset_index()

East_grouped = East_onehot.groupby('Neighborhood').mean().reset_index()

West_grouped = West_onehot.groupby('Neighborhood').mean().reset_index()

Central_grouped = Central_onehot.groupby('Neighborhood').mean().reset_index()
Central_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bank,Bar,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Construction & Landscaping,Cosmetics Shop,Dance Studio,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gourmet Shop,Greek Restaurant,Gym,Gym / Fitness Center,Health & Beauty Service,History Museum,Hotel,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Light Rail Station,Liquor Store,Mexican Restaurant,Middle Eastern Restaurant,Park,Pharmacy,Pizza Place,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Deer Park,0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.117647,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
1,Forest Hill SE,0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.117647,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
2,North Midtown,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.136364,0.0,0.0,0.090909,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.045455,0.045455,0.045455,0.0,0.0,0.0,0.136364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0
3,Rathnelly,0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.117647,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
4,South Hill,0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.117647,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
5,Summerhill East,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Yorkville,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.136364,0.0,0.0,0.090909,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.045455,0.045455,0.045455,0.0,0.0,0.0,0.136364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0
7,Davisville,0.027778,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.083333,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.027778,0.055556,0.0,0.0,0.0,0.0,0.027778,0.055556,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.083333,0.0,0.0,0.027778,0.0,0.083333,0.027778,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0
8,Davisville North,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Forest Hill North & West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0


In [40]:
Downtown_grouped.shape

(39, 204)

In [41]:
num_top_venues = 5

for hood in Downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Downtown_grouped[Downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Adelaide ----
           venue  freq
0    Coffee Shop  0.10
1           Café  0.05
2     Restaurant  0.04
3            Gym  0.03
4  Deli / Bodega  0.03


---- Bathurst Quay ----
              venue  freq
0   Airport Service  0.18
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3     Boat or Ferry  0.06
4   Harbor / Marina  0.06


---- Cabbagetown----
                venue  freq
0         Coffee Shop  0.07
1                Café  0.04
2                Park  0.04
3         Pizza Place  0.04
4  Italian Restaurant  0.04


---- Chinatown ----
                   venue  freq
0                   Café  0.09
1            Coffee Shop  0.07
2     Mexican Restaurant  0.05
3  Vietnamese Restaurant  0.05
4                 Bakery  0.05


---- Design Exchange----
                 venue  freq
0          Coffee Shop  0.12
1                Hotel  0.08
2                 Café  0.07
3  Japanese Restaurant  0.04
4           Restaurant  0.04


---- Grange Park----
                   venue  freq
0      

In [42]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Get the top 10 venues

In [43]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Downtown_grouped['Neighborhood']

for ind in np.arange(Downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
East_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
East_neighborhoods_venues_sorted['Neighborhood'] = East_grouped['Neighborhood']

for ind in np.arange(East_grouped.shape[0]):
    East_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(East_grouped.iloc[ind, :], num_top_venues)

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
West_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
West_neighborhoods_venues_sorted['Neighborhood'] = West_grouped['Neighborhood']

for ind in np.arange(West_grouped.shape[0]):
    West_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(West_grouped.iloc[ind, :], num_top_venues)

West_neighborhoods_venues_sorted.head()


num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Central_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
Central_neighborhoods_venues_sorted['Neighborhood'] = Central_grouped['Neighborhood']

for ind in np.arange(Central_grouped.shape[0]):
    Central_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Central_grouped.iloc[ind, :], num_top_venues)

Central_neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Deer Park,Coffee Shop,Pub,American Restaurant,Restaurant,Fried Chicken Joint,Vietnamese Restaurant,Health & Beauty Service,Light Rail Station,Liquor Store,Pizza Place
1,Forest Hill SE,Coffee Shop,Pub,American Restaurant,Restaurant,Fried Chicken Joint,Vietnamese Restaurant,Health & Beauty Service,Light Rail Station,Liquor Store,Pizza Place
2,North Midtown,Sandwich Place,Café,Coffee Shop,American Restaurant,Donut Shop,History Museum,Indian Restaurant,Liquor Store,Cosmetics Shop,Middle Eastern Restaurant
3,Rathnelly,Coffee Shop,Pub,American Restaurant,Restaurant,Fried Chicken Joint,Vietnamese Restaurant,Health & Beauty Service,Light Rail Station,Liquor Store,Pizza Place
4,South Hill,Coffee Shop,Pub,American Restaurant,Restaurant,Fried Chicken Joint,Vietnamese Restaurant,Health & Beauty Service,Light Rail Station,Liquor Store,Pizza Place


In [44]:
print(Downtown_grouped.columns)
Downtown_grouped.head()

Index(['Neighborhood', 'Yoga Studio', 'Airport', 'Airport Food Court',
       'Airport Gate', 'Airport Lounge', 'Airport Service', 'Airport Terminal',
       'American Restaurant', 'Antique Shop',
       ...
       'Thai Restaurant', 'Theater', 'Theme Restaurant', 'Trail',
       'Train Station', 'Vegetarian / Vegan Restaurant', 'Video Game Store',
       'Vietnamese Restaurant', 'Wine Bar', 'Women's Store'],
      dtype='object', length=204)


Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Butcher,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Auditorium,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flower Shop,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hospital,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Lingerie Store,Liquor Store,Lounge,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.010638,0.010638,0.0,0.010638,0.0,0.0,0.0,0.0,0.021277,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.010638,0.021277,0.0,0.0,0.010638,0.010638,0.010638,0.0,0.053191,0.0,0.0,0.0,0.0,0.0,0.0,0.031915,0.0,0.095745,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.021277,0.0,0.0,0.021277,0.0,0.010638,0.0,0.031915,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010638,0.0,0.010638,0.0,0.0,0.010638,0.0,0.010638,0.010638,0.0,0.0,0.0,0.031915,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031915,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.010638,0.0,0.0,0.010638,0.0,0.0,0.010638,0.0,0.0,0.010638,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.010638,0.0,0.010638,0.0,0.0,0.0,0.0,0.010638,0.010638,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.010638,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.042553,0.0,0.0,0.021277,0.010638,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.010638,0.0,0.010638,0.0,0.010638,0.0,0.0,0.021277,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.031915,0.010638,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.010638
1,Bathurst Quay,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cabbagetown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.043478,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.043478,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.021739,0.021739,0.043478,0.0,0.021739,0.021739,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chinatown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.054545,0.0,0.036364,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.018182,0.0,0.090909,0.0,0.018182,0.018182,0.0,0.0,0.0,0.0,0.018182,0.072727,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.018182,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054545,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.054545,0.018182,0.0
4,Design Exchange,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.07,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.01,0.0,0.03,0.01,0.01,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0


In [48]:
len(Central_grouped.columns)

65

In [49]:
# set number of clusters
kclusters = 3

Downtown_grouped_clustering = Downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 0, 0, 0, 0, 0, 0, 1, 1], dtype=int32)

In [50]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Downtown_merged = Downtown_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Downtown_merged = Downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Downtown_merged.head() # check the last columns!

Unnamed: 0,Postal code,Borough,Latitude,Longitude,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,43.65426,-79.360636,Regent Park,0,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Theater,Yoga Studio,Cosmetics Shop,Restaurant
1,M5A,Downtown Toronto,43.65426,-79.360636,Harbourfront,0,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Theater,Yoga Studio,Cosmetics Shop,Restaurant
2,M7A,Downtown Toronto,43.662301,-79.389494,Queen's Park,0,Coffee Shop,Sushi Restaurant,Diner,Gym,Mexican Restaurant,Juice Bar,Japanese Restaurant,Italian Restaurant,Hobby Shop,Fried Chicken Joint
3,M7A,Downtown Toronto,43.662301,-79.389494,Ontario Provincial Government,0,Coffee Shop,Sushi Restaurant,Diner,Gym,Mexican Restaurant,Juice Bar,Japanese Restaurant,Italian Restaurant,Hobby Shop,Fried Chicken Joint
4,M5B,Downtown Toronto,43.657162,-79.378937,Garden District,0,Clothing Store,Coffee Shop,Café,Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Bubble Tea Shop,Cosmetics Shop,Theater


In [51]:
# create map
map_clusters = folium.Map(location=[latitude_1, longitude_1], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Downtown_merged['Latitude'], Downtown_merged['Longitude'], Downtown_merged['Neighborhood'], Downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [52]:
map_clusters