# Assignment - Part 1

## Scrape data from HTML tables into a DataFrame using BeautifulSoup and Pandas

### Install libraries and import modules

In [1]:
!pip install bs4 # Install BeautifulSoup library



In [2]:
!pip install requests # Install requests library



In [3]:
from bs4 import BeautifulSoup # Import module for web scrapping

In [4]:
import requests # Import module for downloading a webpage

In [5]:
import pandas as pd # Import module for creating pandas dataframe

In [6]:
import numpy as np # library to handle data in a vectorized manner

In [7]:
# libraries for displaying images
from IPython.display import Image
from IPython.core.display import HTML

!pip install folium==0.5.0 
import folium

# plotting library
print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [8]:
import json # Library to handle JSON files

In [9]:
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

In [10]:
# Matplotlib and associated plotting modules 
import matplotlib.cm as cm 
import matplotlib.colors as colors

In [11]:
!conda install -c conda-forge geopy --yes # Install geopy

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [12]:
from geopy.geocoders import Nominatim # Convert an address into latitude and longitude values

In [13]:
from sklearn.cluster import KMeans

In [14]:
print('Libraries imported.') 

Libraries imported.


### Scrape wikipedia webpage containing the postal codes of Toronto, Canada

#### Specify the url for the website to be scrapped.

In [15]:
# Url of the webpage that contains the data required
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

#### Request for the data required from the website.

In [16]:
# Using 'get' function to obtain the contents of the webpage in a text format and store in a variable called 'data'
data = requests.get(url).text 

#### Extract the information required from the html.

In [17]:
# Create a BeautifulSoup called 'soup'
soup = BeautifulSoup(data, 'html5lib')

#### Isolate all the tables with the tag 'table'.

In [18]:
# Find the all the tables in the webpage with the tag 'table' and store in a variable called 'tables'
tables = soup.find_all('table')

#### Create a 'soup' of only the table that is required and save in the variable, "table_data".

In [19]:
# Find the first table in the webpage with the tag 'table' and store in a variable called 'table'
table = soup.find('table')

#### Create a list called 'table_data'. Populate 'table_data' and have the columns listed in the order 'PostalCode', 'Borough' and 'Neighborhood'.

In [20]:
# Find the contents of the table and place in the variable, table_data
table_data = [] # Create a list called table_data to hold the data within the table

# Create an elif statement to loop through the contents of the table 
for row in table.find_all('td'): # Create a for loop
    cell = {} # Create an empty dictionary, which to hold the contents of table per column
    if row.span.text =='Not assigned':  # Check for any text exactly stated as 'Not assigned'
        pass                           # Ignore, if found
    else:                                 # Otherwise
        cell['PostalCode'] = row.p.text[:3] # Define a key called "PostalCode" for the dictionary, cell 
        cell['Borough'] = (row.span.text).split('(')[0]  # Define a key called "Borough" for the dictionary, cell
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ') # Create a key called "Neighborhood" for the dictionary, cell
        table_data.append(cell)  # Fill the list (table_data) by adding the three keys ('PostalCode', 'Borough' and 'Neighborhood') to the dictionary (cell)

print(table_data)  # Print the list, table_data

[{'PostalCode': 'M3A', 'Borough': 'North York', 'Neighborhood': 'Parkwoods'}, {'PostalCode': 'M4A', 'Borough': 'North York', 'Neighborhood': 'Victoria Village'}, {'PostalCode': 'M5A', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Regent Park, Harbourfront'}, {'PostalCode': 'M6A', 'Borough': 'North York', 'Neighborhood': 'Lawrence Manor, Lawrence Heights'}, {'PostalCode': 'M7A', 'Borough': "Queen's Park", 'Neighborhood': 'Ontario Provincial Government'}, {'PostalCode': 'M9A', 'Borough': 'Etobicoke', 'Neighborhood': 'Islington Avenue'}, {'PostalCode': 'M1B', 'Borough': 'Scarborough', 'Neighborhood': 'Malvern, Rouge'}, {'PostalCode': 'M3B', 'Borough': 'North York', 'Neighborhood': 'Don Mills North'}, {'PostalCode': 'M4B', 'Borough': 'East York', 'Neighborhood': 'Parkview Hill, Woodbine Gardens'}, {'PostalCode': 'M5B', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Garden District, Ryerson'}, {'PostalCode': 'M6B', 'Borough': 'North York', 'Neighborhood': 'Glencairn'}, {'PostalCode': 'M9

#### Create the dataframe (df) using the contents of the list (table_data)

In [21]:
df = pd.DataFrame(table_data) # Create the dataframe, df
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


#### Group the content of the dataframe by Postal Code and Borough.

In [22]:
df = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(','.join).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Clean the dataframe

In [23]:
# Clean the information in the 'Borough' column
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df['Borough'].head()

0    Scarborough
1    Scarborough
2    Scarborough
3    Scarborough
4    Scarborough
Name: Borough, dtype: object

#### Display the first 12 rows of the resulting dataframe.

In [24]:
df.head(12)  # Display the cleaned dataframe, df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Confirm the number of rows of the dataframe

In [25]:
print('The dataframe has {} rows.'.format(df.shape[0]))

The dataframe has 103 rows.


# Assignment - Part 2

## Add the latitude and the longitude coordinates of each neighborhood in Toronto to the dataframe, df

#### Create a dataframe from the csv file that contains the latitude and longitude values of Toronto's Postal Codes

In [26]:
geo_url = 'https://cocl.us/Geospatial_data'
df_geodata = pd.read_csv(geo_url, index_col='Postal Code')
df_geodata.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [27]:
df_geodata.shape

(103, 2)

#### Confirm the number of rows for the 'df' and 'df_geodata'.

In [28]:
print('The dataframe, df has {} rows and the dataframe, df_geodata has {} rows.'.format(df.shape[0], df_geodata.shape[0]))

The dataframe, df has 103 rows and the dataframe, df_geodata has 103 rows.


#### Merge the two dataframes (df and Postal Code)

In [29]:
toronto_geo = df.join(df_geodata, on='PostalCode') # To get the latitude and longitude with respect to neighbourhood and not boroughs
toronto_geo.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [30]:
toronto_geo.shape # Confirm the number of rows in the merged dataframe is the same as that of the individual dataframes
print('The dataframe,"df" has {} rows, the dataframe "df_geodata" has {} rows and the dataframe, "toronto_geo" has {} rows.'.format(df.shape[0], df_geodata.shape[0], toronto_geo.shape[0]))

The dataframe,"df" has 103 rows, the dataframe "df_geodata" has 103 rows and the dataframe, "toronto_geo" has 103 rows.


#### Check if any value is missing in the dataframe.

In [31]:
toronto_geo.isnull().sum() # Check for all null values and sum the number of occurences to confirm that all the borough's coordinates were merged.

PostalCode      0
Borough         0
Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

#### Determine how many boroughs and neighborhoods exists in Toronto.

In [32]:
# State how many boroughs and neighborhoods exist in Toronto
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(toronto_geo['Borough'].unique()), toronto_geo.shape[0]))

The dataframe has 15 boroughs and 103 neighborhoods.


# Assignment - Part 3

## Draw a map of neighborhoods in Toronto

#### Use geopy library to get the latitude and longitude values of Toronto, Canada

In [33]:
# Define an instance of the geocoder and define a user agent, namely 'toronto_explorer'
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with neighborhoods superimposed on top

In [34]:
# Create map of Toronto using its latitude and longitude values.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
 
# Add markers to map using the dataframe, "toronto_geo" that contains the name and coordinates of each neighborhood in Toronto.
for lat, lng, borough, neighborhood in zip(toronto_geo['Latitude'], toronto_geo['Longitude'], toronto_geo['Borough'], toronto_geo['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng], 
        radius=5, 
        popup=label, 
        color='blue', 
        fill=True, 
        fill_color='#3186cc', 
        fill_opacity=0.7, 
        parse_html=False).add_to(map_toronto) 

map_toronto 

#### The map above displays the neighborhoods in Toronto, just click each label to view it.

#### Next, create the map that all boroughs in Toronto that have "Toronto" in its name.

In [35]:
# Slice the original dataframe, toronto_geo and create a new dataframe that contains all boroughs in Toronto that has "Toronto" in its name.
toronto_data = toronto_geo[toronto_geo['Borough'].str.contains('Toronto', regex=False)]
toronto_data.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
40,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [36]:
toronto_data.shape

(39, 5)

In [37]:
print('There are {} boroughs in Toronto that have "toronto" in its name.'.format(toronto_data.shape[0]))

There are 39 boroughs in Toronto that have "toronto" in its name.


### Create a map of boroughs with 'Toronto' included in its name

In [38]:
# Create map of Boroughs with 'Toronto' using its latitude and longitude values
map_boroughT = folium.Map(location=[latitude, longitude], zoom_start=10)
 
# Add markers to map 
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng], 
        radius=5, 
        popup=label, 
        color='blue', 
        fill=True, 
        fill_color='#3186cc', 
        fill_opacity=0.7, 
        parse_html=False).add_to(map_boroughT) 

map_boroughT

#### The map above displays all the boroughs in Toronto that has 'toronto' as part of it name.

### Next, we select the fisrt borough in the borough called "The Beaches" and explore for venues that are in that borough.

#### Define the Foursquare API credentials and version.

In [39]:
# @Hidden Cell

# Define Foursquare Credentials and Version
CLIENT_ID = 'AS2R10DN0Z3N1JL4JCYT2XJOAPEPNVHYGX4NRUHG2VF12UJK' # your Foursquare ID
CLIENT_SECRET = 'BXAQAIRL4P0BE2XASU1XEV42OSDMDJ45RSK5UETBPRKHFA4Z' # your Foursquare Secret 
VERSION = '20210524' # Foursquare API version 
LIMIT = 100 # A default Foursquare API limit value 

## Explore "The Beaches" - the first neighborhood in the dataframe, "toronto_data".

#### Find the first listed location in the dataframe, toronto_data

In [40]:
toronto_data.loc[37, 'Neighborhood'] # Get the name of the first listed location in the dataframe, toronto_data

'The Beaches'

#### Get the longitude and latitude values for 'Regent Park, Harbourfront'

In [41]:
neighborhood_latitude = toronto_data.loc[37, 'Latitude'] # neighborhood latitude value 
neighborhood_longitude = toronto_data.loc[37, 'Longitude'] # neighborhood longitude value 

neighborhood_name = toronto_data.loc[37, 'Neighborhood'] # neighborhood name 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude)) 

Latitude and longitude values of The Beaches are 43.6763574, -79.2930312.


#### Get the top 100 venues in 'The Beaches' within a radius of 500 meters

In [42]:
# Create the URL for the GET request URL and name it url_beaches.

radius = 500 # defined radius
limit = 100 # defined number of venues

url_beaches = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

In [43]:
# Send the GET request and examine the results
results_beaches = requests.get(url_beaches).json()

#### Use the get_category_type function. Information required are in the "items" key as seen in the cell 

In [44]:
# State the function that extracts the category of the venue 

def get_category_type(row):
    try: 
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name'] 

In [45]:
# clean the json and structure it into a pandas dataframe noting that the required information is in the 'items'key

venues = results_beaches['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON 

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns] 

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1) 

# clean columns 
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()



Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Textile Museum of Canada,Art Museum,43.654396,-79.3865
3,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
4,Poke Guys,Poke Place,43.654895,-79.385052


In [46]:
# Number of venues returned by Foursquare
print('{} venues in "The Beaches" were returned by Foursquare'.format(nearby_venues.shape[0]))

70 venues in "The Beaches" were returned by Foursquare


In [47]:
# Determine how many unique venue categories can be curated from all the returned venues
print('There are {} unique venue categories in "The Beaches" borough.'.format(len(nearby_venues['categories'].unique())))

There are 51 unique venue categories in "The Beaches" borough.


In [48]:
nearby_venues['categories'].value_counts().head()

Clothing Store         6
Coffee Shop            5
American Restaurant    2
Seafood Restaurant     2
Café                   2
Name: categories, dtype: int64

In [49]:
nearby_venues.mode()['categories'][0]

'Clothing Store'

#### As shown above, of the 51 unique venue categories in the "The Beaches" borough, the most common venue category is "Clothing Store".

## Next, explore all neighborhoods in Toronto, create a cluster of venues and determine the most popular venue in each cluster.

#### Define a funciton to get all venues in Toronto.

In [50]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [51]:
# Use the function above to get all venues in each neighborhood in Toronto and then create a new dataframe called toronto_venues 

toronto_venues = getNearbyVenues(names=toronto_geo['Neighborhood'], latitudes=toronto_geo['Latitude'], longitudes=toronto_geo['Longitude'])

toronto_venues.head() # Display the first five rows of the dataframe

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Don Mills South
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
The Danforth  East
The Danforth West, Riverdale


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


#### Get the total number of venues that are operating at this time of the day different neighborhoods in Toronto

In [52]:
# Check the size of the resulting dataframe, toronto_venues
print(toronto_venues.shape)

(2112, 7)


#### Determine the total number of venues that are operating currently in each neighborhood in Toronto

In [53]:
# Check how many venues were returned for each neighborhood
toronto_venues.groupby('Neighborhood').count().head()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",23,23,23,23,23,23
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24


#### Determine how many of each venue catergory are operating in Toronto currently

In [54]:
# Count the number of elements in the specific columns
toronto_venues['Venue Category'].value_counts().head()

Coffee Shop    190
Café            92
Restaurant      63
Park            53
Pizza Place     46
Name: Venue Category, dtype: int64

In [55]:
# Check how many venue categories were returned for each neighborhood
toronto_venues.groupby('Venue Category').count().head()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Accessories Store,2,2,2,2,2,2
Adult Boutique,1,1,1,1,1,1
Afghan Restaurant,1,1,1,1,1,1
Airport,2,2,2,2,2,2
Airport Food Court,1,1,1,1,1,1


#### Check how many venues in Toronto have missing data within the "toronto_venues" dataframe

In [56]:
toronto_venues.isnull().sum() # Check for all null values and sum the number of occurences to confirm that all the borough's coordinates were merged.

Neighborhood              0
Neighborhood Latitude     0
Neighborhood Longitude    0
Venue                     0
Venue Latitude            0
Venue Longitude           0
Venue Category            0
dtype: int64

#### Determine the total number of unique venue categories exist in Toronto.

In [57]:
# Determine how many unique venue categories can be curated from all the returned venues
print('There are {} unique categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 271 unique categories.


#### Analyze each neighborhood using "One Hot" encoding, which changes all the categorical values into either a "0" or "1".

In [58]:
# One hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# Define a list of column names
cols = toronto_onehot.columns.tolist()
cols

# Move the column name to the beggining
cols.insert(0, cols.pop(cols.index('Neighborhood')))
cols

# Then use .reindex() function to reorder
toronto_onehot = toronto_onehot.reindex(columns= cols)

#### Display the first 5 rows of the dataframe of the toronto venues (toronto_venues) as created by "One Hot".

In [59]:
# Examine the first 5 rows of the result
toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Display the last 5 rows of the dataframe of the toronto venues (toronto_venues) as created by "One Hot".

In [60]:
# Examine the last 5 rows of the result
toronto_onehot.tail()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
2107,"Clairville, Humberwood, Woodbine Downs, West H...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2108,"Clairville, Humberwood, Woodbine Downs, West H...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2109,"Clairville, Humberwood, Woodbine Downs, West H...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2110,"Clairville, Humberwood, Woodbine Downs, West H...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2111,"Clairville, Humberwood, Woodbine Downs, West H...",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


#### Display the total number of neighborhoods and venue categories exist based on the "One Hot" encoding dataframe.

In [61]:
# Determine the size of the new dataframe
toronto_onehot.shape

(2112, 271)

In [62]:
# Group the rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Willowdale West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
96,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
97,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
98,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0


#### Display the total number of

In [63]:
# Confirm the size of the new dataframe, toronto_grouped
toronto_grouped.shape

(100, 271)

### Print the top 5 most common venues for each neighborhood in Toronto

In [64]:
num_top_venues = 5 

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0  Latin American Restaurant  0.25
1               Skating Rink  0.25
2                     Lounge  0.25
3             Breakfast Spot  0.25
4        Monument / Landmark  0.00


----Alderwood, Long Branch----
         venue  freq
0  Pizza Place  0.22
1   Playground  0.11
2         Pool  0.11
3  Coffee Shop  0.11
4          Pub  0.11


----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0                       Bank  0.09
1                Coffee Shop  0.09
2                   Pharmacy  0.04
3                  Gift Shop  0.04
4  Middle Eastern Restaurant  0.04


----Bayview Village----
                 venue  freq
0  Japanese Restaurant  0.25
1                 Café  0.25
2   Chinese Restaurant  0.25
3                 Bank  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0          Restaurant  0.08
1      Sandwich Place  0.08
2         Coffee 

In [65]:
# Create a function to sort the top 5 venues in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Print the top 10 most common venues for each neighborhood in Toronto

#### Create a dataframe for the top 10 venues in each neighborhood

In [66]:
# Create a new dataframe for the top 10 venues for each neighbourhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Skating Rink,Lounge,Breakfast Spot,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Accessories Store
1,"Alderwood, Long Branch",Pizza Place,Playground,Pool,Coffee Shop,Pub,Sandwich Place,Pharmacy,Gym,Hookah Bar,Music Venue
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pharmacy,Gift Shop,Middle Eastern Restaurant,Shopping Mall,Mobile Phone Shop,Sandwich Place,Fried Chicken Joint,Supermarket
3,Bayview Village,Japanese Restaurant,Café,Chinese Restaurant,Bank,Accessories Store,Movie Theater,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant
4,"Bedford Park, Lawrence Manor East",Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Pub,Boutique,Pizza Place,Butcher,Café,Liquor Store


In [67]:
neighborhoods_venues_sorted.shape

(100, 11)

### Cluster the neighborhoods

#### K-means

In [68]:
# Set number of clusters to 5
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# Run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 2], dtype=int32)

#### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [69]:
# Add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_geo

# merge toronto_grouped with toronto_geo to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4.0,Fast Food Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Moving Target,Bar,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop,Martial Arts School
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Electronics Store,Rental Car Location,Medical Center,Bank,Intersection,Restaurant,Donut Shop,Mexican Restaurant,Breakfast Spot,Miscellaneous Shop
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Korean BBQ Restaurant,Accessories Store,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Fried Chicken Joint,Bakery,Gas Station,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bank,Hakka Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


In [70]:
toronto_merged.shape

(103, 16)

In [71]:
toronto_merged.describe()

Unnamed: 0,Latitude,Longitude,Cluster Labels
count,103.0,103.0,100.0
mean,43.704608,-79.397153,1.19
std,0.052463,0.097146,0.597892
min,43.602414,-79.615819,0.0
25%,43.660567,-79.464763,1.0
50%,43.696948,-79.38879,1.0
75%,43.74532,-79.340923,1.0
max,43.836125,-79.160497,4.0


In [72]:
toronto_merged.loc

<pandas.core.indexing._LocIndexer at 0x7f40bc3adf50>

#### Visualize the resulting clusters of toronto_merged

In [73]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to the map
markers_colors = []

for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon], 
        radius=5, 
        popup=label, 
        color='blue', 
        fill=True, 
        fill_color='#c9fce8', 
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine the clusters and determine the most common venue category in each cluster.

#### Now, examine each cluster.  Determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, assign a name to each cluster.

### Cluster Label 0.0

In [74]:
Cluster0 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
Cluster0

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,North York,0.0,Baseball Field,Food Truck,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
91,Etobicoke,0.0,Breakfast Spot,Baseball Field,Accessories Store,Miscellaneous Shop,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant
97,North York,0.0,Baseball Field,Accessories Store,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop,Martial Arts School


#### Determine the venue that occurs the most in Cluster Label 0.0

In [75]:
# Count the number of elements in specific columns of Cluster Label 0.0
Cluster0['1st Most Common Venue'].value_counts().head()

Baseball Field    2
Breakfast Spot    1
Name: 1st Most Common Venue, dtype: int64

In [76]:
#Determine the highest occuring venue in the Cluster Label 0.0
Cluster0.mode()['1st Most Common Venue'][0]

'Baseball Field'

In [77]:
print('This cluster will be called "{}" area in Toronto.'.format(Cluster0.mode()['1st Most Common Venue'][0]))

This cluster will be called "Baseball Field" area in Toronto.


### Cluster Label 1.0

In [78]:
Cluster1 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
Cluster1

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,1.0,Moving Target,Bar,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop,Martial Arts School
2,Scarborough,1.0,Electronics Store,Rental Car Location,Medical Center,Bank,Intersection,Restaurant,Donut Shop,Mexican Restaurant,Breakfast Spot,Miscellaneous Shop
3,Scarborough,1.0,Coffee Shop,Korean BBQ Restaurant,Accessories Store,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop
4,Scarborough,1.0,Fried Chicken Joint,Bakery,Gas Station,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Bank,Hakka Restaurant,Middle Eastern Restaurant,Miscellaneous Shop
6,Scarborough,1.0,Discount Store,Hobby Shop,Convenience Store,Department Store,Coffee Shop,Accessories Store,Molecular Gastronomy Restaurant,Movie Theater,Motel,Moroccan Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
95,Etobicoke,1.0,Convenience Store,Beer Store,Shopping Plaza,Café,Coffee Shop,Liquor Store,Pet Store,Pharmacy,Pizza Place,Metro Station
98,York,1.0,Jewelry Store,Convenience Store,Accessories Store,Miscellaneous Shop,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant
99,Etobicoke,1.0,Pizza Place,Coffee Shop,Intersection,Chinese Restaurant,Sandwich Place,Discount Store,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Mexican Restaurant
101,Etobicoke,1.0,Grocery Store,Pharmacy,Fried Chicken Joint,Pizza Place,Fast Food Restaurant,Beer Store,Sandwich Place,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant


#### Determine the venue that occurs the most in Cluster Label 1.0

In [79]:
# Count the number of elements in specific columns in Cluster Label 1.0
Cluster1['1st Most Common Venue'].value_counts().head()

Coffee Shop          20
Café                  6
Pizza Place           6
Convenience Store     3
Discount Store        3
Name: 1st Most Common Venue, dtype: int64

In [80]:
#Determine the highest occuring venue in the Cluster Label 1.0
Cluster1.mode()['1st Most Common Venue'][0]

'Coffee Shop'

In [81]:
print('This cluster will be called "{}" area in Toronto.'.format(Cluster1.mode()['1st Most Common Venue'][0]))

This cluster will be called "Coffee Shop" area in Toronto.


### Cluster Label 2.0

In [82]:
Cluster2 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
Cluster2

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,2.0,Playground,Park,Intersection,Pet Store,Motel,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station
21,North York,2.0,Park,Accessories Store,Middle Eastern Restaurant,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
23,North York,2.0,Convenience Store,Park,Electronics Store,Accessories Store,Miscellaneous Shop,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop
25,North York,2.0,Fast Food Restaurant,Park,Food & Drink Shop,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop
30,North York,2.0,Airport,Park,Accessories Store,Miscellaneous Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop
44,Central Toronto,2.0,Bus Line,Park,Swim School,Accessories Store,Miscellaneous Shop,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant
50,Downtown Toronto,2.0,Park,Playground,Trail,Miscellaneous Shop,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant
64,Central Toronto,2.0,Park,Jewelry Store,Trail,Sushi Restaurant,Accessories Store,Miscellaneous Shop,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant
74,York,2.0,Park,Women's Store,Pool,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop
90,Etobicoke,2.0,Park,River,Accessories Store,Miscellaneous Shop,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Middle Eastern Restaurant


#### Determine the venue that occurs the most in Cluster Label 2.0

In [83]:
# Count the number of elements in specific columns of Cluster Label 2.0
Cluster2['1st Most Common Venue'].value_counts().head()

Park                    6
Playground              1
Bus Line                1
Airport                 1
Fast Food Restaurant    1
Name: 1st Most Common Venue, dtype: int64

In [84]:
#Determine the highest occuring venue in the Cluster Label 2.0
Cluster2.mode()['1st Most Common Venue'][0]

'Park'

In [85]:
print('This cluster will be called "{}" area in Toronto.'.format(Cluster2.mode()['1st Most Common Venue'][0]))

This cluster will be called "Park" area in Toronto.


### Cluster Label 3.0

In [86]:
Cluster3 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
Cluster3

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,3.0,Playground,Pizza Place,Health Food Store,Martial Arts School,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant
72,North York,3.0,Japanese Restaurant,Pizza Place,Bakery,Asian Restaurant,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant
94,Etobicoke,3.0,Bakery,Accessories Store,Mobile Phone Shop,Motel,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Miscellaneous Shop,Martial Arts School
96,North York,3.0,Pizza Place,Intersection,Pharmacy,Pet Store,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant


#### Determine the venue that occurs the most in Cluster Label 3.0

In [87]:
# Count the number of elements in specific columns in Cluster Label 3.0
Cluster3['1st Most Common Venue'].value_counts().head()

Bakery                 1
Playground             1
Japanese Restaurant    1
Pizza Place            1
Name: 1st Most Common Venue, dtype: int64

In [88]:
#Determine the highest occuring venue in the Cluster Label 3.0
Cluster3.mode()['1st Most Common Venue'][0]

'Bakery'

In [89]:
print('This cluster will be called "{}" area in Toronto.'.format(Cluster3.mode()['1st Most Common Venue'][0]))

This cluster will be called "Bakery" area in Toronto.


### Cluster Label 4.0

In [90]:
Cluster4 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
Cluster4

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,4.0,Fast Food Restaurant,Accessories Store,Movie Theater,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop


#### Determine the venue that occurs the most in Cluster Label 4.0

In [91]:
# Count the number of elements in specific columns of Cluster Label 4.0
Cluster4['1st Most Common Venue'].value_counts().head()

Fast Food Restaurant    1
Name: 1st Most Common Venue, dtype: int64

In [92]:
#Determine the highest occuring venue in the Cluster Label 4.0
Cluster4.mode()['1st Most Common Venue'][0]

'Fast Food Restaurant'

In [93]:
print('This cluster will be called "{}" area in Toronto.'.format(Cluster4.mode()['1st Most Common Venue'][0]))

This cluster will be called "Fast Food Restaurant" area in Toronto.
