# Module 1

In [1]:
import pandas as pd #provides high-performance & creates data frames
import requests # to read from html using Requests API.
from bs4 import BeautifulSoup #Getting data out of HTML, XML, and other markup languages.

In [2]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_url,'lxml') #reading the HTML page
My_table = soup.find('table',{'class':'wikitable sortable'}) #selecting only the table

Postcode=[]
Borough=[]
Neighborhood=[]

for row in My_table.find_all('tr')[1:]:
    col=row.find_all('td')
    col1=col[0].text.strip()
    Postcode.append(col1)
    col2=col[1].text.strip()
    Borough.append(col2)
    col3=col[2].text.strip()
    Neighborhood.append(col3)
    
columns={'Postcode':Postcode,'Borough':Borough,'Neighborhood':Neighborhood} #form the table with column name
#Create a dataframe from the columns variable
df_full = pd.DataFrame(columns)

df_full['Neighborhood'][df_full['Neighborhood'] == 'Not assigned'] = df_full['Borough'] #Replace 'Not assigned' Neighborhoods with Borough name
df_mid=df_full[df_full.Borough != 'Not assigned'] #Remove the 'Not assigned' Boroughs
df_mid.reset_index(drop = True, inplace = True) #Reset the index

df2_final=(df_mid.groupby('Postcode').agg(lambda x: ','.join(set(x))).reset_index()) #Concatenating the data
print(df2_final.shape)
df2_final.head(10)

(103, 3)


Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Rouge Hill,Highland Creek,Port Union"
2,M1E,Scarborough,"West Hill,Morningside,Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park,East Birchmount Park,Ionview"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffside,Scarborough Village West,Cliffcrest"
9,M1N,Scarborough,"Cliffside West,Birch Cliff"


# Module 2

**Install Geocoder package**

In [3]:
! pip install geocoder 

Requirement not upgraded as not directly required: geocoder in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages
Requirement not upgraded as not directly required: future in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: six in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: click in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: ratelim in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: requests in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: decorator in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ratelim->geocoder)
Requirement not upgraded as not directly required: chardet<3.1.0,

**Import the package the get the value**

In [4]:
import geocoder
g = geocoder.google('Mountain View, CA')
g

<[OVER_QUERY_LIMIT] Google - Geocode [empty]>

**Since Geocode failed to fetch data, using the CSV to fetch the data**

In [5]:
#Reading the CSV file & renaming the column to match teh first dataframe
df_cord=pd.read_csv("http://cocl.us/Geospatial_data")
df_cord.rename(index=str, columns={"Postal Code":"Postcode"},inplace=True)
df_cord.head(10)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


**Merging both the dataframes**

In [6]:
df_toronto=pd.merge(df2_final, df_cord, on='Postcode')
df_toronto.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Highland Creek,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Morningside,Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park,East Birchmount Park,Ionview",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside,Scarborough Village West,Cliffcrest",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West,Birch Cliff",43.692657,-79.264848


# Module 3

Download all the packages that needed.

In [7]:
import numpy as np # library to handle data in a vectorized manner
import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.17.0                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge


Let's get the geographical coordinates of Toronto.

In [8]:

address = 'Toronto, ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))





The geograpical coordinate of Toronto are 43.653963, -79.387207.


Visualizating Toronto the neighborhoods in the map.

In [9]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Define Foursquare Credentials and Version

In [10]:
CLIENT_ID = '4BHK2XBPCKFPKJLNJYU3EWIJTI1CV4F151OYTE5XXNQMBODF' # your Foursquare ID
CLIENT_SECRET = 'BL12WZBHZUM3PRRUSHWKGTTDSD400ZV5FYJ0IP20F3TZQHWF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4BHK2XBPCKFPKJLNJYU3EWIJTI1CV4F151OYTE5XXNQMBODF
CLIENT_SECRET:BL12WZBHZUM3PRRUSHWKGTTDSD400ZV5FYJ0IP20F3TZQHWF


# Explore the first Neighborhood in our dataframe.

Get the Neighborhood's name & coordinates.

In [11]:
df_toronto.loc[0, 'Neighborhood']
neighborhood_latitude = df_toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_toronto.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge,Malvern are 43.806686299999996, -79.19435340000001.


#### Now, let's get the top 30 venues that are in Malvern & Rouge within a radius of 500 meters.
First, let's create the GET request URL. Name your URL **url**.

In [12]:
# type your answer here
LIMIT=30
radius=500


url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url



'https://api.foursquare.com/v2/venues/explore?client_id=4BHK2XBPCKFPKJLNJYU3EWIJTI1CV4F151OYTE5XXNQMBODF&client_secret=BL12WZBHZUM3PRRUSHWKGTTDSD400ZV5FYJ0IP20F3TZQHWF&ll=43.653963,-79.387207&v=20180605&radius=500&limit=30'

In [13]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5be3c0af1ed219051531f93e'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-5227bb01498e17bf485e6202-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/neighborhood_',
          'suffix': '.png'},
         'id': '4f2a25ac4b909258e854f55f',
         'name': 'Neighborhood',
         'pluralName': 'Neighborhoods',
         'primary': True,
         'shortName': 'Neighborhood'}],
       'id': '5227bb01498e17bf485e6202',
       'location': {'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'distance': 174,
        'formattedAddress': ['Toronto ON', 'Canada'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          'lng': -79.38529600606677}],
        'lat': 43.6532

Use **get_category_type** function to get the categories of each venue.

In [14]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
venues = results['response']['groups'][0]['items']
    
nearby_venues_temp = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues_temp =nearby_venues_temp.loc[:, filtered_columns]

# filter the category for each row
nearby_venues_temp['venue.categories'] = nearby_venues_temp.apply(get_category_type, axis=1)

# clean columns
nearby_venues=nearby_venues_temp[nearby_venues_temp['venue.categories'] != 'Neighborhood']
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head(10)


Unnamed: 0,name,categories,lat,lng
1,Textile Museum of Canada,Art Museum,43.654396,-79.3865
2,Japango,Sushi Restaurant,43.655268,-79.385165
3,Sansotei Ramen 三草亭,Ramen Restaurant,43.655157,-79.386501
4,Cafe Plenty,Café,43.654571,-79.38945
5,Tsujiri,Tea Room,43.655374,-79.385354
6,Manpuku まんぷく,Japanese Restaurant,43.653612,-79.390613
7,Rolltation,Japanese Restaurant,43.654918,-79.387424
8,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
9,Uncle Tetsu's Japanese Angel Cafe,Café,43.655001,-79.386899
10,Poke Guys,Poke Place,43.654895,-79.385052


In [16]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

29 venues were returned by Foursquare.


## Explore Neighborhoods in Toronto
#### Let's create a function to repeat the same process to all the neighborhoods in Toronto

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    
    nearby_venues_temp = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues_temp.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    nearby_venues=nearby_venues_temp[nearby_venues_temp['Venue Category'] != 'Neighborhood']
    
    return(nearby_venues)

#### Run the above function on each neighborhood and create a new dataframe called *toronto_venues*

In [18]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighborhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )

Rouge,Malvern
Rouge Hill,Highland Creek,Port Union
West Hill,Morningside,Guildwood
Woburn
Cedarbrae
Scarborough Village
Kennedy Park,East Birchmount Park,Ionview
Clairlea,Golden Mile,Oakridge
Cliffside,Scarborough Village West,Cliffcrest
Cliffside West,Birch Cliff
Dorset Park,Wexford Heights,Scarborough Town Centre
Wexford,Maryvale
Agincourt
Sullivan,Clarks Corners,Tam O'Shanter
Milliken,L'Amoreaux East,Steeles East,Agincourt North
L'Amoreaux West,Steeles West
Upper Rouge
Hillcrest Village
Henry Farm,Fairview,Oriole
Bayview Village
York Mills,Silver Hills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Downsview North,Bathurst Manor,Wilson Heights
York University,Northwood Park
Downsview East,CFB Toronto
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Parkview Hill,Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West,Riverdale
The Beac

#### Size of the resulting dataframe, after merging the Postcode & Borough from original dataframe

In [19]:
toronto_venues_final=pd.merge(toronto_venues, df_toronto[['Neighborhood','Postcode','Borough']], on='Neighborhood') #merging the Postcode & Borough with venue list
print(toronto_venues_final.shape)


(1321, 9)


In [20]:
toronto_venues_final.head() #Sample first 5 records

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Postcode,Borough
0,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant,M1B,Scarborough
1,"Rouge Hill,Highland Creek,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar,M1C,Scarborough
2,"West Hill,Morningside,Guildwood",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place,M1E,Scarborough
3,"West Hill,Morningside,Guildwood",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store,M1E,Scarborough
4,"West Hill,Morningside,Guildwood",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant,M1E,Scarborough


In [21]:
toronto_venues_final.tail() #Sample last 5 records

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Postcode,Borough
1316,"Beaumond Heights,Humbergate,South Steeles,Albi...",43.739416,-79.588437,McDonald's,43.741635,-79.584446,Fast Food Restaurant,M9V,Etobicoke
1317,"Beaumond Heights,Humbergate,South Steeles,Albi...",43.739416,-79.588437,Dollarama,43.741745,-79.591205,Discount Store,M9V,Etobicoke
1318,"Beaumond Heights,Humbergate,South Steeles,Albi...",43.739416,-79.588437,NORI SUSHI,43.742775,-79.586985,Japanese Restaurant,M9V,Etobicoke
1319,Northwest,43.706748,-79.594054,Economy Rent A Car,43.708471,-79.589943,Rental Car Location,M9W,Etobicoke
1320,Northwest,43.706748,-79.594054,Saand Rexdale,43.705072,-79.598725,Drugstore,M9W,Etobicoke


#### Total venues were returned for each neighborhood & Borough

In [22]:
toronto_venues_final.groupby('Neighborhood').count() # Total venue for each neighborhood

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Postcode,Borough
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Agincourt,5,5,5,5,5,5,5,5
Bayview Village,4,4,4,4,4,4,4,4
"Beaumond Heights,Humbergate,South Steeles,Albion Gardens,Jamestown,Thistletown,Silverstone,Mount Olive",11,11,11,11,11,11,11,11
"Bedford Park,Lawrence Manor East",26,26,26,26,26,26,26,26
Berczy Park,30,30,30,30,30,30,30,30
Business reply mail Processing Centre969 Eastern,16,16,16,16,16,16,16,16
"Cabbagetown,St. James Town",30,30,30,30,30,30,30,30
Caledonia-Fairbanks,6,6,6,6,6,6,6,6
Canada Post Gateway Processing Centre,11,11,11,11,11,11,11,11
Cedarbrae,7,7,7,7,7,7,7,7


In [23]:
toronto_venues_final.groupby('Borough').count() # Total venues for each Borough

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Postcode
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Central Toronto,111,111,111,111,111,111,111,111
Downtown Toronto,482,482,482,482,482,482,482,482
East Toronto,96,96,96,96,96,96,96,96
East York,73,73,73,73,73,73,73,73
Etobicoke,77,77,77,77,77,77,77,77
Mississauga,11,11,11,11,11,11,11,11
North York,201,201,201,201,201,201,201,201
Queen's Park,30,30,30,30,30,30,30,30
Scarborough,85,85,85,85,85,85,85,85
West Toronto,136,136,136,136,136,136,136,136


#### Unique venue categories

In [24]:
print('There are {} uniques categories.'.format(len(toronto_venues_final['Venue Category'].unique())))

There are 232 uniques categories.


## 3. Analyze Each Neighborhood

In [25]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues_final[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues_final['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill,Highland Creek,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"West Hill,Morningside,Guildwood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"West Hill,Morningside,Guildwood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"West Hill,Morningside,Guildwood",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
toronto_onehot.shape # Dataframe Size

(1321, 233)

#### Grouping rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [27]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
1,Bayview Village,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2,"Beaumond Heights,Humbergate,South Steeles,Albi...",0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
3,"Bedford Park,Lawrence Manor East",0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.038462,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
4,Berczy Park,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
5,Business reply mail Processing Centre969 Eastern,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.062500
6,"Cabbagetown,St. James Town",0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
7,Caledonia-Fairbanks,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.166667,0.000000
8,Canada Post Gateway Processing Centre,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.090909,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
9,Cedarbrae,0.0,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000


In [28]:
toronto_grouped.shape # New Dataframe Size

(100, 233)

#### Printing each neighborhood along with the top 10 most common venues in dataframe

In [29]:
   
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Clothing Store,Lounge,Chinese Restaurant,Breakfast Spot,Sandwich Place,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run
1,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore
2,"Beaumond Heights,Humbergate,South Steeles,Albi...",Grocery Store,Pharmacy,Fried Chicken Joint,Coffee Shop,Pizza Place,Sandwich Place,Beer Store,Discount Store,Japanese Restaurant,Fast Food Restaurant
3,"Bedford Park,Lawrence Manor East",Italian Restaurant,Coffee Shop,Fast Food Restaurant,Pharmacy,Pizza Place,Pub,Café,Butcher,Sushi Restaurant,Sandwich Place
4,Berczy Park,Cocktail Bar,Seafood Restaurant,Bakery,Farmers Market,Tea Room,Burger Joint,Liquor Store,Bistro,Belgian Restaurant,Restaurant
5,Business reply mail Processing Centre969 Eastern,Light Rail Station,Yoga Studio,Auto Workshop,Pizza Place,Comic Shop,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park
6,"Cabbagetown,St. James Town",Restaurant,Coffee Shop,Indian Restaurant,Italian Restaurant,Café,Pet Store,Beer Store,Butcher,Pub,Caribbean Restaurant
7,Caledonia-Fairbanks,Park,Pharmacy,Women's Store,Market,Fast Food Restaurant,Curling Ice,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
8,Canada Post Gateway Processing Centre,Coffee Shop,Hotel,Gym / Fitness Center,Mediterranean Restaurant,American Restaurant,Burrito Place,Fried Chicken Joint,Sandwich Place,Middle Eastern Restaurant,College Gym
9,Cedarbrae,Bank,Athletics & Sports,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Bakery,Caribbean Restaurant,Yoga Studio,Dessert Shop,Dim Sum Restaurant


# Cluster Neighborhoods by k-Means

#### Running k-means to cluster the neighborhood into 5 clusters

In [30]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 


array([1, 1, 0, 1, 1, 1, 1, 4, 1, 1], dtype=int32)

#### Creating a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [31]:
a=['M1X','M2M','M9A']
df_toronto_new = df_toronto[~df_toronto.Postcode.isin(a)]
df_toronto_new.reset_index(drop = True, inplace = True)


toronto_merged = df_toronto_new

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,1,Fast Food Restaurant,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,Diner
1,M1C,Scarborough,"Rouge Hill,Highland Creek,Port Union",43.784535,-79.160497,1,Bar,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
2,M1E,Scarborough,"West Hill,Morningside,Guildwood",43.763573,-79.188711,0,Electronics Store,Pizza Place,Breakfast Spot,Medical Center,Rental Car Location,Mexican Restaurant,Dance Studio,Dumpling Restaurant,Drugstore,Dog Run
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Pharmacy,Soccer Field,Korean Restaurant,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Bank,Athletics & Sports,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Bakery,Caribbean Restaurant,Yoga Studio,Dessert Shop,Dim Sum Restaurant


#### Visualizing the resulting clusters

In [32]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examining each Cluster

#### Cluster 1

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,0,Electronics Store,Pizza Place,Breakfast Spot,Medical Center,Rental Car Location,Mexican Restaurant,Dance Studio,Dumpling Restaurant,Drugstore,Dog Run
23,North York,0,Fast Food Restaurant,Park,Food & Drink Shop,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
24,North York,0,Baseball Field,Café,Gym / Fitness Center,Japanese Restaurant,Pool,Caribbean Restaurant,Diner,Dessert Shop,Dim Sum Restaurant,Yoga Studio
33,East York,0,Pizza Place,Fast Food Restaurant,Pet Store,Pharmacy,Café,Gastropub,Gym / Fitness Center,Bank,Athletics & Sports,Rock Climbing Spot
39,East Toronto,0,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Spa,Pizza Place,Juice Bar,Indian Restaurant,Health Food Store,Grocery Store,Fruit & Vegetable Store
46,Central Toronto,0,Playground,Park,Intersection,Restaurant,Yoga Studio,Curling Ice,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
60,North York,0,Italian Restaurant,Coffee Shop,Fast Food Restaurant,Pharmacy,Pizza Place,Pub,Café,Butcher,Sushi Restaurant,Sandwich Place
69,North York,0,Clothing Store,Furniture / Home Store,Miscellaneous Shop,Shoe Store,Boutique,Coffee Shop,Event Space,Accessories Store,Vietnamese Restaurant,Garden Center
71,York,0,Hockey Arena,Park,Field,Trail,Yoga Studio,Dim Sum Restaurant,Deli / Bodega,Department Store,Dessert Shop,Discount Store
73,Downtown Toronto,0,Grocery Store,Café,Park,Coffee Shop,Convenience Store,Diner,Italian Restaurant,Baby Store,Restaurant,Athletics & Sports


#### Cluster 2

In [34]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,1,Fast Food Restaurant,Yoga Studio,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,Diner
1,Scarborough,1,Bar,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
3,Scarborough,1,Coffee Shop,Pharmacy,Soccer Field,Korean Restaurant,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
4,Scarborough,1,Bank,Athletics & Sports,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Bakery,Caribbean Restaurant,Yoga Studio,Dessert Shop,Dim Sum Restaurant
5,Scarborough,1,Grocery Store,Playground,Yoga Studio,Curling Ice,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
6,Scarborough,1,Discount Store,Convenience Store,Train Station,Coffee Shop,Department Store,Hobby Shop,Bus Station,Dessert Shop,Dim Sum Restaurant,Diner
8,Scarborough,1,Motel,American Restaurant,Yoga Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,Diner
9,Scarborough,1,College Stadium,General Entertainment,Skating Rink,Café,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run
10,Scarborough,1,Indian Restaurant,Pet Store,Latin American Restaurant,Vietnamese Restaurant,Chinese Restaurant,Diner,Department Store,Dessert Shop,Dim Sum Restaurant,Dog Run
11,Scarborough,1,Middle Eastern Restaurant,Breakfast Spot,Bakery,Sandwich Place,Auto Garage,Yoga Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore


#### Cluster 3

In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,North York,2,Coffee Shop,Massage Studio,Bar,Miscellaneous Shop,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run


#### Cluster 4

In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
97,Etobicoke,3,Pizza Place,Park,Mobile Phone Shop,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,Diner


#### Cluster 5

In [37]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Scarborough,4,Bakery,Bus Line,Intersection,Metro Station,Ice Cream Shop,Soccer Field,Yoga Studio,Dessert Shop,Electronics Store,Eastern European Restaurant
21,North York,4,Electronics Store,Park,Bank,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
26,North York,4,Coffee Shop,Pharmacy,Frozen Yogurt Shop,Restaurant,Bank,Deli / Bodega,Sandwich Place,Bridal Shop,Sushi Restaurant,Fried Chicken Joint
32,North York,4,Coffee Shop,Portuguese Restaurant,Intersection,Hockey Arena,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
40,East Toronto,4,Sandwich Place,Pet Store,Fish & Chips Shop,Pizza Place,Movie Theater,Pub,Burrito Place,Burger Joint,Brewery,Liquor Store
48,Downtown Toronto,4,Park,Playground,Trail,Yoga Studio,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,Discount Store
52,Downtown Toronto,4,Café,Clothing Store,Plaza,Beer Bar,Japanese Restaurant,Diner,Sandwich Place,Burger Joint,Burrito Place,Ramen Restaurant
53,Downtown Toronto,4,Gastropub,Coffee Shop,Restaurant,Japanese Restaurant,Hotel,BBQ Joint,Park,Cosmetics Shop,Diner,Speakeasy
54,Downtown Toronto,4,Cocktail Bar,Seafood Restaurant,Bakery,Farmers Market,Tea Room,Burger Joint,Liquor Store,Bistro,Belgian Restaurant,Restaurant
56,Downtown Toronto,4,Steakhouse,Café,American Restaurant,Hotel,Opera House,Bar,Speakeasy,Seafood Restaurant,Plaza,Pizza Place
