In [117]:
import re
import requests

import pandas as pd
import numpy as np
from pandas.io.json import json_normalize

pd.set_option('display.max_columns', None)

import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import seaborn as sns
import folium

from geopy.geocoders import Nominatim
import geocoder

from sklearn.cluster import KMeans
%matplotlib inline

In [6]:
toronto_df = pd.read_csv(r'Data Files\Toronto Postal Codes.csv')
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [7]:
toronto_df = toronto_df.sort_values(by = ['PostalCode'], ascending = True).reset_index(drop = True)
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353
2,M1C,Scarborough,Port Union,43.784535,-79.160497
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
4,M1C,Scarborough,Highland Creek,43.784535,-79.160497


In [8]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude_tor = location.latitude
longitude_tor = location.longitude

print("The Latitude of Toronto: {} and the Longitude of Toronto: {}".format(latitude_tor, longitude_tor))

The Latitude of Toronto: 43.653963 and the Longitude of Toronto: -79.387207


In [9]:
map_toronto = folium.Map(location = [latitude_tor, longitude_tor], zoom_start = 10)

for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat, lng], 
    radius = 5, 
    popup = label, 
    color = 'blue', 
    fill = True, 
    fill_color = '#3186cc', 
    fill_opacity = 0.7).add_to(map_toronto)
    
map_toronto

In [10]:
scar_df = toronto_df[toronto_df['Borough'] == 'Scarborough'].reset_index(drop = True)
scar_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.806686,-79.194353
1,M1B,Scarborough,Malvern,43.806686,-79.194353
2,M1C,Scarborough,Port Union,43.784535,-79.160497
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497
4,M1C,Scarborough,Highland Creek,43.784535,-79.160497


In [11]:
address = 'Scarborough, ON'

geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude_scar = location.latitude
longitude_scar = location.longitude

print('The Latitude of Scarborough: {} and the longitude is: {}'.format(latitude_scar, longitude_scar))

The Latitude of Scarborough: 43.773077 and the longitude is: -79.257774


In [12]:
map_scar = folium.Map(location = [latitude_scar, longitude_scar], zoom_start = 10)

for lat, lng, neighborhood, borough in zip(scar_df['Latitude'], scar_df['Longitude'], scar_df['Neighborhood'], scar_df['Borough']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat, lng], 
    radius = 5, 
    popup = label, 
    color = 'blue', 
    fill = True, 
    fill_color = '#3186cc', 
    fill_opacity = 0.7).add_to(map_scar)
    
map_scar

In [13]:
neighborhood_latitude = scar_df.loc[0, 'Latitude'] # get latitude of first entry
neighborhood_longitude = scar_df.loc[0, 'Longitude'] #get longitude of first entry
neighborhood_name = scar_df.loc[0, 'Neighborhood']

print("Latitude: {}, \nLongitude: {}, \nNeighborhood Name: {}".format(neighborhood_latitude, 
                       neighborhood_longitude, 
                       neighborhood_name))

Latitude: 43.8066863, 
Longitude: -79.19435340000003, 
Neighborhood Name: Rouge


In [15]:
CLIENT_ID = 'BL4OMDGY22KNROQIDK1SH2B0HKWTRZRDTXUTZVQBOJ1YZXLP' # Change when replicating
CLIENT_SECRET = 'WUYFQMZ2L1CN512RXLUIZCWDBDPNBLR5EAMAWDYDJIZ3QEEY' # Change when replicating
VERSION = '20181204'

print("My Credentials: ")
print("CLIENT_ID: " + CLIENT_ID)
print("CLIENT_SECRET: " + CLIENT_SECRET)

My Credentials: 
CLIENT_ID: BL4OMDGY22KNROQIDK1SH2B0HKWTRZRDTXUTZVQBOJ1YZXLP
CLIENT_SECRET: WUYFQMZ2L1CN512RXLUIZCWDBDPNBLR5EAMAWDYDJIZ3QEEY


In [16]:
LIMIT = 100
# Radius is made 1000 because, with radius as 500, the results generated are not much.
radius = 1000

# using the url convention, create the url to get the requests
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius,
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=BL4OMDGY22KNROQIDK1SH2B0HKWTRZRDTXUTZVQBOJ1YZXLP&client_secret=WUYFQMZ2L1CN512RXLUIZCWDBDPNBLR5EAMAWDYDJIZ3QEEY&v=20181204&ll=43.8066863,-79.19435340000003&radius=1000&limit=100'

In [17]:
results = requests.get(url).json() # Get the data in JSON format
results

{'meta': {'code': 200, 'requestId': '5c1a42e01ed2194bd37fce6a'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 19,
  'suggestedBounds': {'ne': {'lat': 43.815686309000014,
    'lng': -79.18190576146083},
   'sw': {'lat': 43.79768629099999, 'lng': -79.20680103853923}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d669cba83865481c948fa53',
       'name': 'Images Salon & Spa',
       'location': {'address': '8130 Sheppard Ave E',
        'crossStreet': 'Morningside Ave',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668,
        'labeledLatLngs'

In [18]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [19]:
venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # Flatten the JSON to tabular data

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis = 1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
2,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,Harvey's,Fast Food Restaurant,43.800106,-79.198258
4,Wendy's,Fast Food Restaurant,43.807448,-79.199056


In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
scar_venues = getNearbyVenues(names=scar_df['Neighborhood'],
                                   latitudes=scar_df['Latitude'],
                                   longitudes=scar_df['Longitude']
                                  )

Rouge
Malvern
Port Union
Rouge Hill
Highland Creek
Guildwood
Morningside
West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park
Ionview
Kennedy Park
Golden Mile
Oakridge
Clairlea
Cliffcrest
Scarborough Village West
Cliffside
Cliffside West
Birch Cliff
Wexford Heights
Dorset Park
Scarborough Town Centre
Maryvale
Wexford
Agincourt
Sullivan
Clarks Corners
Tam O'Shanter
Milliken
Agincourt North
L'Amoreaux East
Steeles East
Steeles West
L'Amoreaux West
Upper Rouge


In [22]:
print(scar_venues.shape)
scar_venues.head()

(206, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rouge,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,Malvern,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,Port Union,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
3,Port Union,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,Rouge Hill,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course


In [23]:
scar_venues.loc[-1] = ['Upper Rouge', 43.83455, -79.2069, "Al's Wine", 43.83455, -79.2069, "Bar"]

In [24]:
scar_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rouge,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,Malvern,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,Port Union,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
3,Port Union,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,Rouge Hill,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course


In [25]:
scar_onehot = pd.get_dummies(scar_venues[['Venue Category']], prefix = '', prefix_sep = "")

scar_onehot['Neighborhood'] = scar_venues['Neighborhood']

fixed_columns = [scar_onehot.columns[-1]] + list(scar_onehot.columns[:-1])
scar_onehot = scar_onehot[fixed_columns]

scar_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Golf Course,Grocery Store,Gym Pool,Hakka Restaurant,Hobby Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motel,Movie Theater,Nail Salon,Noodle House,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Thrift / Vintage Store,Train Station,Vietnamese Restaurant
0,Rouge,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Malvern,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Port Union,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Port Union,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Rouge Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [94]:
scar_grouped = scar_onehot.groupby('Neighborhood').mean().reset_index()
scar_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Golf Course,Grocery Store,Gym Pool,Hakka Restaurant,Hobby Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motel,Movie Theater,Nail Salon,Noodle House,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Thrift / Vintage Store,Train Station,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0
4,Clairlea,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0


In [95]:
scar_grouped_neighbor = scar_grouped['Neighborhood']

In [96]:
scar_grouped = scar_grouped.loc[:, scar_grouped.columns.str.endswith('Restaurant')]
scar_grouped.head()

Unnamed: 0,American Restaurant,Caribbean Restaurant,Chinese Restaurant,Fast Food Restaurant,Hakka Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Thai Restaurant,Vietnamese Restaurant
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0
4,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [97]:
scar_grouped['Neighborhood'] = scar_grouped_neighbor
scar_grouped.head()

Unnamed: 0,American Restaurant,Caribbean Restaurant,Chinese Restaurant,Fast Food Restaurant,Hakka Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Thai Restaurant,Vietnamese Restaurant,Neighborhood
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Agincourt
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Agincourt North
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Birch Cliff
3,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,Cedarbrae
4,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Clairlea


In [104]:
cols = list(scar_grouped)
cols.insert(0, cols.pop(cols.index('Neighborhood')))
scar_grouped = scar_grouped.loc[:, cols]
scar_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Caribbean Restaurant,Chinese Restaurant,Fast Food Restaurant,Hakka Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Thai Restaurant,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0
4,Clairlea,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [106]:
num_top_venues = 5

for hood in scar_grouped['Neighborhood']:
    print('------- ' + hood + ' -------')
    temp = scar_grouped[scar_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['Venue', 'Frequency']
    temp = temp.iloc[1:]
    temp['Frequency'] = temp['Frequency'].astype(float)
    temp = temp.round({'Frequency': 2})
    print(temp.sort_values('Frequency', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

------- Agincourt -------
                  Venue  Frequency
0   American Restaurant        0.0
1  Caribbean Restaurant        0.0
2    Chinese Restaurant        0.0
3  Fast Food Restaurant        0.0
4      Hakka Restaurant        0.0


------- Agincourt North -------
                  Venue  Frequency
0   American Restaurant        0.0
1  Caribbean Restaurant        0.0
2    Chinese Restaurant        0.0
3  Fast Food Restaurant        0.0
4      Hakka Restaurant        0.0


------- Birch Cliff -------
                  Venue  Frequency
0   American Restaurant        0.0
1  Caribbean Restaurant        0.0
2    Chinese Restaurant        0.0
3  Fast Food Restaurant        0.0
4      Hakka Restaurant        0.0


------- Cedarbrae -------
                  Venue  Frequency
0  Caribbean Restaurant       0.14
1      Hakka Restaurant       0.14
2       Thai Restaurant       0.14
3   American Restaurant       0.00
4    Chinese Restaurant       0.00


------- Clairlea -------
               

                       Venue  Frequency
0  Middle Eastern Restaurant       0.12
1        American Restaurant       0.00
2       Caribbean Restaurant       0.00
3         Chinese Restaurant       0.00
4       Fast Food Restaurant       0.00


------- Wexford Heights -------
                       Venue  Frequency
0          Indian Restaurant       0.29
1         Chinese Restaurant       0.14
2  Latin American Restaurant       0.14
3      Vietnamese Restaurant       0.14
4        American Restaurant       0.00


------- Woburn -------
                  Venue  Frequency
0     Korean Restaurant       0.25
1   American Restaurant       0.00
2  Caribbean Restaurant       0.00
3    Chinese Restaurant       0.00
4  Fast Food Restaurant       0.00




In [107]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [109]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scar_grouped['Neighborhood']

for ind in np.arange(scar_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scar_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant
1,Agincourt North,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant
2,Birch Cliff,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant
3,Cedarbrae,Thai Restaurant,Hakka Restaurant,Caribbean Restaurant,Vietnamese Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant
4,Clairlea,Fast Food Restaurant,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant
5,Clarks Corners,Thai Restaurant,Italian Restaurant,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant
6,Cliffcrest,American Restaurant,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant
7,Cliffside,American Restaurant,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant
8,Cliffside West,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant
9,Dorset Park,Indian Restaurant,Vietnamese Restaurant,Latin American Restaurant,Chinese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant


In [114]:
kclusters = 5

scar_grouped_clustering = scar_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scar_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 0, 0, 0, 0, 4, 3, 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, 0,
       0, 0, 1, 0, 2, 0, 3, 0, 4, 4, 4, 0, 0, 0, 2, 0])

In [115]:
scar_merged = scar_df

scar_merged['Cluster Labels'] = kmeans.labels_

scar_merged = scar_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scar_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Rouge,43.806686,-79.194353,0,Fast Food Restaurant,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant
1,M1B,Scarborough,Malvern,43.806686,-79.194353,0,Fast Food Restaurant,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant
2,M1C,Scarborough,Port Union,43.784535,-79.160497,0,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant
3,M1C,Scarborough,Rouge Hill,43.784535,-79.160497,0,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant
4,M1C,Scarborough,Highland Creek,43.784535,-79.160497,0,Vietnamese Restaurant,Thai Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Indian Restaurant,Hakka Restaurant


In [118]:
map_clusters = folium.Map(location = [latitude_scar, longitude_scar], zoom_start = 11)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(scar_merged['Latitude'], scar_merged['Longitude'], scar_merged['Neighborhood'], scar_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
map_clusters

In [10]:
paris_df = pd.read_csv(r'Data Files\Paris Postal Codes.csv')
paris_df.head()

Unnamed: 0,Postal Code,Borough,Latitude,Longitude
0,75001,Louvre,48.8592,2.3417
1,75002,Bourse,48.8655,2.3426
2,75003,Temple,48.8637,2.3615
3,75004,Hôtel-de-Ville,48.8601,2.3507
4,75005,Panthéon,48.8448,2.3471


In [11]:
address = 'Paris, FR'

geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude_fr = location.latitude
longitude_fr = location.longitude

print("The Latitude of Paris: {} and the Longitude of Paris: {}".format(latitude_fr, longitude_fr))

The Latitude of Paris: 48.8566101 and the Longitude of Paris: 2.3514992


In [12]:
map_paris = folium.Map(location = [latitude_fr, longitude_fr], zoom_start = 12)
for lat, lng, borough in zip(paris_df['Latitude'], paris_df['Longitude'], paris_df['Borough']):
    label = '{}'.format(borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat, lng], 
    radius = 5, 
    popup = label, 
    color = 'blue', 
    fill = True, 
    fill_color = '#3186cc', 
    fill_opacity = 0.7).add_to(map_paris)
    
map_paris