In [1]:
import numpy as np
import pandas as pd
import json
from geopy.geocoders import Nominatim
import requests 
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

### Using Dataset that created in last exercise


In [2]:
data=pd.read_csv("Canada_Postal_code.csv")

In [3]:
data.drop("Unnamed: 0",axis=1,inplace=True)

In [4]:
data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [5]:
address='Toronto, Ontario Canada'

geolocator=Nominatim(user_agent="ny_explorer")
location=geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude

In [6]:
latitude

43.6534817

In [7]:
map_toronto=folium.Map(location=[latitude,longitude],zoom_start=10)

for lat,lng,borough,neighborhood in zip(data['Latitude'],data['Longitude'],data['Borough'],data['Neighbourhood']):
    label='{}, {}'.format(neighborhood,borough)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False
        ).add_to(map_toronto)
    
map_toronto

### Now, I am going to randomly select one neighborhood of Toronto

In [8]:
ran=np.random.randint(0,len(data))
neighbour=data.loc[ran,'Neighbourhood']

In [9]:
data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [10]:
neighbour_latitude=data.loc[ran,'Latitude']
neighbour_longitude=data.loc[ran,'Longitude']

### Foursquare Credentials and Version

In [11]:
client_id='CDC0BC1OFLXFNDMO44XMF3GFZCCOZDUIUD5W0S2KYCZMHHKN'
client_secret='PTYI2XEMPZRKGIQPVHIWVOOGOJ4ZXBTAOEVS1YZ4L1GAK5P5'
version='20180605'
limit=100

In [12]:
radius=500
url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    client_id, 
    client_secret, 
    version, 
    neighbour_latitude, 
    neighbour_longitude, 
    radius, 
    limit)

In [13]:
results=requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fed6ac976cf540ef11d9ee1'},
 'response': {'headerLocation': 'Rexdale',
  'headerFullLocation': 'Rexdale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 9,
  'suggestedBounds': {'ne': {'lat': 43.743916404500006,
    'lng': -79.58222007762089},
   'sw': {'lat': 43.7349163955, 'lng': -79.59465372237912}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4c633939e1621b8d48842553',
       'name': 'Subway',
       'location': {'address': '6210 Finch Ave West, Store 103',
        'crossStreet': 'at Albion Rd.',
        'lat': 43.74264512142215,
        'lng': -79.58964323010724,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.74264512142215,
          'lng': -79.58964323010724}],
        'distan

In [14]:
def get_category_type(row):
    try:
        categories_list=row['categories']
    except:
        categories_list=row['venue.categories']
        
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
venues=results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name','venue.categories','venue.location.lat','venue.location.lng']
nearby_venues=nearby_venues.loc[:,filtered_columns]

nearby_venues['venue.categories']=nearby_venues.apply(get_category_type,axis=1)

nearby_venues.columns=[col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Subway,Sandwich Place,43.742645,-79.589643
1,Popeyes Louisiana Kitchen,Fried Chicken Joint,43.741202,-79.584545
2,Shoppers Drug Mart,Pharmacy,43.741685,-79.584487
3,The Beer Store,Beer Store,43.741694,-79.584373
4,Sheriff's No Frills,Grocery Store,43.741696,-79.584379


## Explore Neighbourhoods in Toronto

In [16]:
def getNearbyVenues(names,latitudes,longitudes,radius=500):
    venues_list=[]
    for name,lat,lng in zip(names,latitudes,longitudes):
        print(name)
        
        url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    client_id, 
    client_secret, 
    version, 
    neighbour_latitude, 
    neighbour_longitude, 
    radius, 
    limit)
            
        results=requests.get(url).json()['response']['groups'][0]['items']
        
        venues_list.append([
            (name,lat,lng,v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results
        ])
        
        nearby_venues=pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns= ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
        
        return nearby_venues

In [17]:
data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [20]:
toronto_venues=getNearbyVenues(names=data['Neighbourhood'],latitudes=data['Latitude'],longitudes=data['Longitude'])

Parkwoods


In [22]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Subway,43.742645,-79.589643,Sandwich Place
1,Parkwoods,43.753259,-79.329656,Popeyes Louisiana Kitchen,43.741202,-79.584545,Fried Chicken Joint
2,Parkwoods,43.753259,-79.329656,Shoppers Drug Mart,43.741685,-79.584487,Pharmacy
3,Parkwoods,43.753259,-79.329656,The Beer Store,43.741694,-79.584373,Beer Store
4,Parkwoods,43.753259,-79.329656,Sheriff's No Frills,43.741696,-79.584379,Grocery Store


### Analyse Each Neighbourhood

In [25]:
toronto_onehot=pd.get_dummies(toronto_venues[["Venue Category"]],prefix="",prefix_sep="")

toronto_onehot['Neighbourhood']=toronto_venues['Neighbourhood']

fixed_columns=[toronto_onehot.columns[-1]]+list(toronto_onehot.columns[:-1])
toronto_onehot=toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Beer Store,Fast Food Restaurant,Fried Chicken Joint,Grocery Store,Pharmacy,Pizza Place,Sandwich Place
0,Parkwoods,0,0,0,0,0,0,1
1,Parkwoods,0,0,1,0,0,0,0
2,Parkwoods,0,0,0,0,1,0,0
3,Parkwoods,1,0,0,0,0,0,0
4,Parkwoods,0,0,0,1,0,0,0


In [26]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Beer Store,Fast Food Restaurant,Fried Chicken Joint,Grocery Store,Pharmacy,Pizza Place,Sandwich Place
0,Parkwoods,0.111111,0.111111,0.111111,0.222222,0.111111,0.222222,0.111111


In [30]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Parkwoods----
                  venue  freq
0         Grocery Store  0.22
1           Pizza Place  0.22
2            Beer Store  0.11
3  Fast Food Restaurant  0.11
4   Fried Chicken Joint  0.11




In [31]:
def return_most_common_venues(row,num_top_venues):
    row_categories=row.iloc[1:]
    row_categories_sorted=row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Cluster Neighborhood

In [37]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=1, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0])

In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_venues['Neighbourhood Latitude'], toronto_venues['Neighbourhood Longitude'], toronto_venues['Neighbourhood'], range(0,1)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters