# Part 3: Final Full Notebook
#### Segmentation and Clustering of Toronto Neighborhoods


In [2]:
#importing Libraries
import os
import urllib.request
from io import StringIO

import bs4 as bs
import lxml.html as lh
import numpy as np
import pandas as pd
import requests

In [3]:
# Download the Wikipedia page listing Canadian postal codes starting with "M"
# and parse its first HTML table into a DataFrame.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of parsing an error page as if it were data.
res.raise_for_status()
soup = bs.BeautifulSoup(res.content, 'lxml')
# Take the first <table> element on the page (assumed to be the postal-code table).
table = soup.find_all('table')[0]
# read_html returns a list of DataFrames; wrap the markup in StringIO because
# passing a literal HTML string is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))
# Use the parsed frame directly: the former to_json -> read_json round trip
# only re-encoded the same records.
data = df[0]

In [4]:
# Keep only the rows whose Borough is something other than the
# 'Not assigned' placeholder.
has_borough = data['Borough'].ne('Not assigned')
raw_data_selected = data[has_borough]

In [5]:
# Collapse rows that share the same (Borough, Postal Code) pair, joining the
# remaining string columns with commas.
group_keys = ['Borough', 'Postal Code']
raw_data_selected = (
    raw_data_selected
    .groupby(group_keys, as_index=False)
    .agg(','.join)
)

In [6]:
# Where Neighborhood is 'Not assigned', fall back to the Borough name;
# every other row keeps its existing Neighborhood value.
neighborhood_missing = raw_data_selected['Neighborhood'] == 'Not assigned'
raw_data_selected['Neighborhood'] = np.where(
    neighborhood_missing,
    raw_data_selected['Borough'],
    raw_data_selected['Neighborhood'],
)

In [7]:
# Sanity check: (rows, columns) of the frame after filtering, grouping and
# filling in the Neighborhood column.
raw_data_selected.shape

(103, 3)

### Creating DataFrame


In [8]:
# Read the geospatial CSV from the URL below; it is joined to the postal-code
# table on 'Postal Code' in the next cell, and the Latitude/Longitude columns
# shown there are not present in the frame built above.
geospatial_url = "https://cocl.us/Geospatial_data"
geospatial_data = pd.read_csv(geospatial_url)

In [9]:
# Attach coordinates to each postal-code row. The join is an inner join on
# 'Postal Code' (pandas' default), spelled out here for clarity.
merged_data = raw_data_selected.merge(
    geospatial_data,
    how='inner',
    on='Postal Code',
)
merged_data.head()

Unnamed: 0,Borough,Postal Code,Neighborhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.38879
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316


### Analysis

In [12]:
!pip -q install folium
import folium

In [13]:
# Base map for the city-wide view, centred on the coordinates below.
latitude, longitude = 43.6532, -79.3832
map_toronto = folium.Map(
    location=[latitude, longitude],
    zoom_start=10,
)
map_toronto

In [14]:

# Add one circle marker per row of merged_data to the city-wide map.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in merged_data[marker_columns].itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighborhood, borough)
    # parse_html is a Popup option; it is no longer also passed to
    # CircleMarker, which does not define such a parameter.
    label = folium.Popup(popup_text, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

In [15]:
# Number of rows in merged_data for each distinct Borough value.
merged_data['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

### Exploring Toronto Boroughs

In [16]:
# Keep only boroughs whose name contains 'Toronto'. reset_index() is called
# without drop, so the previous row labels are kept as an 'index' column.
in_toronto = merged_data['Borough'].str.contains('Toronto')
toronto = merged_data[in_toronto].reset_index()

In [17]:
# Preview the first rows of the Toronto-only frame, including the 'index'
# column that reset_index() added.
toronto.head()

Unnamed: 0,index,Borough,Postal Code,Neighborhood,Latitude,Longitude
0,0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,3,Central Toronto,M4S,Davisville,43.704324,-79.38879
4,4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316


In [18]:
# Rebuild the map at a closer zoom and plot only the rows of the
# Toronto-only frame.
latitude = 43.6543
longitude = -79.3860
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in toronto[marker_columns].itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighborhood, borough)
    # parse_html is a Popup option; it is no longer also passed to
    # CircleMarker, which does not define such a parameter.
    label = folium.Popup(popup_text, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

### Exploring Neighbourhood near 'Richmond, Adelaide, King'

In [19]:
# Look up the Neighborhood at row label 17 of the Toronto frame. The label is
# hardcoded, so it depends on the current row order of `toronto`.
toronto.loc[17, 'Neighborhood']

'Richmond, Adelaide, King'

In [20]:
# Pull the name and coordinates of the neighbourhood at row label 17; these
# values are used to build the Foursquare query further down.
selected_row = 17
neighborhood_name = toronto.loc[selected_row, 'Neighborhood']
neighborhood_latitude = toronto.loc[selected_row, 'Latitude']
neighborhood_longitude = toronto.loc[selected_row, 'Longitude']

summary_template = 'Latitude and longitude values of {} are {}, {}.'
print(summary_template.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Richmond, Adelaide, King are 43.65057120000001, -79.3845675.


### Querying the Foursquare API

In [21]:
# Build the Foursquare "venues/explore" request URL for the selected
# neighbourhood.
#
# Credentials are read from the environment so they are never stored in the
# notebook. The key pair that used to be hardcoded in this cell has been
# exposed and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
        'environment variables before running this cell.'
    )

radius = 100          # sent as the `radius` query parameter (presumably metres; confirm against the API docs)
LIMIT = 10            # sent as the `limit` query parameter
VERSION = '20190915'  # sent as the `v` query parameter

# NOTE: the resulting URL embeds the client secret, so avoid displaying it.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_latitude,
    neighborhood_longitude,
    radius,
    LIMIT)

In [22]:
# Call the Foursquare endpoint built above and decode its JSON body.
response = requests.get(url, timeout=30)
# Raise on an HTTP error (e.g. rejected credentials) instead of letting a
# later cell fail with a KeyError on a response that has no 'groups'.
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5ef18fb2b9a389001bece03a'},
 'response': {'headerLocation': 'Financial District',
  'headerFullLocation': 'Financial District, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 11,
  'suggestedBounds': {'ne': {'lat': 43.65147120090001,
    'lng': -79.38332597628721},
   'sw': {'lat': 43.649671199100005, 'lng': -79.3858090237128}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad4c062f964a520e5f720e3',
       'name': 'Four Seasons Centre for the Performing Arts',
       'location': {'address': '145 Queen St. W',
        'crossStreet': 'at University Ave.',
        'lat': 43.650592,
        'lng': -79.385806,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.650592,
          'lng': -79.3858

In [23]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Any object supporting ``row[key]`` (for example a dict or a pandas
        Series). The category list is read from ``row['categories']`` and,
        if that key is missing, from ``row['venue.categories']``.

    Returns
    -------
    str or None
        The ``'name'`` of the first category, or ``None`` when the category
        list is empty or ``None``.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [24]:
# Flatten the venues from the Foursquare response into a small table.
#
# pandas.io.json.json_normalize has been deprecated since pandas 1.0 in favour
# of pandas.json_normalize; the old name stays bound here in case a later cell
# still calls it.
json_normalize = pd.json_normalize

venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues)  # flatten JSON; nested keys are joined with '.'

# filter columns (copy so the column assignment below acts on an independent frame)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component (e.g. 'venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Four Seasons Centre for the Performing Arts,Concert Hall,43.650592,-79.385806
1,Rosalinda,Vegetarian / Vegan Restaurant,43.650252,-79.385156
2,The Keg Steakhouse + Bar - York Street,Restaurant,43.649987,-79.384103
3,Bulldog On The Block,Coffee Shop,43.650652,-79.384141
4,DAVIDsTEA,Tea Room,43.650547,-79.383385


In [25]:
# Report how many venue rows ended up in the flattened table.
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

10 venues were returned by Foursquare.
