# Creating Toronto map notebook

### Steps of execution :
1. Importing libraries for fetching table from url. 
2. Fetching table from url using BeautifulSoup4 library and then converting it into dataframe.
3. Reading the geospatial_data.csv and then merging it with the above dataframe created, forming a new dataframe called 'final_df'.
4. Generating latitude and longitude of toronto city and then mapping them on the folium map
5. Displaying the map with labels on it.

## Importing web scrapping libraries.

In [45]:
import bs4
import requests
import lxml.html as lh
import numpy as np
import pandas as pd

In [46]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [47]:
html_data = requests.get(url).text

In [48]:
html_data[:100]

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title'

In [49]:
soup = bs4.BeautifulSoup(html_data,'html.parser')

In [50]:
data_table = soup.find('table',{'class':'wikitable sortable'})

In [51]:
col=[]

In [52]:
for row in data_table.findAll('tr'):
    each_row = row.findAll('td')
    if(len(each_row)==3):
        col.append((each_row[0].text.strip(), each_row[1].text.strip(), each_row[2].text.strip()))                
    

In [53]:
col

[('M1A', 'Not assigned', 'Not assigned'),
 ('M2A', 'Not assigned', 'Not assigned'),
 ('M3A', 'North York', 'Parkwoods'),
 ('M4A', 'North York', 'Victoria Village'),
 ('M5A', 'Downtown Toronto', 'Harbourfront'),
 ('M5A', 'Downtown Toronto', 'Regent Park'),
 ('M6A', 'North York', 'Lawrence Heights'),
 ('M6A', 'North York', 'Lawrence Manor'),
 ('M7A', "Queen's Park", 'Not assigned'),
 ('M8A', 'Not assigned', 'Not assigned'),
 ('M9A', 'Etobicoke', 'Islington Avenue'),
 ('M1B', 'Scarborough', 'Rouge'),
 ('M1B', 'Scarborough', 'Malvern'),
 ('M2B', 'Not assigned', 'Not assigned'),
 ('M3B', 'North York', 'Don Mills North'),
 ('M4B', 'East York', 'Woodbine Gardens'),
 ('M4B', 'East York', 'Parkview Hill'),
 ('M5B', 'Downtown Toronto', 'Ryerson'),
 ('M5B', 'Downtown Toronto', 'Garden District'),
 ('M6B', 'North York', 'Glencairn'),
 ('M7B', 'Not assigned', 'Not assigned'),
 ('M8B', 'Not assigned', 'Not assigned'),
 ('M9B', 'Etobicoke', 'Cloverdale'),
 ('M9B', 'Etobicoke', 'Islington'),
 ('M9B', 

In [54]:
table_array = np.asarray(col)

In [55]:
df = pd.DataFrame(table_array)

In [56]:
df.head()

Unnamed: 0,0,1,2
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [57]:
column_names = ['PostalCode','Borough','Neighborhood']

In [58]:
df.columns = column_names

In [59]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [60]:
df.shape

(289, 3)

In [61]:
df = df[df.Borough != 'Not assigned']

In [62]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [63]:
df.shape

(212, 3)

In [64]:
df = df['Neighborhood'].groupby([df.PostalCode, df.Borough]).apply(list).reset_index()

In [65]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]


In [66]:
df.shape

(103, 3)

In [67]:
for x in range(103):
    if(df['Neighborhood'][x][0] == 'Not assigned'):
        df['Neighborhood'][x][0]=df['Borough'][x]    

In [68]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


In [69]:
df.shape

(103, 3)

In [70]:
df_2 = pd.read_csv("cor.csv")

In [71]:
df_2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [72]:
df_2.columns = ['PostalCode','Latitude','Longitude']

In [73]:
df_2.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [74]:
final_df = pd.merge(df,df_2,on='PostalCode')

In [75]:
final_df.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476
5,M1J,Scarborough,[Scarborough Village],43.744734,-79.239476
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]",43.727929,-79.262029
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]",43.711112,-79.284577
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]",43.716316,-79.239476
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]",43.692657,-79.264848


In [76]:
final_df.shape

(103, 5)

In [77]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(final_df['Borough'].unique()),
        final_df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


## Importing golium and geoocoder.

In [78]:
import folium

In [79]:
from sklearn.cluster import KMeans

In [80]:
!conda install -c conda-forge geocoder --y

Collecting package metadata: done
Solving environment: done

# All requested packages already installed.



In [81]:
import geocoder

### making a new dataframe from out final_df so that it will only contain "Toronto" as borough

In [82]:
toronto_data = final_df[final_df['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,[The Beaches],43.676357,-79.293031
1,M4K,East Toronto,"[The Danforth West, Riverdale]",43.679557,-79.352188
2,M4L,East Toronto,"[The Beaches West, India Bazaar]",43.668999,-79.315572
3,M4M,East Toronto,[Studio District],43.659526,-79.340923
4,M4N,Central Toronto,[Lawrence Park],43.72802,-79.38879


In [93]:
latitude= 43.7001100
longitude= -79.4163000

In [94]:
Toronto_map = folium.Map(location=[latitude,longitude],zoom_start=12)

In [95]:
for lat,lang,label in zip(toronto_data['Latitude'],toronto_data['Longitude'],toronto_data['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat,lang],
    radium=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#6b76ff',
    fill_opacity=0.7,
    parse_html=False).add_to(Toronto_map)

In [96]:
Toronto_map

## Now we to need use foursquare api as a reference to Neighborhood notebook and explore all the neighbors of Toronto

In [97]:
CLIENT_ID = 'ZK40VO3EPO1ZY5C04PNYSMQH2ZFZ3U1TPL0QME3WEY553DMH' # your Foursquare ID
CLIENT_SECRET = 'RQBRNE2T3OTMYY4BBJ3YWLS0P2FK413O22MQV1QHBHAD04WG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [101]:
Scarboroug_data = final_df[final_df['Borough'] == 'Scarborough'].reset_index(drop=True)
Scarboroug_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476


## creating a map of Scarborough and its neighbors

In [103]:
latitude_Scarborough = 43.7737
longitude_Scarborough = -79.2446

In [104]:
Scarborough_map = folium.Map(location=[latitude_Scarborough,longitude_Scarborough],zoom_start=12)

In [105]:
for lat,lang,label in zip(Scarboroug_data['Latitude'],Scarboroug_data['Longitude'],Scarboroug_data['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat,lang],
    radium=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#6b76ff',
    fill_opacity=0.7,
    parse_html=False).add_to(Scarborough_map)

In [106]:
Scarborough_map

## Getting neighborhood lat and lng 

In [110]:
neighborhood_latitude = Scarboroug_data['Latitude'][0] # neighbourhood latitude value
neighborhood_longitude = Scarboroug_data['Longitude'][0] # neighbourhood longitude value

neighborhood_name = Scarboroug_data['Neighborhood'][0] # neighbourhood name

print('Latitude and longitude values of "{}" are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of "['Rouge', 'Malvern']" are 43.806686299999996, -79.19435340000001.


In [112]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_Scarborough, longitude_Scarborough, VERSION, radius, LIMIT)

In [113]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c508b514c1f6744ec826480'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Scarborough City Centre',
  'headerFullLocation': 'Scarborough City Centre, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 7,
  'suggestedBounds': {'ne': {'lat': 43.7782000045, 'lng': -79.23837961474646},
   'sw': {'lat': 43.769199995499996, 'lng': -79.25082038525355}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d6008f829ef236a8832a059',
       'name': 'CANBE Foods Inc',
       'location': {'address': '1760 Ellesmere Rd.',
        'lat': 43.77354633117736,
        'lng': -79.24608237285928,
        'labeledLatLngs': [{'label': 'display',

In [114]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [115]:

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']  
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,CANBE Foods Inc,Indian Restaurant,43.773546,-79.246082
1,Federick Restaurant,Hakka Restaurant,43.774697,-79.241142
2,Drupati's Roti & Doubles,Caribbean Restaurant,43.775222,-79.241678
3,Thai One On,Thai Restaurant,43.774468,-79.241268
4,B&A Bakery,Bakery,43.774391,-79.243877
5,TD Canada Trust,Bank,43.774952,-79.241343
6,Mitra Hot Yoga,Yoga Studio,43.776812,-79.247074


In [116]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

7 venues were returned by Foursquare.


## 2. Explore Neighborhoods in Scarborough

In [117]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitude_Scarborough, 
            longitude_Scarborough, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [118]:
Scarborough_venues = getNearbyVenues(names=Scarboroug_data['Neighborhood'],
                                   latitudes=Scarboroug_data['Latitude'],
                                   longitudes=Scarboroug_data['Longitude']
                                  )

['Rouge', 'Malvern']
['Highland Creek', 'Rouge Hill', 'Port Union']
['Guildwood', 'Morningside', 'West Hill']
['Woburn']
['Cedarbrae']
['Scarborough Village']
['East Birchmount Park', 'Ionview', 'Kennedy Park']
['Clairlea', 'Golden Mile', 'Oakridge']
['Cliffcrest', 'Cliffside', 'Scarborough Village West']
['Birch Cliff', 'Cliffside West']
['Dorset Park', 'Scarborough Town Centre', 'Wexford Heights']
['Maryvale', 'Wexford']
['Agincourt']
['Clarks Corners', 'Sullivan', "Tam O'Shanter"]
['Agincourt North', "L'Amoreaux East", 'Milliken', 'Steeles East']
["L'Amoreaux West", 'Steeles West']
['Upper Rouge']


In [119]:
print(Scarborough_venues.shape)
Scarborough_venues.head()

(119, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"[Rouge, Malvern]",43.806686,-79.194353,CANBE Foods Inc,43.773546,-79.246082,Indian Restaurant
1,"[Rouge, Malvern]",43.806686,-79.194353,Federick Restaurant,43.774697,-79.241142,Hakka Restaurant
2,"[Rouge, Malvern]",43.806686,-79.194353,Drupati's Roti & Doubles,43.775222,-79.241678,Caribbean Restaurant
3,"[Rouge, Malvern]",43.806686,-79.194353,Thai One On,43.774468,-79.241268,Thai Restaurant
4,"[Rouge, Malvern]",43.806686,-79.194353,B&A Bakery,43.774391,-79.243877,Bakery


In [120]:
Scarborough_venues.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
114,[Upper Rouge],43.836125,-79.205636,Drupati's Roti & Doubles,43.775222,-79.241678,Caribbean Restaurant
115,[Upper Rouge],43.836125,-79.205636,Thai One On,43.774468,-79.241268,Thai Restaurant
116,[Upper Rouge],43.836125,-79.205636,B&A Bakery,43.774391,-79.243877,Bakery
117,[Upper Rouge],43.836125,-79.205636,TD Canada Trust,43.774952,-79.241343,Bank
118,[Upper Rouge],43.836125,-79.205636,Mitra Hot Yoga,43.776812,-79.247074,Yoga Studio


In [122]:
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 7 uniques categories.


## Analysing each neighborhood

In [123]:
# one hot encoding
Scarborough_onehot = pd.get_dummies(Scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Scarborough_onehot['Neighborhood'] = Scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Scarborough_onehot.columns[-1]] + list(Scarborough_onehot.columns[:-1])
Scarborough_onehot = Scarborough_onehot[fixed_columns]

Scarborough_onehot.head()

Unnamed: 0,Neighborhood,Bakery,Bank,Caribbean Restaurant,Hakka Restaurant,Indian Restaurant,Thai Restaurant,Yoga Studio
0,"[Rouge, Malvern]",0,0,0,0,1,0,0
1,"[Rouge, Malvern]",0,0,0,1,0,0,0
2,"[Rouge, Malvern]",0,0,1,0,0,0,0
3,"[Rouge, Malvern]",0,0,0,0,0,1,0
4,"[Rouge, Malvern]",1,0,0,0,0,0,0


In [124]:
Scarborough_onehot.shape

(119, 8)