### **Segmenting and Clustering Neighborhoods in Toronto**

***Before we get the data and start exploring it, let's install and import all the dependencies that we will need.***

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
# None lifts the display limits, so DataFrames are shown with every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Install geopy from conda-forge into the active environment (--yes skips the prompt).
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# Install geocoder from conda-forge into the active environment.
!conda install -c conda-forge geocoder --yes
import geocoder

import requests # library to handle requests
import random # library for random number generation

# libraries for displaying images
from IPython.display import Image
from IPython.core.display import HTML

# library for transforming JSON data into a pandas DataFrame
# NOTE(review): newer pandas releases expose this as pandas.json_normalize and
# deprecate this import path - confirm against the installed pandas version.
from pandas.io.json import json_normalize

# Install folium pinned to version 0.5.0.
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.17.0                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geocoder                  1.38.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Folium installed
Libraries imported.


***Load data from CSV***

In [2]:
# Read the postal code / borough / neighborhood table from the local CSV file.
toronto_csv_path = 'Toronto-2.csv'
df_Geo = pd.read_csv(toronto_csv_path)
print('Data downloaded!')

Data downloaded!


In [3]:
# Preview the first rows of the freshly loaded table.
df_Geo.head()

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood
0,0,M1B,Scarborough,"Rouge, Malvern"
1,1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,3,M1G,Scarborough,Woburn
4,4,M1H,Scarborough,Cedarbrae


In [4]:
# Remove the stray 'Unnamed: 0' column that came in with the CSV
# (presumably a row index written by an earlier export - verify against the file).
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column has already been dropped.
df_Geo = df_Geo.drop('Unnamed: 0', axis=1, errors='ignore')
df_Geo.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


***Get the geographical coordinates of each postal code by calling the `geocoder.google` function***

In [None]:
import time

# Look up latitude/longitude for every postal code in df_Geo via geocoder.google
# and store the results in new 'Latitude' / 'Longitude' columns.
#
# The lookup can keep returning no result indefinitely (NOTE(review): likely a
# missing API key or exhausted quota - confirm), so the retries are bounded;
# an unbounded retry loop would leave this cell stuck on the first row.
MAX_GEOCODE_ATTEMPTS = 5   # tries per postal code before giving up
RETRY_DELAY_SECONDS = 1    # pause between consecutive tries

start_time = time.time()

latitude = []
longitude = []

postal_code = df_Geo['PostalCode']
# Cover every row of the frame. A hard-coded count that differs from the number
# of rows would make the column assignment below fail with a length mismatch.
n = len(postal_code)

for j in range(n):
    print("row:", j)
    # initialize your variable to None
    lat_lng_coords = None

    # retry a bounded number of times until coordinates come back
    for attempt in range(MAX_GEOCODE_ATTEMPTS):
        if attempt:
            time.sleep(RETRY_DELAY_SECONDS)
        # .iloc is positional, so this works whatever the frame's index labels are
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code.iloc[j]))
        lat_lng_coords = g.latlng
        # a truthiness check treats both None and an empty list as "no result"
        if lat_lng_coords:
            break

    if not lat_lng_coords:
        # Record the failure as missing values so both lists stay aligned
        # with the rows of df_Geo.
        lat_lng_coords = (np.nan, np.nan)

    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])

df_Geo['Latitude'], df_Geo['Longitude'] = latitude, longitude

print(df_Geo.head())

print("--- %s seconds ---" % round((time.time() - start_time), 2))

row: 0


***Get the geographical coordinates of each postal code from the CSV file at http://cocl.us/Geospatial_data***

In [5]:
# Fetch the postal code -> latitude/longitude lookup table from the published CSV.
geospatial_csv_url = 'http://cocl.us/Geospatial_data'
df_Lat_Long = pd.read_csv(geospatial_csv_url)
print('Data downloaded!')

Data downloaded!


In [6]:
# Preview the first rows of the coordinate lookup table.
df_Lat_Long.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Give the key column the same name as in df_Geo ('PostalCode') so the two
# tables share a common column.
column_renames = {'Postal Code': 'PostalCode'}
df_Lat_Long = df_Lat_Long.rename(columns=column_renames)
df_Lat_Long.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# (rows, columns) of the coordinate lookup table.
df_Lat_Long.shape

(103, 3)

In [9]:
# Merge two dataframes with common column
df_Geo=df_Lat_Long.merge(df_Geo, on='PostalCode')
df_Geo.head()

Unnamed: 0,PostalCode,Latitude,Longitude,Borough,Neighborhood
0,M1B,43.806686,-79.194353,Scarborough,"Rouge, Malvern"
1,M1C,43.784535,-79.160497,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,43.763573,-79.188711,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae


In [10]:
# Put the descriptive columns first and the coordinates last.
column_order = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
df_Geo = df_Geo[column_order]
df_Geo.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [11]:
# (rows, columns) after the merge and column reordering.
df_Geo.shape

(103, 5)

In [12]:
# Persist the merged table. With pandas' default index=True the row index is
# written as an extra header-less first column, which appears as 'Unnamed: 0'
# when the file is read back with read_csv.
df_Geo.to_csv('Toronto_Geo.csv')

***The geographical coordinates of Toronto are 43.653963, -79.387207***

In [13]:
import time

# Resolve the city-level coordinates of Toronto with Nominatim and time the call.
start_time = time.time()

address = 'Toronto, ON'

# user_agent identifies this application to the Nominatim service.
geolocator = Nominatim(user_agent="CanadaApp")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# These scalars are used as the centre of the map built in the next cell.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

print("--- %s seconds ---" % round((time.time() - start_time), 2))

The geograpical coordinate of Toronto are 43.653963, -79.387207.
--- 0.17 seconds ---


***Create map of Toronto using latitude and longitude values***

In [14]:
import folium

# Base map centred on the Toronto coordinates held in latitude / longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of df_Geo, with a "<neighborhood>, <borough>" popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in zip(*(df_Geo[name] for name in marker_columns)):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto