<h1 align=center><b>Segmenting and Clustering Neighborhoods in Toronto</b></h1>

# 1 Create toronto_df from wikipedia page

In [1]:
import pandas as pd
import numpy as np

In [2]:
#conda install -c anaconda lxml
#import lxml

In [3]:
# Scraping data from wikipedia to create the dataFrame
toronto_df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# removing rows with not assigned borough
toronto_df = toronto_df[toronto_df['Borough']!='Not assigned'].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Is there any row without assigned Neighborhood or duplicate postal code? 

In [8]:
index = toronto_df[toronto_df['Neighborhood']=='Not assigned'].index
len(index)

0

Note : There is no row with not assigned Neighborhood. 
       There is not duplicate postal belonging to many Neighboorhood 

In [9]:
toronto_df.shape

(103, 3)

## 2 Enhance toronto dataframe with latitude and longitude

In [10]:
!wget -q -O 'Geospatial_data'  http://cocl.us/Geospatial_data

In [11]:
geo_df = pd.read_csv('Geospatial_data')
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
toronto_df = toronto_df.merge(geo_df, left_on='Postal Code', right_on= 'Postal Code')
toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [13]:
toronto_df.shape

(103, 5)

#### 3 Explore and cluster the Neighboorhoods

In [15]:
import folium

In [19]:
# create map of New York using latitude and longitude values
latitude = 37.7953
longitude = -95.9368
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [25]:
#### Working with borough taht contain the word Toronto
filtered_toronto_df = toronto_df[toronto_df['Borough'].str.contains('Toronto')]
filtered_toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [26]:
# create map of New York using latitude and longitude values
latitude = 37.7953
longitude = -95.9368
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(filtered_toronto_df['Latitude'], filtered_toronto_df['Longitude'], filtered_toronto_df['Borough'], filtered_toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [27]:
CLIENT_ID = 'OY5VNVM2BPEJPCWP1GOZGMJT0BOG4EFLYEXBHMS4EAYFHH5M' # your Foursquare ID
CLIENT_SECRET = 'XFF4WWBK3K4Y2P5UDCWN3PUZY2YQNAU5I1P2K0NBMFDLX4WT' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OY5VNVM2BPEJPCWP1GOZGMJT0BOG4EFLYEXBHMS4EAYFHH5M
CLIENT_SECRET:XFF4WWBK3K4Y2P5UDCWN3PUZY2YQNAU5I1P2K0NBMFDLX4WT


In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            100)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
# type your answer here

filtered_toronto_venues = getNearbyVenues(names=filtered_toronto_df['Neighborhood'],
                                   latitudes=filtered_toronto_df['Latitude'],
                                   longitudes=filtered_toronto_df['Longitude']
                                  )

filtered_toronto_venues = json_normalize(filtered_toronto_venues)

Regent Park, Harbourfront


NameError: name 'requests' is not defined