## Libraries

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import geocoder
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Importing the Dataset

In [2]:
# Download the Wikipedia category page that lists Melbourne's suburbs.
url = 'https://en.wikipedia.org/wiki/Category:Suburbs_of_Melbourne'
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')
# find_all is the current BeautifulSoup spelling; findAll is a deprecated alias.
table = soup.find_all('div', {'class': "mw-category-group"})

In [3]:
# Collect every list item that mentions ", Victoria", trimmed so that it starts
# at its first letter and ends right after the ", Victoria" suffix.
state_suffix = ", Victoria"
suburbs = []
for item in soup.find_all("li"):
    raw = item.text
    if state_suffix not in raw:
        continue
    # position of the first alphabetic character (skips leading symbols/whitespace)
    start = next(pos for pos, char in enumerate(raw) if char.isalpha())
    stop = raw.index(state_suffix) + len(state_suffix)
    suburbs.append(raw[start:stop])

len(suburbs)

212

In [7]:
# wrap the scraped names in a single-column DataFrame
melb_sub = pd.DataFrame(suburbs, columns=["Suburbs"])

melb_sub.head(5)

Unnamed: 0,Suburbs
0,"Broadmeadows, Victoria"
1,"Dandenong, Victoria"
2,"East Melbourne, Victoria"
3,"Elsternwick, Victoria"
4,"Essendon, Victoria"


## Retrieving Geographical Coordinates

In [6]:
# code for getting the latitude and longitude
def get_lati_long(suburb, max_attempts=5):
    """Look up the coordinates of a suburb with the ArcGIS geocoder.

    The query sent to ``geocoder.arcgis`` is ``'<suburb>, Melbourne, Victoria'``.

    Parameters
    ----------
    suburb : str
        Name of the suburb to geocode.
    max_attempts : int, optional
        Number of lookups to try before giving up (default 5). The lookup is
        repeated only while the geocoder reports no coordinates.

    Returns
    -------
    The ``latlng`` attribute of the geocoder result (consumed downstream as a
    latitude/longitude pair).

    Raises
    ------
    RuntimeError
        If none of the attempts returned coordinates.
    """
    query = '{}, Melbourne, Victoria'.format(suburb)

    # Retry a bounded number of times; an unbounded loop would hang the
    # notebook forever for a name the geocoder cannot resolve.
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords

    raise RuntimeError(
        'No coordinates found for {!r} after {} attempt(s)'.format(query, max_attempts))

In [9]:
suburb_names = melb_sub['Suburbs'].tolist()

In [11]:
lat_lang = [get_lati_long(suburb) for suburb in suburb_names]

In [12]:
# Turn the list of coordinate pairs into two named columns, then copy each
# column onto melb_sub (aligned on the row index).
df_coords = pd.DataFrame(lat_lang, columns=['Latitude', 'Longitude'])
for coord_col in ('Latitude', 'Longitude'):
    melb_sub[coord_col] = df_coords[coord_col]

In [13]:
melb_sub.head()

Unnamed: 0,Suburbs,Latitude,Longitude
0,"Broadmeadows, Victoria",-37.68604,144.9261
1,"Dandenong, Victoria",-37.959885,145.20885
2,"East Melbourne, Victoria",-37.810043,144.985531
3,"Elsternwick, Victoria",-37.887322,145.009896
4,"Essendon, Victoria",-37.75153,144.90951


In [14]:
import os

# to_csv does not create missing folders, so make sure 'dataset/' exists first.
os.makedirs('dataset', exist_ok=True)
melb_sub.to_csv('dataset/melb.csv',index=False)

## Getting Location Data of Melbourne

In [15]:
melb_data = pd.read_csv('dataset/melb.csv')
melb_data.head()

Unnamed: 0,Suburbs,Latitude,Longitude
0,"Broadmeadows, Victoria",-37.68604,144.9261
1,"Dandenong, Victoria",-37.959885,145.20885
2,"East Melbourne, Victoria",-37.810043,144.985531
3,"Elsternwick, Victoria",-37.887322,145.009896
4,"Essendon, Victoria",-37.75153,144.90951


In [16]:
address = 'Melbourne, Victoria, Australia'

geolocator = Nominatim(user_agent="melbourne_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; stop with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise RuntimeError('Could not geocode {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Melbourne, Australia are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Melbourne, Australia are -37.8142176, 144.9631608.


In [18]:
# Pull the name and coordinates of the row labelled 0 in melb_data.
neighborhood_name = melb_data.at[0, 'Suburbs']
neighborhood_latitude = melb_data.at[0, 'Latitude']
neighborhood_longitude = melb_data.at[0, 'Longitude']

In [21]:
# create map of Melbourne using latitude and longitude values
map_melbourne = folium.Map(location=[latitude, longitude], zoom_start=9)

# add one circle marker per suburb, with the suburb name as its popup
for lat, lng, suburb in zip(melb_data['Latitude'], melb_data['Longitude'], melb_data['Suburbs']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html belongs to the Popup (escapes the text), not to the marker
        popup=folium.Popup(suburb, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_melbourne)

map_melbourne

## Using Foursquare to get the Venues in various Suburbs in Melbourne

In [22]:
import os

# Read the Foursquare credentials from the environment so real keys are never
# stored in the notebook; the placeholders are used only when the variables
# are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'Your-Client-Id') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'Your-Secret-Key') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

# Report only whether credentials were supplied: printing the values would
# save them in the cell output of the shared notebook.
print('Your credentails:')
print('CLIENT_ID set: {}'.format(CLIENT_ID != 'Your-Client-Id'))
print('CLIENT_SECRET set: {}'.format(CLIENT_SECRET != 'Your-Secret-Key'))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [None]:
LIMIT = 100
radius = 500

# Request URL for venues around the first suburb's coordinates.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
# The URL embeds CLIENT_ID and CLIENT_SECRET, so it is deliberately not echoed
# as the cell output.

In [24]:
# Fetch the venues; bound the wait and surface HTTP errors before decoding.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

#### Get all the Nearby Venues in Melbourne

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the Foursquare 'explore' venues around each given location.

    One request is sent per ``(name, latitude, longitude)`` triple, using the
    module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT``.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences describing the locations to query.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Suburbs', 'Suburb Latitude',
        'Suburb Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (but still has these columns)
        when no venues were found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Suburbs',
               'Suburb Latitude',
               'Suburb Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; params= lets requests build and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30)
        response.raise_for_status()

        # a payload without groups simply contributes no venues
        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # venues without any category get a missing value instead of an IndexError
                categories[0]['name'] if categories else None))

    # passing columns= here keeps the schema even when no rows were collected
    return pd.DataFrame(rows, columns=columns)

In [28]:
# fetch the nearby venues for every suburb in melb_data (radius left at the function's default)
melb_venues = getNearbyVenues(names=melb_data['Suburbs'], latitudes=melb_data['Latitude'], longitudes=melb_data['Longitude'])

Broadmeadows, Victoria
Dandenong, Victoria
East Melbourne, Victoria
Elsternwick, Victoria
Essendon, Victoria
Fitzroy, Victoria
Flemington, Victoria
Frankston, Victoria
Glen Waverley, Victoria
Heidelberg, Victoria
Maribyrnong, Victoria
St Kilda East, Victoria
St Kilda, Victoria
Sunshine, Victoria
Werribee, Victoria
Williamstown, Victoria
Abbotsford, Victoria
Aberfeldie, Victoria
Aintree, Victoria
Airport West, Victoria
Albanvale, Victoria
Albert Park, Victoria
Albion, Victoria
Alphington, Victoria
Altona Meadows, Victoria
Altona North, Victoria
Altona, Victoria
Ardeer, Victoria
Armadale, Victoria
Ascot Vale, Victoria
Ashburton, Victoria
Ashwood, Victoria
Aspendale Gardens, Victoria
Aspendale, Victoria
Attwood, Victoria
Auburn, Victoria
Aurora, Victoria
Avondale Heights, Victoria
Balaclava, Victoria
Balwyn, Victoria
Bayswater North, Victoria
Bayswater, Victoria
Beaconsfield, Victoria
Beaumaris, Victoria
Belgrave Heights, Victoria
Belgrave South, Victoria
Belgrave, Victoria
Bellfield, Vic

## Analyse the Dataset

In [32]:
# one indicator column per venue category (no prefix on the column names)
melb_onehot = pd.get_dummies(melb_venues[['Venue Category']], prefix="", prefix_sep="")

# attach the suburb of each venue as an extra column
melb_onehot['Suburbs'] = melb_venues['Suburbs']

# rotate the column order so that the last column becomes the first one
all_columns = list(melb_onehot.columns)
fixed_columns = all_columns[-1:] + all_columns[:-1]
melb_onehot = melb_onehot[fixed_columns]

melb_onehot.head()

Unnamed: 0,Suburbs,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,...,Travel & Transport,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Broadmeadows, Victoria",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Broadmeadows, Victoria",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Broadmeadows, Victoria",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Broadmeadows, Victoria",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"East Melbourne, Victoria",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
# average the one-hot rows of each suburb, giving the share of that suburb's
# venues that fall into each category
melb_grouped = melb_onehot.groupby('Suburbs').mean().reset_index()
melb_grouped

Unnamed: 0,Suburbs,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,...,Travel & Transport,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Abbotsford, Victoria",0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.209302,0.0,0.000000,0.000000,0.0,0.0
1,"Aberfeldie, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
2,"Aintree, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.034483,0.000000,0.0,0.0
3,"Airport West, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.045455,0.000000,0.0,0.000000,0.000000,0.0,0.0
4,"Albanvale, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,"St Kilda East, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.015873,0.000000,0.000000,0.0,0.015873,0.015873,0.0,0.0
187,"St Kilda, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.015873,0.000000,0.000000,0.0,0.015873,0.015873,0.0,0.0
188,"Sunshine, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0
189,"Werribee, Victoria",0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0


In [34]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped, the remaining entries are sorted in
    descending order of value, and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.head(num_top_venues).index.values

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Suburbs']
for ind in np.arange(num_top_venues):
    # the first ranks take their own suffix from `indicators`; every later
    # rank uses 'th' (explicit test instead of a bare `except:` that would
    # also hide unrelated errors)
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Suburbs'] = melb_grouped['Suburbs']

# fill the ranking columns row by row from the per-suburb category shares
for ind in np.arange(melb_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(melb_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Suburbs,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Abbotsford, Victoria",Vietnamese Restaurant,Thai Restaurant,Café,Park,Gastropub,Korean Restaurant,Grocery Store,Hotel Bar,Rock Club,Fruit & Vegetable Store
1,"Aberfeldie, Victoria",Café,Tennis Court,Yoga Studio,Frame Store,Football Stadium,Food Truck,Food Service,Food Court,Flower Shop,Flea Market
2,"Aintree, Victoria",Café,Coffee Shop,Japanese Restaurant,Park,Hotel,Kebab Restaurant,Cosmetics Shop,Record Shop,Clothing Store,River
3,"Airport West, Victoria",Café,Fast Food Restaurant,Supermarket,Hotel Bar,Juice Bar,Paper / Office Supplies Store,Portuguese Restaurant,Coffee Shop,Donut Shop,Electronics Store
4,"Albanvale, Victoria",Furniture / Home Store,Laundry Service,Yoga Studio,Farmers Market,Frame Store,Football Stadium,Food Truck,Food Service,Food Court,Flower Shop


In [37]:
neighborhoods_venues_sorted.describe()

Unnamed: 0,Suburbs,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
count,191,191,191,191,191,191,191,191,191,191,191
unique,191,62,64,72,62,58,59,55,51,53,51
top,"Bentleigh East, Victoria",Café,Coffee Shop,Japanese Restaurant,Park,Hotel,Kebab Restaurant,Cosmetics Shop,Record Shop,Clothing Store,River
freq,1,84,44,40,44,39,39,40,39,39,39


In [39]:
# Take an explicit copy: res_melb is modified in place later (a column is
# inserted), which must not act on a slice of melb_grouped.
res_melb = melb_grouped[["Suburbs", "Café"]].copy()

In [40]:
res_melb

Unnamed: 0,Suburbs,Café
0,"Abbotsford, Victoria",0.093023
1,"Aberfeldie, Victoria",0.500000
2,"Aintree, Victoria",0.172414
3,"Airport West, Victoria",0.090909
4,"Albanvale, Victoria",0.000000
...,...,...
186,"St Kilda East, Victoria",0.047619
187,"St Kilda, Victoria",0.047619
188,"Sunshine, Victoria",0.000000
189,"Werribee, Victoria",0.000000


## Cluster Neighborhoods

In [42]:
# set number of clusters
kclusters = 3

# Keep only the feature column(s). `columns=` replaces the positional axis
# argument, which pandas 2 no longer accepts. 'Cluster Labels' is dropped too
# (if present) so that re-running this cell after the labels were inserted
# into res_melb still clusters on the same features.
melb_grouped_clustering = res_melb.drop(columns=['Suburbs', 'Cluster Labels'], errors='ignore')

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(melb_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 2, 1, 0, 0, 1, 1, 2, 0, 0, 0, 1, 2, 1, 2, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 2, 1, 0, 2, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 2, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 2, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 2, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 2, 2, 0, 1, 2, 0, 1, 1, 0, 2, 1, 0, 1,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2], dtype=int32)

In [43]:
melb_data

Unnamed: 0,Suburbs,Latitude,Longitude
0,"Broadmeadows, Victoria",-37.686040,144.926100
1,"Dandenong, Victoria",-37.959885,145.208850
2,"East Melbourne, Victoria",-37.810043,144.985531
3,"Elsternwick, Victoria",-37.887322,145.009896
4,"Essendon, Victoria",-37.751530,144.909510
...,...,...,...
207,"Keilor Downs, Victoria",-37.716360,144.790810
208,"Keilor East, Victoria",-37.739800,144.863600
209,"Keilor Lodge, Victoria",-37.700970,144.798450
210,"Keilor North, Victoria",-37.668940,144.765690


In [47]:
# add clustering labels; drop a column left over from a previous run first,
# because insert() raises when the column name already exists
if 'Cluster Labels' in res_melb.columns:
    res_melb = res_melb.drop(columns='Cluster Labels')
res_melb.insert(0, 'Cluster Labels', kmeans.labels_)

In [48]:
# keep only the suburbs that also appear in res_melb; reset_index() (without
# drop=True) keeps the old row labels as a new leading 'index' column
melb_merged = melb_data[melb_data['Suburbs'].isin(res_melb.Suburbs)].reset_index()

In [49]:
# join res_melb onto melb_merged by suburb name, adding the cluster label and
# Café share next to each suburb's latitude/longitude
melb_merged = melb_merged.join(res_melb.set_index('Suburbs'), on='Suburbs')

# drop rows without a cluster label; the result must be assigned back, since
# dropna(inplace=True) on a selected column does not change the DataFrame
melb_merged = melb_merged.dropna(subset=['Cluster Labels'])

melb_merged

Unnamed: 0,index,Suburbs,Latitude,Longitude,Cluster Labels,Café
0,0,"Broadmeadows, Victoria",-37.686040,144.926100,0,0.000000
1,2,"East Melbourne, Victoria",-37.810043,144.985531,1,0.172414
2,3,"Elsternwick, Victoria",-37.887322,145.009896,0,0.062500
3,4,"Essendon, Victoria",-37.751530,144.909510,0,0.000000
4,5,"Fitzroy, Victoria",-37.808233,144.977814,1,0.229508
...,...,...,...,...,...,...
195,207,"Keilor Downs, Victoria",-37.716360,144.790810,0,0.000000
196,208,"Keilor East, Victoria",-37.739800,144.863600,0,0.000000
197,209,"Keilor Lodge, Victoria",-37.700970,144.798450,1,0.111111
198,210,"Keilor North, Victoria",-37.668940,144.765690,0,0.000000


In [51]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=9)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(melb_merged['Latitude'], melb_merged['Longitude'], melb_merged['Suburbs'], melb_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # labels start at 0, so they index the palette directly (subtracting 1
    # would send label 0 to the last colour through negative indexing)
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<a id='item5'></a>


## Examine the Clusters

### Cluster 1

In [53]:
# rows whose k-means label is 0 (shown under the heading "Cluster 1");
# columns are chosen by position: column 1 plus every column from position 5 onward
melb_merged.loc[melb_merged['Cluster Labels'] == 0, melb_merged.columns[[1] + list(range(5, melb_merged.shape[1]))]]

Unnamed: 0,Suburbs,Café
0,"Broadmeadows, Victoria",0.000000
2,"Elsternwick, Victoria",0.062500
3,"Essendon, Victoria",0.000000
6,"Frankston, Victoria",0.000000
7,"Glen Waverley, Victoria",0.052632
...,...,...
191,"Ivanhoe, Victoria",0.055556
193,"Junction Village, Victoria",0.000000
195,"Keilor Downs, Victoria",0.000000
196,"Keilor East, Victoria",0.000000


### Cluster 2


In [54]:
# rows whose k-means label is 1 (shown under the heading "Cluster 2");
# columns are chosen by position: column 1 plus every column from position 5 onward
melb_merged.loc[melb_merged['Cluster Labels'] == 1, melb_merged.columns[[1] + list(range(5, melb_merged.shape[1]))]]

Unnamed: 0,Suburbs,Café
1,"East Melbourne, Victoria",0.172414
4,"Fitzroy, Victoria",0.229508
5,"Flemington, Victoria",0.100000
8,"Heidelberg, Victoria",0.111111
17,"Aintree, Victoria",0.172414
...,...,...
190,"Ivanhoe East, Victoria",0.166667
192,"Jacana, Victoria",0.172414
194,"Kealba, Victoria",0.172414
197,"Keilor Lodge, Victoria",0.111111


### Cluster 3


In [55]:
# rows whose k-means label is 2 (shown under the heading "Cluster 3");
# columns are chosen by position: column 1 plus every column from position 5 onward
melb_merged.loc[melb_merged['Cluster Labels'] == 2, melb_merged.columns[[1] + list(range(5, melb_merged.shape[1]))]]

Unnamed: 0,Suburbs,Café
14,"Williamstown, Victoria",0.333333
16,"Aberfeldie, Victoria",0.5
22,"Alphington, Victoria",0.333333
27,"Armadale, Victoria",0.5
29,"Ashburton, Victoria",0.4
44,"Belgrave South, Victoria",0.333333
47,"Bentleigh East, Victoria",0.333333
62,"Braeside, Victoria",0.5
69,"Brooklyn, Victoria",0.5
89,"Caroline Springs, Victoria",0.333333
