<a href="https://colab.research.google.com/github/MaguireMaName/Coursera_Capstone/blob/master/The_Battle_of_Neighborhoods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Capstone: Battle of the Neighbourhoods

## Methodology

### 1.  Install/import dependencies

In [0]:
#!pip install geocoder

In [0]:
# load dependancies
import pandas as pd 
import numpy as np
from numpy import *
import geocoder
import folium
from folium import plugins
from folium.plugins import HeatMap
from geopy.geocoders import Nominatim
import requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### 2.  Import the neighbourhood dataset and inspect its dimensions

In [421]:
cbr = pd.read_csv('Canberra suburbs.csv') # read canberra neighbourhoods
print(cbr.shape) # check dimensions
cbr.head() # inspect data

(124, 4)


Unnamed: 0,Neighborhood,Postcode,Country,Region
0,ACTON,2601,Australia,Australian Capital Territory
1,AINSLIE,2602,Australia,Australian Capital Territory
2,AMAROO,2914,Australia,Australian Capital Territory
3,ARANDA,2614,Australia,Australian Capital Territory
4,BANKS,2906,Australia,Australian Capital Territory


### 3.  Geocode the neighbourhood dataset

In [422]:
Lat_list=[]
Lng_list=[]

for i in range(cbr.shape[0]):
    address='{}, Canberra, Australia'.format(cbr.at[i,'Neighborhood']) # geocoding each canberra neighbourhood
    g = geocoder.arcgis(address)
    Lat_list.append(g.latlng[0])
    Lng_list.append(g.latlng[1])
    
for i in range(cbr.shape[0]):  
  
    cbr['Latitude'] = Lat_list   # add the lat to Canberra neighbourhood dataset 
    cbr['Longitude'] = Lng_list  # add the lng to Canberra neighbourhood dataset 
    
print(cbr.shape) # check dimensions
cbr.head() # inspect data

(124, 6)


Unnamed: 0,Neighborhood,Postcode,Country,Region,Latitude,Longitude
0,ACTON,2601,Australia,Australian Capital Territory,-35.28562,149.11827
1,AINSLIE,2602,Australia,Australian Capital Territory,-35.26222,149.14655
2,AMAROO,2914,Australia,Australian Capital Territory,-35.16922,149.12637
3,ARANDA,2614,Australia,Australian Capital Territory,-35.25804,149.08293
4,BANKS,2906,Australia,Australian Capital Territory,-35.47004,149.09771


### 4.  Visualise the point data of neighbourhoods

In [423]:
address = 'Canberra, Australian Capital Territory'

geolocator = Nominatim(user_agent="canberra_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_cbr = folium.Map(location=[latitude, longitude], zoom_start=11, tiles='Stamen Toner') # create a base map of Canberra using latitude and longitude values

# add markers to map
for lat, lng, label in zip(cbr['Latitude'], cbr['Longitude'], cbr['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7.5,
        tooltip=label,
        popup=label,
        color='pink',
        fill=True,
        fill_color='blue',
        fill_opacity=0.8,
        parse_html=False).add_to(map_cbr)  
    
map_cbr

### 4. Lookup venue categories and their frequencies in neighbourhoods

In [0]:
# initialise Foursquare credentials, version no., and limt

client_id = 'KL5SVGOS40RKZBQK4G1VXYBKBICWCDQL2NMCASHFYER432SS' 
client_secret = '1A5KPYJQIATH0SDZXPPZ5YK0SHLBYVEGPER5AAIIMDXLZ0AB'
version = '20180604'
limit = 100

In [0]:
# create a function to lookup venues and iterate across in Canberra neighborhoods dataset

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            client_id, 
            client_secret, 
            version, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # create the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [426]:
# run the above function on each neighborhood and create a new dataframe called Canberra venues

cbr_venues = getNearbyVenues(names=cbr['Neighborhood'],
                                   latitudes=cbr['Latitude'],
                                   longitudes=cbr['Longitude']
                                  )

ACTON
AINSLIE
AMAROO
ARANDA
BANKS
BARTON
BEARD
BELCONNEN
BLACK MOUNTAIN
BONNER
BONYTHON
BRADDON
BRUCE
CALWELL
CAMPBELL
CAPITAL HILL
CASEY
CHAPMAN
CHARNWOOD
CHIFLEY
CHISHOLM
CITY
CONDER
COOK
COOMBS
CRACE
CURTIN
DEAKIN
DENMAN PROSPECT
DICKSON
DOWNER
DUFFY
DUNLOP
DUNTROON
EVATT
FADDEN
FARRER
FISHER
FLOREY
FLYNN
FORDE
FORREST
FRANKLIN
FRASER
FYSHWICK
GARRAN
GILMORE
GIRALANG
GORDON
GOWRIE
GREENWAY
GRIFFITH
GUNGAHLIN
HACKETT
HALL
HARMAN
HARRISON
HAWKER
HIGGINS
HOLDER
HOLT
HUGHES
HUME
ISAACS
ISABELLA PLAINS
JACKA
KALEEN
KAMBAH
KENNY
KINGSTON
KINLYSIDE
LATHAM
LAWSON
LYNEHAM
LYONS
MACARTHUR
MACGREGOR
MACQUARIE
MAJURA
MAWSON
MCKELLAR
MELBA
MITCHELL
MOLONGLO
MONASH
MONCRIEFF
NARRABUNDAH
NGUNNAWAL
NICHOLLS
OAKS ESTATE
O'CONNOR
O'MALLEY
OXLEY
PAGE
PALMERSTON
PARKES
PEARCE
PHILLIP
PIALLIGO
RED HILL
REID
RICHARDSON
RIVETT
RUSSELL
SCULLIN
SPENCE
STIRLING
STROMLO
SYMONSTON
TAYLOR
THARWA
THEODORE
THROSBY
TORRENS
TURNER
URIARRA
WANNIASSA
WARAMANGA
WATSON
WEETANGERA
WESTON
WILLIAMSDALE
WRIGHT
YARRALUMLA


In [427]:
print(cbr_venues.shape) # check dimensions
cbr_venues.head() # inspect data

(663, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,ACTON,-35.28562,149.11827,Monster Kitchen and Bar,-35.285122,149.122547,Hotel Bar
1,ACTON,-35.28562,149.11827,Palace Electric Cinema,-35.285014,149.123135,Movie Theater
2,ACTON,-35.28562,149.11827,National Film & Sound Archive,-35.283131,149.121143,Museum
3,ACTON,-35.28562,149.11827,University House Hotel,-35.283743,149.118193,Hotel
4,ACTON,-35.28562,149.11827,The Fellows Bar & Cafe,-35.283781,149.117636,Bar


###5. Aggregate the venues across venue category and neighborhoods


In [428]:

cbr_venues.groupby('Neighborhood').count() # aggregate the number of venues returned for each neighborhood

print('There are {} unique venue categories.'.format(len(cbr_venues['Venue Category'].unique())))
print(cbr_venues.head())

There are 163 unique venue categories.
  Neighborhood  Neighborhood Latitude  ...  Venue Longitude Venue Category
0        ACTON              -35.28562  ...       149.122547      Hotel Bar
1        ACTON              -35.28562  ...       149.123135  Movie Theater
2        ACTON              -35.28562  ...       149.121143         Museum
3        ACTON              -35.28562  ...       149.118193          Hotel
4        ACTON              -35.28562  ...       149.117636            Bar

[5 rows x 7 columns]


In [429]:
# analyse each neighbourhood
cbr_onehot = pd.get_dummies(cbr_venues[['Venue Category']], prefix="", prefix_sep="") # one hot encoding

cbr_onehot['Neighborhood'] = cbr_venues['Neighborhood'] # add neighborhood column back to dataframe

fixed_columns = [cbr_onehot.columns[-1]] + list(cbr_onehot.columns[:-1]) # move neighborhood column to the first column 
cbr_onehot = cbr_onehot[fixed_columns]

cbr_onehot.head()

Unnamed: 0,Neighborhood,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Bakery,Bar,Baseball Field,Beer Bar,Bike Trail,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burmese Restaurant,Burrito Place,Bus Station,Bus Stop,Business Service,Café,Campaign Office,Cantonese Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comic Shop,Construction & Landscaping,Convenience Store,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,...,Record Shop,Recreation Center,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Soccer Field,Social Club,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Tiki Bar,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,ACTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,ACTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,ACTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,ACTON,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,ACTON,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [430]:
# aggregate
cbr_grouped = cbr_onehot.groupby('Neighborhood').mean().reset_index()
cbr_grouped.head()

Unnamed: 0,Neighborhood,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Bakery,Bar,Baseball Field,Beer Bar,Bike Trail,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burmese Restaurant,Burrito Place,Bus Station,Bus Stop,Business Service,Café,Campaign Office,Cantonese Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comic Shop,Construction & Landscaping,Convenience Store,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,...,Record Shop,Recreation Center,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Soccer Field,Social Club,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Thrift / Vintage Store,Tiki Bar,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,ACTON,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,AINSLIE,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,AMAROO,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ARANDA,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,...,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,BANKS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [431]:
# top 5 frequencies

num_top_venues = 5

for hood in cbr_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = cbr_grouped[cbr_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----ACTON----
                 venue  freq
0        Movie Theater  0.22
1                 Café  0.22
2  Indie Movie Theater  0.11
3                  Bar  0.11
4            Hotel Bar  0.11


----AINSLIE----
                   venue  freq
0         Shopping Plaza  0.14
1                    Pub  0.14
2  Australian Restaurant  0.14
3                 Bakery  0.14
4      Fish & Chips Shop  0.14


----AMAROO----
                 venue  freq
0   Athletics & Sports   0.5
1             Pharmacy   0.5
2  Arts & Crafts Store   0.0
3    Other Repair Shop   0.0
4               Museum   0.0


----ARANDA----
               venue  freq
0               Café  0.33
1  Recreation Center  0.17
2                Bar  0.17
3       Dance Studio  0.17
4          Gift Shop  0.17


----BANKS----
                 venue  freq
0        Grocery Store   1.0
1  Arts & Crafts Store   0.0
2    Other Repair Shop   0.0
3               Museum   0.0
4          Music Venue   0.0


----BARTON----
                 venue  freq
0 

In [0]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [433]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = cbr_grouped['Neighborhood']

for ind in np.arange(cbr_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(cbr_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,ACTON,Café,Movie Theater,Bar,Museum,Hotel Bar
1,AINSLIE,Café,Pub,Australian Restaurant,Bakery,Shopping Plaza
2,AMAROO,Athletics & Sports,Pharmacy,Yoga Studio,Food & Drink Shop,Gaming Cafe
3,ARANDA,Café,Gift Shop,Recreation Center,Bar,Dance Studio
4,BANKS,Grocery Store,Yoga Studio,Garden,Furniture / Home Store,Fruit & Vegetable Store


###6. Cluster the neighbourhoods by similarity across frequencies and venue category

In [434]:
# set number of clusters
kclusters = 5

cbr_grouped_clustering = cbr_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cbr_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:5]

# add clustering labels
neighborhoods_venues_sorted.insert(1, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted.tail()

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
104,WANNIASSA,2,Ice Cream Shop,Theater,Sandwich Place,Pool,Hostel
105,WARAMANGA,4,Soccer Field,Grocery Store,Yoga Studio,Food & Drink Shop,Furniture / Home Store
106,WATSON,1,Café,Grocery Store,Shopping Plaza,Filipino Restaurant,Fish & Chips Shop
107,WESTON,1,Café,Fast Food Restaurant,Sandwich Place,Cricket Ground,Farmers Market
108,WRIGHT,2,River,Yoga Studio,Flower Shop,Furniture / Home Store,Fruit & Vegetable Store


In [435]:
# merge dataset and check output
cbr_merged = pd.merge(cbr, neighborhoods_venues_sorted, on='Neighborhood')
cbr_merged.tail()

Unnamed: 0,Neighborhood,Postcode,Country,Region,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
104,WANNIASSA,2903,Australia,Australian Capital Territory,-35.40288,149.09194,2,Ice Cream Shop,Theater,Sandwich Place,Pool,Hostel
105,WARAMANGA,2611,Australia,Australian Capital Territory,-35.35331,149.06015,4,Soccer Field,Grocery Store,Yoga Studio,Food & Drink Shop,Furniture / Home Store
106,WATSON,2602,Australia,Australian Capital Territory,-35.24132,149.15731,1,Café,Grocery Store,Shopping Plaza,Filipino Restaurant,Fish & Chips Shop
107,WESTON,2611,Australia,Australian Capital Territory,-35.3366,149.05535,1,Café,Fast Food Restaurant,Sandwich Place,Cricket Ground,Farmers Market
108,WRIGHT,2611,Australia,Australian Capital Territory,-35.32441,149.04089,2,River,Yoga Studio,Flower Shop,Furniture / Home Store,Fruit & Vegetable Store


###7. Visualise the neighborhoods by cluster

In [436]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11, tiles='Stamen Toner')

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cbr_merged['Latitude'], cbr_merged['Longitude'], cbr_merged['Neighborhood'], cbr_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

###8. Inspect the venues of each cluster

In [437]:
# cluster 0
cbr_merged.loc[cbr_merged['Cluster Labels'] == 0, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
42,2906,149.08503,0,Bus Station,Spa,Yoga Studio,Food & Drink Shop,Furniture / Home Store
50,2914,149.16094,0,Bus Station,Park,Yoga Studio,Food & Drink Shop,Gaming Cafe
72,2617,149.0757,0,Bus Station,Japanese Restaurant,Yoga Studio,Food & Drink Shop,Gaming Cafe
73,2615,149.05312,0,Bus Station,Yoga Studio,Food & Drink Shop,Gaming Cafe,Furniture / Home Store
77,2914,149.11349,0,Bus Station,Yoga Studio,Food & Drink Shop,Gaming Cafe,Furniture / Home Store
83,2903,149.08206,0,Bus Station,Yoga Studio,Food & Drink Shop,Gaming Cafe,Furniture / Home Store
102,2607,149.08789,0,Bus Station,Shop & Service,Yoga Studio,Food & Drink Shop,Furniture / Home Store


In [438]:
# cluster 1
cbr_merged.loc[cbr_merged['Cluster Labels'] == 1, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,2601,149.11827,1,Café,Movie Theater,Bar,Museum,Hotel Bar
1,2602,149.14655,1,Café,Pub,Australian Restaurant,Bakery,Shopping Plaza
3,2614,149.08293,1,Café,Gift Shop,Recreation Center,Bar,Dance Studio
5,2600,149.13354,1,Café,Hotel,Coffee Shop,Bistro,Event Space
10,2612,149.13833,1,Café,Hotel,Pizza Place,Australian Restaurant,Pub
11,2617,149.09268,1,Café,Flower Shop,Pub,Grocery Store,Furniture / Home Store
13,2600,149.12655,1,Café,Gift Shop,History Museum,Gym / Fitness Center,Flower Shop
17,2606,149.07547,1,Convenience Store,Café,Playground,Gaming Cafe,Furniture / Home Store
20,2614,149.06442,1,Café,Bike Trail,Grocery Store,Gym,Chinese Restaurant
24,2600,149.10837,1,Café,Gym,Gas Station,Seafood Restaurant,Sports Bar


In [439]:
# cluster 2
cbr_merged.loc[cbr_merged['Cluster Labels'] == 2, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,2914,149.12637,2,Athletics & Sports,Pharmacy,Yoga Studio,Food & Drink Shop,Gaming Cafe
6,2620,149.21217,2,Home Service,Dance Studio,Yoga Studio,Flower Shop,Furniture / Home Store
7,2617,149.06345,2,Coffee Shop,Café,Music Venue,Gym,Fast Food Restaurant
8,2601,149.09754,2,Mountain,Monument / Landmark,History Museum,Scenic Lookout,Flower Shop
9,2905,149.0802,2,Wine Shop,IT Services,Cupcake Shop,Food & Drink Shop,Gaming Cafe
14,2913,149.09315,2,Bookstore,Playground,Business Service,Yoga Studio,Gaming Cafe
16,2615,149.03235,2,Construction & Landscaping,Yoga Studio,Garden,Furniture / Home Store,Fruit & Vegetable Store
18,2601,149.13093,2,Coffee Shop,Café,Thai Restaurant,Korean Restaurant,Japanese Restaurant
19,2906,149.09733,2,Supermarket,Sports Club,Toy / Game Store,Pizza Place,Fried Chicken Joint
21,2611,149.0424,2,River,Yoga Studio,Flower Shop,Furniture / Home Store,Fruit & Vegetable Store


In [440]:
# cluster 3
cbr_merged.loc[cbr_merged['Cluster Labels'] == 3, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
12,2612,149.16005,3,Bus Stop,Yoga Studio,Food & Drink Shop,Gaming Cafe,Furniture / Home Store
27,2615,149.02221,3,Bus Stop,Yoga Studio,Food & Drink Shop,Gaming Cafe,Furniture / Home Store
76,2904,149.09226,3,Bus Stop,Yoga Studio,Food & Drink Shop,Gaming Cafe,Furniture / Home Store


In [441]:
# cluster 4
cbr_merged.loc[cbr_merged['Cluster Labels'] == 4, cbr_merged.columns[[1] + list(range(5, cbr_merged.shape[1]))]]

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,2906,149.09771,4,Grocery Store,Yoga Studio,Garden,Furniture / Home Store,Fruit & Vegetable Store
15,2611,149.04076,4,Bus Station,Grocery Store,Track,Yoga Studio,Food & Drink Shop
64,2615,149.03168,4,Soccer Field,Grocery Store,Yoga Studio,Food & Drink Shop,Furniture / Home Store
79,2913,149.10802,4,Grocery Store,Diner,Fast Food Restaurant,Yoga Studio,Furniture / Home Store
85,2913,149.11758,4,Bus Station,Grocery Store,Yoga Studio,Food & Drink Shop,Furniture / Home Store
90,2603,149.12669,4,Asian Restaurant,Grocery Store,Yoga Studio,Flower Shop,Furniture / Home Store
92,2905,149.11124,4,Grocery Store,Yoga Studio,Garden,Furniture / Home Store,Fruit & Vegetable Store
95,2614,149.04042,4,Moving Target,Chinese Restaurant,Bus Station,Grocery Store,Garden
105,2611,149.06015,4,Soccer Field,Grocery Store,Yoga Studio,Food & Drink Shop,Furniture / Home Store


### 9. Import Canberra neighborhood crime statisitcs

In [442]:
cbr_crime = pd.read_excel('Canberra Neighborhoods Crime Stats.xlsx')
print(cbr_crime.shape) # check dimensions
cbr_crime.head() # inspect data

(124, 7)


Unnamed: 0,Neighborhood,Homicide,Assault,Robbery,Burglary,Total,Population
0,ACTON,4,23,2,21,50,2277
1,AINSLIE,0,37,3,57,97,5313
2,AMAROO,0,33,1,36,70,5849
3,ARANDA,0,12,3,18,33,2500
4,BANKS,0,10,1,25,36,4924


###10. Derive variables to return total crime per capital and then rank order those results
#### Note: (1) is the lowest rank and (109) is the highest in terms of population weighted crime

In [0]:
cbr_crime = cbr_crime[ (cbr_crime['Population']>0) & (cbr_crime['Total'] >0) ] # only process those neighborhoods with crime and population counts
cbr_crime['Total per pop'] = (cbr_crime['Total'] /  cbr_crime['Population']) * 100 # determine proportion of total Assault, Robbery, and Burglary per capita
cbr_crime['Rank'] = cbr_crime['Total per pop'].rank(ascending=1) # rank order the results

### 11. Merge crime data with our clustered neighborhoods dataset

In [444]:
cbr_crime = pd.merge(cbr_crime, cbr_merged[['Cluster Labels', 'Neighborhood', 'Latitude', 'Longitude']], on='Neighborhood').sort_values('Rank')
print(cbr_crime.shape) # check dimensions
cbr_crime.head() # inspect data
cbr_crime.tail() # inspect data

(100, 12)


Unnamed: 0,Neighborhood,Homicide,Assault,Robbery,Burglary,Total,Population,Total per pop,Rank,Cluster Labels,Latitude,Longitude
15,CITY,0,451,34,76,561,3860,14.533679,108.0,2,-35.28007,149.13093
70,MONCRIEFF,0,25,2,34,61,105,58.095238,109.0,0,-35.162,149.11349
35,FYSHWICK,0,38,8,120,166,56,296.428571,110.0,1,-35.3271,149.17563
79,PARKES,0,11,1,7,19,5,380.0,111.0,1,-35.30258,149.12881
68,MITCHELL,0,15,0,33,48,9,533.333333,112.0,2,-35.21932,149.13442


###12. Visualise output and and modifify filter to query datset

In this query I'm looking at those neighborhoods that ranked highly given incidence of crime per capita

In [445]:

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11, tiles='Stamen Toner')

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# query filter map data
map_data = cbr_crime[ 
#                    (cbr_crime['Cluster Labels'] == 0) & 
                     (cbr_crime['Rank'] >= cbr_crime['Rank'].quantile(0.9))] # 90th percentile


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(map_data['Latitude'], map_data['Longitude'], map_data['Neighborhood'],  map_data['Cluster Labels'] ):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters