In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print("Libraries imported.")

Libraries imported.


In [2]:
data = requests.get("https://en.wikipedia.org/wiki/Category:Suburbs_of_Sydney").text
soup = BeautifulSoup(data, 'html.parser')
neighborhood = []
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhood.append(row.text)

In [3]:
df = pd.DataFrame({"Neighborhood": neighborhood})

df.head()

Unnamed: 0,Neighborhood
0,"► Agnes Banks, New South Wales‎ (2 P)"
1,"► Alexandria, New South Wales‎ (9 P)"
2,"► Allambie Heights, New South Wales‎ (2 P)"
3,"► Annandale, New South Wales‎ (13 P)"
4,"► Appin, New South Wales‎ (3 P)"


In [4]:
df.shape

(200, 1)

In [5]:
!pip install geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 15.1MB/s ta 0:00:01
[?25hCollecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [6]:
import geocoder
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Kuala Lumpur, Malaysia'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [7]:
coord = [ get_latlng(neighborhood) for neighborhood in df["Neighborhood"].tolist() ]

In [8]:
df_c = pd.DataFrame(coord, columns=['Latitude', 'Longitude'])

In [9]:
df['Latitude'] = df_c['Latitude']
df['Longitude'] = df_c['Longitude']

In [10]:
df.to_csv("df.csv", index=False)

In [11]:
from geopy.geocoders import Nominatim
address = 'Sydney, Australia'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Sydney, Australia {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Sydney, Australia -33.8548157, 151.2164539.


In [12]:
!pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 15.3MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.1


In [13]:
import folium

In [14]:
map1 = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map1)  
    
map1

In [15]:
map1.save('map1.html')

In [38]:
CLIENT_ID = "something"
CLIENT_SECRET = "something"
VERSION = "something" 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: something
CLIENT_SECRET:something


In [17]:
radius = 3000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [18]:
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(17104, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"► Agnes Banks, New South Wales‎ (2 P)",3.113046,101.658006,Restoran Munah,3.112204,101.662412,Malay Restaurant
1,"► Agnes Banks, New South Wales‎ (2 P)",3.113046,101.658006,Edo Ichi Japanese Restaurant,3.110124,101.664937,Japanese Restaurant
2,"► Agnes Banks, New South Wales‎ (2 P)",3.113046,101.658006,Capri by Fraser,3.110244,101.664092,Hotel
3,"► Agnes Banks, New South Wales‎ (2 P)",3.113046,101.658006,VE Hotel & Residence,3.110238,101.665596,Hotel
4,"► Agnes Banks, New South Wales‎ (2 P)",3.113046,101.658006,Big A Productions,3.111726,101.65017,General Entertainment


In [19]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"► Agnes Banks, New South Wales‎ (2 P)",100,100,100,100,100,100
"► Alexandria, New South Wales‎ (9 P)",100,100,100,100,100,100
"► Allambie Heights, New South Wales‎ (2 P)",52,52,52,52,52,52
"► Annandale, New South Wales‎ (13 P)",100,100,100,100,100,100
"► Appin, New South Wales‎ (3 P)",100,100,100,100,100,100
"► Arncliffe, New South Wales‎ (6 P)",100,100,100,100,100,100
"► Artarmon, New South Wales‎ (5 P)",100,100,100,100,100,100
"► Asquith, New South Wales‎ (7 P)",100,100,100,100,100,100
"► Auburn, New South Wales‎ (9 P)",100,100,100,100,100,100
"► Audley, New South Wales‎ (2 P)",100,100,100,100,100,100


In [20]:
venues_df['VenueCategory'].unique()[:50]

array(['Malay Restaurant', 'Japanese Restaurant', 'Hotel',
       'General Entertainment', 'Dive Shop', 'Pet Store',
       'Convenience Store', 'Chinese Restaurant', 'Café', 'Bookstore',
       'Residential Building (Apartment / Condo)', 'Coffee Shop',
       'Clothing Store', 'Sandwich Place', 'Juice Bar',
       'Gym / Fitness Center', 'Spa', 'Print Shop', 'Shopping Mall',
       'Hockey Arena', 'Restaurant', 'Supermarket', 'Indian Restaurant',
       'Food Truck', 'Poke Place', 'Department Store', 'Movie Theater',
       'Stationery Store', 'Steakhouse', 'Noodle House', 'Pharmacy',
       'Hill', 'Soccer Field', 'Snack Place', 'Food & Drink Shop',
       'Flower Shop', 'Theater', 'Breakfast Spot', 'Ice Cream Shop',
       'Boutique', 'Outdoor Event Space', 'Italian Restaurant',
       'Donut Shop', 'Laundromat', "Women's Store", 'Cupcake Shop',
       'Farmers Market', 'Smoke Shop', 'Gym', 'Frozen Yogurt Shop'],
      dtype=object)

In [21]:
onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

onehot['Neighborhoods'] = venues_df['Neighborhood'] 
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

print(onehot.shape)
onehot.head()

(17104, 315)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Austrian Restaurant,Automotive Shop,BBQ Joint,Baby Store,Bakery,Bar,Baseball Field,Baseball Stadium,Basketball Court,Bay,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Bowling Green,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Station,Business Service,Butcher,Café,Cambodian Restaurant,Camera Store,Campground,Candy Store,Cantonese Restaurant,Chaat Place,Chettinad Restaurant,Chinese Restaurant,Chocolate Shop,Churrascaria,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Cricket Ground,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Dentist's Office,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Dive Shop,Dog Run,Donut Shop,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Event Space,Exhibit,Fabric Shop,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,General College & University,General Entertainment,German Restaurant,Gift Shop,Go Kart Track,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Halal Restaurant,Harbor / Marina,Health Food Store,Hill,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Hungarian Restaurant,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indonesian Meatball Place,Indonesian Restaurant,Irish Pub,Island,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Latin American Restaurant,Laundromat,Lebanese Restaurant,Library,Lighthouse,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Monument / Landmark,Mosque,Motel,Motorcycle Shop,Mountain,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,National Park,Neighborhood,Nightclub,Non-Profit,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Event Space,Outlet Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pie Shop,Pizza Place,Planetarium,Platform,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Racetrack,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Resort,Restaurant,River,Rock Club,Rugby Pitch,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shanghai Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Stationery Store,Steakhouse,Street Food Gathering,Supermarket,Surf Spot,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Tour Provider,Track Stadium,Trail,Train Station,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Vineyard,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"► Agnes Banks, New South Wales‎ (2 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"► Agnes Banks, New South Wales‎ (2 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"► Agnes Banks, New South Wales‎ (2 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"► Agnes Banks, New South Wales‎ (2 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"► Agnes Banks, New South Wales‎ (2 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
grouped_1 = onehot.groupby(["Neighborhoods"]).mean().reset_index()

In [23]:
len(grouped_1[grouped_1["Steakhouse"] > 0])

87

In [24]:
steakhouse = grouped_1[["Neighborhoods","Steakhouse"]]

In [25]:
steakhouse.head()

Unnamed: 0,Neighborhoods,Steakhouse
0,"► Agnes Banks, New South Wales‎ (2 P)",0.01
1,"► Alexandria, New South Wales‎ (9 P)",0.0
2,"► Allambie Heights, New South Wales‎ (2 P)",0.0
3,"► Annandale, New South Wales‎ (13 P)",0.0
4,"► Appin, New South Wales‎ (3 P)",0.0


In [26]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

kclusters = 3

clustering = steakhouse.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0], dtype=int32)

In [27]:
merged = steakhouse.copy()

# add clustering labels
merged["Cluster Labels"] = kmeans.labels_

In [28]:
merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
merged.head()

Unnamed: 0,Neighborhood,Steakhouse,Cluster Labels
0,"► Agnes Banks, New South Wales‎ (2 P)",0.01,0
1,"► Alexandria, New South Wales‎ (9 P)",0.0,1
2,"► Allambie Heights, New South Wales‎ (2 P)",0.0,1
3,"► Annandale, New South Wales‎ (13 P)",0.0,1
4,"► Appin, New South Wales‎ (3 P)",0.0,1


In [29]:
merged = merged.join(df.set_index("Neighborhood"), on="Neighborhood")

print(merged.shape)
merged.head()

(199, 5)


Unnamed: 0,Neighborhood,Steakhouse,Cluster Labels,Latitude,Longitude
0,"► Agnes Banks, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
1,"► Alexandria, New South Wales‎ (9 P)",0.0,1,-33.91237,151.19703
2,"► Allambie Heights, New South Wales‎ (2 P)",0.0,1,-33.76561,151.25159
3,"► Annandale, New South Wales‎ (13 P)",0.0,1,-33.88005,151.1713
4,"► Appin, New South Wales‎ (3 P)",0.0,1,3.14789,101.69405


In [30]:
print(merged.shape)
merged.sort_values(["Cluster Labels"], inplace=True)
merged

(199, 5)


Unnamed: 0,Neighborhood,Steakhouse,Cluster Labels,Latitude,Longitude
0,"► Agnes Banks, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
118,► Lavender Bay‎ (1 P),0.01,0,3.166322,101.690303
117,"► Lansvale, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
113,"► Kyeemagh, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
108,"► Kings Cross, New South Wales‎ (1 C, 19 P)",0.01,0,3.166322,101.690303
105,"► Kensington, New South Wales‎ (8 P)",0.01,0,-33.90988,151.22219
104,"► Hurstville, New South Wales‎ (6 P)",0.010204,0,-33.96732,151.10784
100,"► Homebush, New South Wales‎ (3 P)",0.01,0,-33.86398,151.08232
197,"► Tahmoor, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
98,"► Haymarket, New South Wales‎ (1 C, 14 P)",0.01,0,3.166322,101.690303


In [31]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighborhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [32]:
map_clusters.save('map_clusters.html')

In [33]:
merged.loc[merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Steakhouse,Cluster Labels,Latitude,Longitude
0,"► Agnes Banks, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
118,► Lavender Bay‎ (1 P),0.01,0,3.166322,101.690303
117,"► Lansvale, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
113,"► Kyeemagh, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
108,"► Kings Cross, New South Wales‎ (1 C, 19 P)",0.01,0,3.166322,101.690303
105,"► Kensington, New South Wales‎ (8 P)",0.01,0,-33.90988,151.22219
104,"► Hurstville, New South Wales‎ (6 P)",0.010204,0,-33.96732,151.10784
100,"► Homebush, New South Wales‎ (3 P)",0.01,0,-33.86398,151.08232
197,"► Tahmoor, New South Wales‎ (2 P)",0.01,0,3.113046,101.658006
98,"► Haymarket, New South Wales‎ (1 C, 14 P)",0.01,0,3.166322,101.690303


In [34]:
merged.loc[merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Steakhouse,Cluster Labels,Latitude,Longitude
26,"► Bondi, New South Wales‎ (19 P)",0.0,1,3.14789,101.69405
16,"► Baulkham Hills, New South Wales‎ (4 P)",0.0,1,-33.75759,150.9896
158,"► Palm Beach, New South Wales‎ (3 P)",0.0,1,-33.59243,151.3233
157,"► Paddington, New South Wales‎ (18 P)",0.0,1,-33.88203,151.2289
137,"► Menangle Park, New South Wales‎ (4 P)",0.0,1,-34.10899,150.74824
138,"► Menangle, New South Wales‎ (3 P)",0.0,1,-34.12754,150.73838
140,"► Millers Point, New South Wales‎ (2 C, 28 P)",0.0,1,3.197824,101.623244
141,► Milsons Point‎ (6 P),0.0,1,3.14789,101.69405
145,"► Mulgoa, New South Wales‎ (6 P)",0.0,1,3.14789,101.69405
143,"► Mosman, New South Wales‎ (25 P)",0.0,1,3.14789,101.69405


In [35]:
merged.loc[merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Steakhouse,Cluster Labels,Latitude,Longitude
25,"► Bondi Junction, New South Wales‎ (5 P)",0.02,2,-33.89241,151.24732
39,"► Canley Vale, New South Wales‎ (3 P)",0.03,2,-33.88705,150.9426
81,"► Emu Plains, New South Wales‎ (7 P)",0.037037,2,-33.75347,150.65991
171,"► Queens Park, New South Wales‎ (3 P)",0.02,2,-33.89937,151.24732
92,"► Glenwood, New South Wales‎ (3 P)",0.019231,2,-33.73223,150.92793
66,"► Cronulla, New South Wales‎ (20 P)",0.018519,2,-34.05516,151.15333
115,"► Lane Cove North, New South Wales‎ (3 P)",0.022222,2,-33.80159,151.14855
