# Segmenting and Clustering Neighborhoods in Hamburg

The following notebook is used to cluster the neighborhoods of Hamburg, Germany. Please note, that cells including credentials are modified after running and then pushed to GitHub to hide the API credentials.

In [1]:
import json

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests

from bs4 import BeautifulSoup
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

Get a list of neighborhoods in Hamburg.

In [2]:
page = requests.get("https://www.hamburg.com/residents/neighbourhoods/")
soup = BeautifulSoup(page.content, "html.parser")

navigation = soup.find(id="navigation")
neighborhoods = []

for element in navigation.div.ul.find_all("li"):
    neighborhoods.append(element.a.text.strip())

neighborhoods

['Altona',
 'Bergedorf',
 'Blankenese',
 'Eimsbüttel',
 'Hamburg-Mitte',
 'Hamburg-Nord',
 'Harburg',
 'Karoviertel',
 'Ottensen',
 'St. Georg',
 'HafenCity',
 'St. Pauli',
 'Schanzenviertel',
 'Wandsbek',
 'Wilhelmsburg']

Convert the list of neighborhoods into a DataFrame.

In [3]:
columns = ["Neighborhood"]
index = range(len(neighborhoods))
neighborhoods_df = pd.DataFrame(neighborhoods, index=index, columns=columns)
neighborhoods_df

Unnamed: 0,Neighborhood
0,Altona
1,Bergedorf
2,Blankenese
3,Eimsbüttel
4,Hamburg-Mitte
5,Hamburg-Nord
6,Harburg
7,Karoviertel
8,Ottensen
9,St. Georg


Get longitude and latitude for each neighborhood. The logic to get the latitude and longitude for each neighborhood is inside a function, because it needs to be recalled on failure. This can happen, as the OpenStreeMap API may time out.

In [4]:
def get_location_neighborhoods(df):
    df["Longitude"] = 0.0
    df["Latitude"] = 0.0
    
    for neighborhood in df["Neighborhood"]:
        address = neighborhood + ", Hamburg"

        geolocator = Nominatim(user_agent="hamburg_explorer")
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude

        df.loc[df["Neighborhood"] == neighborhood, "Latitude"] = latitude
        df.loc[df["Neighborhood"] == neighborhood, "Longitude"] = longitude

    return df

In [5]:
flag = True
while flag:
    try:
        neighborhoods_df = get_location_neighborhoods(neighborhoods_df)
        flag = False
    except GeocoderTimedOut:
        flag = True

neighborhoods_df

Unnamed: 0,Neighborhood,Longitude,Latitude
0,Altona,9.77767,53.586468
1,Bergedorf,10.2267,53.4858
2,Blankenese,9.80306,53.5575
3,Eimsbüttel,9.9501,53.572483
4,Hamburg-Mitte,8.436046,53.933137
5,Hamburg-Nord,10.00974,53.619156
6,Harburg,9.989014,53.454174
7,Karoviertel,9.97267,53.558448
8,Ottensen,9.919819,53.555066
9,St. Georg,10.014162,53.556993


In [6]:
address = "Hamburg, Hamburg"

geolocator = Nominatim(user_agent="hamburg_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_hamburg = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(neighborhoods_df['Latitude'], neighborhoods_df['Longitude'], neighborhoods_df['Neighborhood']):
    label = '{}, Hamburg'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_hamburg)  
    
map_hamburg

As Hamburg has not so many neighborhoods as New York City, we are going to use all of them for clustering.

In [None]:
CLIENT_ID = '2X3Hxxxxxxxx' # your Foursquare ID
CLIENT_SECRET = '4JNHxxxxxxxx' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Define a function to get nearby venues for each neighborhood of Hamburg.

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

The following DataFrame is used for all venues per neighborhood.

In [10]:
hamburg_venues = getNearbyVenues(names=neighborhoods_df['Neighborhood'],
                                   latitudes=neighborhoods_df['Latitude'],
                                   longitudes=neighborhoods_df['Longitude']
                                  )

Altona
Bergedorf
Blankenese
Eimsbüttel
Hamburg-Mitte
Hamburg-Nord
Harburg
Karoviertel
Ottensen
St. Georg
HafenCity
St. Pauli
Schanzenviertel
Wandsbek
Wilhelmsburg


Let's check shape and the first five entries of the resulting DataFrame as well as the number of venues per neighborhood.

In [11]:
print(hamburg_venues.shape)
hamburg_venues.head()

(514, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Altona,53.586468,9.77767,Driving Range Rissen,53.585143,9.777167,Golf Course
1,Altona,53.586468,9.77767,THK Rissen,53.584959,9.774733,Athletics & Sports
2,Bergedorf,53.4858,10.2267,Achilleon,53.483014,10.22172,Restaurant
3,Bergedorf,53.4858,10.2267,Spielplatz Ernst-Henning Str.,53.485504,10.226491,Playground
4,Bergedorf,53.4858,10.2267,Cura Häuslicher Pflegedienst Bergedorf,53.482184,10.227903,Daycare


In [12]:
hamburg_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Altona,2,2,2,2,2,2
Bergedorf,3,3,3,3,3,3
Blankenese,17,17,17,17,17,17
Eimsbüttel,29,29,29,29,29,29
HafenCity,61,61,61,61,61,61
Hamburg-Nord,7,7,7,7,7,7
Harburg,16,16,16,16,16,16
Karoviertel,74,74,74,74,74,74
Ottensen,16,16,16,16,16,16
Schanzenviertel,96,96,96,96,96,96


Analyze single neighborhoods

In [13]:
# one hot encoding
hamburg_onehot = pd.get_dummies(hamburg_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
hamburg_onehot['Neighborhood'] = hamburg_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [hamburg_onehot.columns[-1]] + list(hamburg_onehot.columns[:-1])
hamburg_onehot = hamburg_onehot[fixed_columns]

hamburg_onehot.head()

Unnamed: 0,Wine Shop,Accessories Store,Airport Service,Arepa Restaurant,Art Gallery,Asian Restaurant,Athletics & Sports,Austrian Restaurant,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Bavarian Restaurant,Beach,Beach Bar,Beer Bar,Beer Store,Bistro,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Café,Cheese Shop,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Convenience Store,Cruise,Currywurst Joint,Daycare,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Doner Restaurant,Drugstore,Eastern European Restaurant,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Fast Food Restaurant,Flea Market,Food & Drink Shop,Food Court,French Restaurant,Frozen Yogurt Shop,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Hardware Store,Hawaiian Restaurant,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Korean Restaurant,Kumpir Restaurant,Lighthouse,Liquor Store,Lounge,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Neighborhood,Nightclub,Optical Shop,Organic Grocery,Other Nightlife,Palatine Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Pet Service,Pet Store,Pharmacy,Pide Place,Pier,Pizza Place,Playground,Plaza,Pool Hall,Portuguese Restaurant,Pub,Racetrack,Ramen Restaurant,Record Shop,Restaurant,Road,Rock Club,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Snack Place,Soup Place,Souvlaki Shop,Spa,Spanish Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Taverna,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Altona,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Altona,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bergedorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bergedorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bergedorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Group neighborhoods

In [14]:
hamburg_grouped = hamburg_onehot.groupby('Neighborhood').mean().reset_index()
hamburg_grouped

Unnamed: 0,Neighborhood,Wine Shop,Accessories Store,Airport Service,Arepa Restaurant,Art Gallery,Asian Restaurant,Athletics & Sports,Austrian Restaurant,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Bavarian Restaurant,Beach,Beach Bar,Beer Bar,Beer Store,Bistro,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Café,Cheese Shop,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Convenience Store,Cruise,Currywurst Joint,Daycare,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Doner Restaurant,Drugstore,Eastern European Restaurant,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Fast Food Restaurant,Flea Market,Food & Drink Shop,Food Court,French Restaurant,Frozen Yogurt Shop,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Hardware Store,Hawaiian Restaurant,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Korean Restaurant,Kumpir Restaurant,Lighthouse,Liquor Store,Lounge,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Nightclub,Optical Shop,Organic Grocery,Other Nightlife,Palatine Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Pet Service,Pet Store,Pharmacy,Pide Place,Pier,Pizza Place,Playground,Plaza,Pool Hall,Portuguese Restaurant,Pub,Racetrack,Ramen Restaurant,Record Shop,Restaurant,Road,Rock Club,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Snack Place,Soup Place,Souvlaki Shop,Spa,Spanish Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Taverna,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar
0,Altona,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bergedorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Blankenese,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Eimsbüttel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.103448,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.103448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483
4,HafenCity,0.0,0.0,0.0,0.0,0.016393,0.032787,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.04918,0.016393,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.016393,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081967,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.065574,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.04918,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.016393,0.032787,0.0,0.016393,0.016393,0.0,0.032787,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.016393,0.016393,0.016393,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.04918,0.0,0.0,0.0,0.016393,0.016393,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.016393,0.0,0.032787,0.0,0.032787,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Hamburg-Nord,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Harburg,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Karoviertel,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.013514,0.013514,0.013514,0.0,0.081081,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.027027,0.013514,0.0,0.027027,0.0,0.0,0.054054,0.0,0.013514,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.013514,0.0,0.013514,0.0,0.013514,0.0,0.040541,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.013514,0.013514,0.0,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.027027,0.040541,0.0,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.0,0.013514,0.0,0.0,0.027027,0.013514,0.013514,0.0,0.0,0.0,0.027027,0.013514,0.013514,0.0,0.0,0.0,0.0,0.0,0.027027,0.013514,0.0,0.013514,0.0,0.0,0.0,0.013514,0.0,0.0,0.0,0.013514,0.013514,0.013514,0.013514,0.0,0.0
8,Ottensen,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.125,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0
9,Schanzenviertel,0.010417,0.010417,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.010417,0.010417,0.010417,0.0,0.010417,0.0,0.010417,0.010417,0.020833,0.010417,0.0,0.0625,0.010417,0.0,0.0,0.041667,0.020833,0.083333,0.0,0.010417,0.0,0.010417,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.03125,0.0,0.0,0.010417,0.0,0.010417,0.0,0.0,0.03125,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.020833,0.0,0.010417,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.020833,0.020833,0.010417,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.020833,0.0,0.010417,0.020833,0.041667,0.0,0.0,0.010417,0.0,0.010417,0.0,0.0,0.010417,0.010417,0.010417,0.0,0.0,0.020833,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0


Printing the top five venue categories per neighborhood

In [15]:
num_top_venues = 5

for hood in hamburg_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = hamburg_grouped[hamburg_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Altona----
                venue  freq
0         Golf Course   0.5
1  Athletics & Sports   0.5
2           Wine Shop   0.0
3     Organic Grocery   0.0
4       Movie Theater   0.0


----Bergedorf----
        venue  freq
0  Restaurant  0.33
1  Playground  0.33
2     Daycare  0.33
3   Wine Shop  0.00
4   Multiplex  0.00


----Blankenese----
                venue  freq
0         Snack Place  0.12
1                Café  0.12
2  Seafood Restaurant  0.12
3               Hotel  0.12
4               Beach  0.12


----Eimsbüttel----
                venue  freq
0         Supermarket  0.10
1              Bakery  0.10
2  Italian Restaurant  0.07
3                Park  0.07
4            Pharmacy  0.07


----HafenCity----
                venue  freq
0   German Restaurant  0.08
1               Hotel  0.07
2                 Bar  0.05
3  Italian Restaurant  0.05
4          Restaurant  0.05


----Hamburg-Nord----
             venue  freq
0            Hotel  0.29
1  Airport Service  0.14
2      Gas St

The following function will return a DataFrame sorted by venues descending.

In [16]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

The following DataFrame is created to display the top ten venues per neighborhood.

In [17]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = hamburg_grouped['Neighborhood']

for ind in np.arange(hamburg_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(hamburg_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Altona,Golf Course,Athletics & Sports,Wine Bar,Event Space,French Restaurant,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant,Falafel Restaurant
1,Bergedorf,Daycare,Playground,Restaurant,Electronics Store,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant,Falafel Restaurant,Exhibit
2,Blankenese,Seafood Restaurant,Café,Hotel,Snack Place,Restaurant,Beach,Pier,Lighthouse,French Restaurant,Bavarian Restaurant
3,Eimsbüttel,Bakery,Supermarket,Pharmacy,Bookstore,Italian Restaurant,Drugstore,Park,Café,Hot Dog Joint,Gym
4,HafenCity,German Restaurant,Hotel,Italian Restaurant,Restaurant,Bar,Museum,Exhibit,Bistro,Tapas Restaurant,Tea Room


Let's cluster the neighborhoods into **three** clusters. In this case we use **three** instead of **five** as Hamburg doesn't have many neighborhoods.

In [18]:
kclusters = 3

hamburg_grouped_clustering = hamburg_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(hamburg_grouped_clustering)

kmeans.labels_[0:10] 

array([0, 2, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

The following DataFrame contains the new clusters as well as the top ten venues per neighborhood.
**Note:** The neighborhood *Hamburg-Mitte* is dropped as no venues were found for it.

In [19]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

hamburg_merged = neighborhoods_df
hamburg_merged = hamburg_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

hamburg_merged = hamburg_merged[hamburg_merged["Neighborhood"] != "Hamburg-Mitte"]
hamburg_merged["Cluster Labels"] = hamburg_merged["Cluster Labels"].astype(int)

hamburg_merged.head()

Unnamed: 0,Neighborhood,Longitude,Latitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Altona,9.77767,53.586468,0,Golf Course,Athletics & Sports,Wine Bar,Event Space,French Restaurant,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant,Falafel Restaurant
1,Bergedorf,10.2267,53.4858,2,Daycare,Playground,Restaurant,Electronics Store,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant,Falafel Restaurant,Exhibit
2,Blankenese,9.80306,53.5575,1,Seafood Restaurant,Café,Hotel,Snack Place,Restaurant,Beach,Pier,Lighthouse,French Restaurant,Bavarian Restaurant
3,Eimsbüttel,9.9501,53.572483,1,Bakery,Supermarket,Pharmacy,Bookstore,Italian Restaurant,Drugstore,Park,Café,Hot Dog Joint,Gym
5,Hamburg-Nord,10.00974,53.619156,1,Hotel,Pet Service,Bus Stop,Gas Station,Airport Service,Hostel,Dim Sum Restaurant,French Restaurant,Food Court,Food & Drink Shop


Finally, the clusters are visualized.

In [20]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(hamburg_merged['Latitude'], hamburg_merged['Longitude'], hamburg_merged['Neighborhood'], hamburg_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

As you can see, Hamburg itself is quite homogen except the neighborhoods *Altona* and *Bergedorf*. For people living in Hamburg this is no new information.

Now let's have a look at the three different clusters itself.

**Cluster 1**

In [21]:
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 0, hamburg_merged.columns[[1] + list(range(5, hamburg_merged.shape[1]))]]

Unnamed: 0,Longitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,9.77767,Athletics & Sports,Wine Bar,Event Space,French Restaurant,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant,Falafel Restaurant


**Cluster 2**

In [22]:
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 1, hamburg_merged.columns[[1] + list(range(5, hamburg_merged.shape[1]))]]

Unnamed: 0,Longitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,9.80306,Café,Hotel,Snack Place,Restaurant,Beach,Pier,Lighthouse,French Restaurant,Bavarian Restaurant
3,9.9501,Supermarket,Pharmacy,Bookstore,Italian Restaurant,Drugstore,Park,Café,Hot Dog Joint,Gym
5,10.00974,Pet Service,Bus Stop,Gas Station,Airport Service,Hostel,Dim Sum Restaurant,French Restaurant,Food Court,Food & Drink Shop
6,9.989014,Supermarket,Shopping Mall,Clothing Store,Fast Food Restaurant,Gastropub,Kebab Restaurant,Spanish Restaurant,Multiplex,Electronics Store
7,9.97267,Café,Nightclub,Gastropub,Music Venue,Steakhouse,Seafood Restaurant,Austrian Restaurant,Event Space,Falafel Restaurant
8,9.919819,Dessert Shop,Grocery Store,French Restaurant,Bus Stop,Soup Place,Church,Supermarket,Greek Restaurant,Taverna
9,10.014162,Italian Restaurant,Restaurant,Indian Restaurant,German Restaurant,French Restaurant,Burger Joint,Café,Grocery Store,Gay Bar
10,9.995835,Hotel,Italian Restaurant,Restaurant,Bar,Museum,Exhibit,Bistro,Tapas Restaurant,Tea Room
11,9.959432,Bar,Cocktail Bar,Pub,Restaurant,Nightclub,Vietnamese Restaurant,Rock Club,German Restaurant,Italian Restaurant
12,9.964898,Bar,Café,Clothing Store,Restaurant,Gastropub,Falafel Restaurant,Record Shop,Cocktail Bar,Burger Joint


**Cluster 3**

In [23]:
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 2, hamburg_merged.columns[[1] + list(range(5, hamburg_merged.shape[1]))]]

Unnamed: 0,Longitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,10.2267,Playground,Restaurant,Electronics Store,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant,Falafel Restaurant,Exhibit


**Use 5 clusters**

Even if we use five clusters, the resulting map is very similar to the one with three clusters. **Note:** For simplicity, the code of the above cells is merged into one cell, but with changing the number of clusters to 5.

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = hamburg_grouped['Neighborhood']

for ind in np.arange(hamburg_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(hamburg_grouped.iloc[ind, :], num_top_venues)

kclusters = 5

hamburg_grouped_clustering = hamburg_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(hamburg_grouped_clustering)

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

hamburg_merged = neighborhoods_df
hamburg_merged = hamburg_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

hamburg_merged = hamburg_merged[hamburg_merged["Neighborhood"] != "Hamburg-Mitte"]
hamburg_merged["Cluster Labels"] = hamburg_merged["Cluster Labels"].astype(int)

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(hamburg_merged['Latitude'], hamburg_merged['Longitude'], hamburg_merged['Neighborhood'], hamburg_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters