# Capstone Project

## Battle of the Neighbourhoods - Stockholm versus Gothenburg

### 1. Download and explore the data

In [4]:
# Import the important libraries
import json # to handle json files
import os # to read API credentials from environment variables

#!conda install -c conda-forge folium=0.5.0
import folium # for mapping
import numpy as np
import pandas as pd
import requests # library to handle requests
# !conda install -c conda-forge geopy --yes # Install the geopy library
from geopy.geocoders import Nominatim # to convert addresses into latitude and longitudes
from pandas.io.json import json_normalize # to convert json to pandas data frame
from sklearn.cluster import KMeans # for clustering 
print('Libraries imported successfully')

Libraries imported successfully


In [48]:
# Explore the data
# The tab-separated file is read with explicit column names supplied via `names`.
mycolumns = ['ISOCode','Postal Code','Place Name','State','State Code','County','County Code','del1','del2','Latitude','Longitude','Accuracy']
swdf = pd.read_csv('SE.txt', sep='\t',names=mycolumns)
# Replace Göteborg with Gothenburg from Swedish to English spelling.
# The result is assigned back to the frame: calling replace(..., inplace=True)
# on a column selection may modify a temporary copy and leave swdf unchanged
# in recent pandas versions.
swdf[['Place Name','County']] = swdf[['Place Name','County']].replace('Göteborg','Gothenburg')
swdf.head()

Unnamed: 0,ISOCode,Postal Code,Place Name,State,State Code,County,County Code,del1,del2,Latitude,Longitude,Accuracy
0,SE,186 00,Vallentuna,Stockholm,AB,Vallentuna,115.0,,,59.5344,18.0776,4.0
1,SE,186 01,Vallentuna,Stockholm,AB,Vallentuna,115.0,,,59.5344,18.0776,4.0
2,SE,186 03,Brottby,Stockholm,AB,Vallentuna,115.0,,,59.5632,18.2403,4.0
3,SE,186 21,Vallentuna,Stockholm,AB,Vallentuna,115.0,,,59.5344,18.0776,4.0
4,SE,186 22,Vallentuna,Stockholm,AB,Vallentuna,115.0,,,59.5344,18.0776,4.0


In [53]:
# Keep only the place, county and coordinate columns for the rest of the analysis
fdf = swdf.loc[:, ['Place Name','County','Latitude','Longitude']]
print(fdf.shape)
fdf.head()

(16403, 4)


Unnamed: 0,Place Name,County,Latitude,Longitude
0,Vallentuna,Vallentuna,59.5344,18.0776
1,Vallentuna,Vallentuna,59.5344,18.0776
2,Brottby,Vallentuna,59.5632,18.2403
3,Vallentuna,Vallentuna,59.5344,18.0776
4,Vallentuna,Vallentuna,59.5344,18.0776


In [152]:
# Let us separate the Stockholm and Gothenburg rows into two data frames,
# each re-indexed from 0
stdf = fdf[fdf['County'] == 'Stockholm'].reset_index(drop=True)
gtdf = fdf[fdf['County'] == 'Gothenburg'].reset_index(drop=True)
print('Stockholm size:',stdf.shape)
print('Gothenburg size:', gtdf.shape)
# Only the last expression of a cell is rendered, so show both previews
# explicitly (display is provided by the notebook kernel).
display(stdf.head(), gtdf.head())

Stockholm size: (1380, 4)
Gothenburg size: (732, 4)


Unnamed: 0,Place Name,County,Latitude,Longitude
0,Gothenburg,Gothenburg,57.7072,11.9668
1,Gothenburg,Gothenburg,57.7072,11.9668
2,Gothenburg,Gothenburg,57.7072,11.9668
3,Gothenburg,Gothenburg,57.7072,11.9668
4,Gothenburg,Gothenburg,57.7072,11.9668


In [153]:
# Collapse each city to one row per place name, keeping the 'first'
# County, Latitude and Longitude of every group
first_seen = {'County':'first','Latitude':'first','Longitude':'first'}
stdf = stdf.groupby('Place Name').agg(first_seen).reset_index()
gtdf = gtdf.groupby('Place Name').agg(first_seen).reset_index()

In [155]:
# Report the size of each grouped frame and list the Gothenburg places
for grouped in (stdf, gtdf):
    print(grouped.shape)
gtdf.head(50)

(21, 4)
(21, 4)


Unnamed: 0,Place Name,County,Latitude,Longitude
0,Agnesberg,Gothenburg,57.7833,12.0
1,Angered,Gothenburg,57.7833,12.1
2,Askim,Gothenburg,57.6158,11.9456
3,Asperö,Gothenburg,57.6479,11.7937
4,Billdal,Gothenburg,57.5667,11.9333
5,Bohus,Gothenburg,57.85,12.0167
6,Brännö,Gothenburg,57.6453,11.7817
7,Donsö,Gothenburg,57.6,11.7992
8,Gothenburg,Gothenburg,57.7072,11.9668
9,Gunnilse,Gothenburg,57.8,12.0667


In [156]:
# Use the geopy library to get the latitude and longitude of both Stockholm and Gothenburg
s_address = 'Stockholm'
g_address = 'Gothenburg'
geolocator = Nominatim(user_agent="s_explorer")
s_location = geolocator.geocode(s_address)
g_location = geolocator.geocode(g_address)
# geocode() yields None when the service finds no match; stop with a clear
# message instead of failing on the attribute access below.
if s_location is None:
    raise ValueError('Could not geocode {!r}'.format(s_address))
if g_location is None:
    raise ValueError('Could not geocode {!r}'.format(g_address))
s_lat = s_location.latitude
s_long = s_location.longitude
g_lat = g_location.latitude
g_long = g_location.longitude
print('The cordinates for Stockholm are Latitude:',s_lat,'and Longitude:',s_long)
print('The cordinates for Gothenburg are Latitude:',g_lat,'and Longitude:',g_long)

The cordinates for Stockholm are Latitude: 59.3251172 and Longitude: 18.0710935
The cordinates for Gothenburg are Latitude: 57.7072326 and Longitude: 11.9670171


In [358]:
# Base map centred on the geocoded Stockholm coordinates
stock_map = folium.Map(location=[s_lat,s_long], zoom_start=10)

# One blue circle per Stockholm place; the popup reads "<place>, <county>"
for slat,slng,scounty,splace in zip(stdf['Latitude'],stdf['Longitude'],stdf['County'],stdf['Place Name']):
    stock_map.add_child(
        folium.CircleMarker(
            location=[slat,slng],
            radius=5,
            popup='{}, {}'.format(splace,scounty),
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False,
        )
    )
stock_map

In [178]:
# Base map centred on the geocoded Gothenburg coordinates
goth_map = folium.Map(location=[g_lat,g_long], zoom_start=10)

# One red circle per Gothenburg place; the popup reads "<place>, <county>"
for glat,glng,gcounty,gplace in zip(gtdf['Latitude'],gtdf['Longitude'],gtdf['County'],gtdf['Place Name']):
    goth_map.add_child(
        folium.CircleMarker(
            location=[glat,glng],
            radius=5,
            popup='{}, {}'.format(gplace,gcounty),
            color='red',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False,
        )
    )

goth_map

### 2. Explore and Analyse the Neighbourhoods

In [312]:
# Get the Foursquare details
# Credentials are read from the environment so that secrets are not stored in
# (or shared together with) the notebook. Any key that was previously
# committed in this file should be treated as exposed and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # maximum number of venues requested per place
if not (CLIENT_ID and CLIENT_SECRET):
    print('Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before requesting venues')

In [337]:
# Define a function to get all the data and ...
# Use a for loop to get the top 100 venues for every place in the two counties of Stockholm and Gothenburg

def get_venues(places,lats,longs,radius = 500):
    """Collect Foursquare venues around each place into one DataFrame.

    For every (place, lat, long) triple the Foursquare ``venues/explore``
    endpoint is queried with the module-level CLIENT_ID, CLIENT_SECRET,
    VERSION and LIMIT settings.

    Parameters
    ----------
    places, lats, longs : iterables of equal length
        Place names and their coordinates; they are consumed in parallel.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Place Name',
        'Place Latitude', 'Place Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but has
        these columns) when no venue is returned at all. 'Venue Category' is
        None for a venue that carries no category.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status.
    """
    venue_columns = ['Place Name',
                     'Place Latitude',
                     'Place Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    rows = []
    for place,lat,long in zip(places,lats,longs):
        # Let requests build and encode the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, long),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # do not hang the notebook on a stalled connection
        )
        # Surface API failures (bad credentials, quota, ...) right here
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # Only keep the fields we want from each venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                place,
                lat,
                long,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the columns here keeps the schema even when rows is empty
    return pd.DataFrame(rows, columns=venue_columns)

In [338]:
# Fetch the Foursquare venues around every Stockholm place
stock_venues = get_venues(stdf['Place Name'], stdf['Latitude'], stdf['Longitude'])

In [339]:
# Fetch the Foursquare venues around every Gothenburg place
gothen_venues = get_venues(gtdf['Place Name'], gtdf['Latitude'], gtdf['Longitude'])

In [340]:
# Total number of venue rows collected for each city
print('Stockhom returned',len(stock_venues),'venues')
print('Gothenburg returned',len(gothen_venues),'venues')

Stockhom returned 379 venues
Gothenburg returned 104 venues


In [367]:
# Per-column row counts of the Stockholm venues, grouped by place
stock_venues.groupby(by='Place Name').count()

Unnamed: 0_level_0,Place Latitude,Place Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Place Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bagarmossen,6,6,6,6,6,6
Bandhagen,16,16,16,16,16,16
Bromma,4,4,4,4,4,4
Enskede,11,11,11,11,11,11
Enskede Gård,12,12,12,12,12,12
Enskededalen,28,28,28,28,28,28
Farsta,5,5,5,5,5,5
Hägersten,12,12,12,12,12,12
Johanneshov,28,28,28,28,28,28
Kista,61,61,61,61,61,61


In [342]:
# Per-column row counts of the Gothenburg venues, grouped by place
gothen_venues.groupby(by='Place Name').count()

Unnamed: 0_level_0,Place Latitude,Place Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Place Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agnesberg,3,3,3,3,3,3
Angered,1,1,1,1,1,1
Askim,4,4,4,4,4,4
Billdal,1,1,1,1,1,1
Bohus,4,4,4,4,4,4
Brännö,4,4,4,4,4,4
Donsö,5,5,5,5,5,5
Gothenburg,52,52,52,52,52,52
Gunnilse,1,1,1,1,1,1
Hisings Backa,4,4,4,4,4,4


#### Analysing the data

In [343]:
# One-hot encode the Stockholm venue categories: one indicator column per category
stock_one = pd.get_dummies(stock_venues[['Venue Category']],prefix="",prefix_sep="")
# Tag every venue row with its place, then move that (last) column to the front
stock_one['Place Name'] = stock_venues['Place Name']
stock_temp = list(stock_one.columns[-1:]) + list(stock_one.columns[:-1])
stock_one = stock_one.loc[:, stock_temp]
stock_one.head()

Unnamed: 0,Place Name,American Restaurant,Arts & Crafts Store,Asian Restaurant,Assisted Living,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,...,Sushi Restaurant,Taco Place,Tapas Restaurant,Tennis Stadium,Thai Restaurant,Theater,Trail,Train Station,Vietnamese Restaurant,Yoga Studio
0,Bagarmossen,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bagarmossen,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bagarmossen,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bagarmossen,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bagarmossen,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [344]:
# One-hot encode the Gothenburg venue categories: one indicator column per category
gothen_one = pd.get_dummies(gothen_venues[['Venue Category']],prefix="",prefix_sep="")
# Tag every venue row with its place, then move that (last) column to the front
gothen_one['Place Name'] = gothen_venues['Place Name']
gothen_temp = list(gothen_one.columns[-1:]) + list(gothen_one.columns[:-1])
gothen_one = gothen_one.loc[:, gothen_temp]
gothen_one.head()

Unnamed: 0,Place Name,Asian Restaurant,Athletics & Sports,Bar,Beer Garden,Bistro,Boat or Ferry,Bookstore,Burger Joint,Burrito Place,...,Shoe Store,Shopping Mall,Soccer Field,Speakeasy,Stadium,Supermarket,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park
0,Agnesberg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Agnesberg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Agnesberg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Angered,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Askim,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [345]:
# Mean of every one-hot category column per Stockholm place, i.e. the share
# of that place's venues falling in each category
stock_group = stock_one.groupby('Place Name').mean()
stock_group = stock_group.reset_index()
stock_group

Unnamed: 0,Place Name,American Restaurant,Arts & Crafts Store,Asian Restaurant,Assisted Living,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,...,Sushi Restaurant,Taco Place,Tapas Restaurant,Tennis Stadium,Thai Restaurant,Theater,Trail,Train Station,Vietnamese Restaurant,Yoga Studio
0,Bagarmossen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bandhagen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0
2,Bromma,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Enskede,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0
4,Enskede Gård,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Enskededalen,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,...,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Farsta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0
7,Hägersten,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Johanneshov,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,...,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Kista,0.016393,0.0,0.065574,0.0,0.0,0.016393,0.016393,0.016393,0.0,...,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0


In [346]:
# Mean of every one-hot category column per Gothenburg place, i.e. the share
# of that place's venues falling in each category
gothen_group = gothen_one.groupby('Place Name').mean()
gothen_group = gothen_group.reset_index()
gothen_group

Unnamed: 0,Place Name,Asian Restaurant,Athletics & Sports,Bar,Beer Garden,Bistro,Boat or Ferry,Bookstore,Burger Joint,Burrito Place,...,Shoe Store,Shopping Mall,Soccer Field,Speakeasy,Stadium,Supermarket,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park
0,Agnesberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Angered,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Askim,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25
3,Billdal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bohus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
5,Brännö,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Donsö,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Gothenburg,0.0,0.0,0.019231,0.019231,0.019231,0.0,0.038462,0.076923,0.019231,...,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.019231,0.019231,0.0
8,Gunnilse,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Hisings Backa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [347]:
# Number of places left in each grouped frame
print('Stockholm is:',len(stock_group),'lines long')
print('Gothenburg is:',len(gothen_group),'lines long')

Stockholm is: 20 lines long
Gothenburg is: 17 lines long


In [348]:
# Lets write a function to sort out the venues in descending order

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first element of ``row`` is skipped, the remaining values are sorted
    in descending order, and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [349]:
# Now lets view the top ten venues for each place
# Stockholm
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Place Name']
for ind in range(num_top_venues):
    # Ranks 1-3 use their own suffix, every later rank uses "th". An explicit
    # condition replaces the former bare `except`, which would also have
    # hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per place in stock_group
stock_venues_sorted = pd.DataFrame(columns=columns)
stock_venues_sorted['Place Name'] = stock_group['Place Name']

# fill the ranking columns of each row with that place's top categories
for ind in range(stock_group.shape[0]):
    stock_venues_sorted.iloc[ind, 1:] = return_most_common_venues(stock_group.iloc[ind, :], num_top_venues)

stock_venues_sorted.head()

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bagarmossen,Gym / Fitness Center,Grocery Store,Pizza Place,Plaza,Bus Station,Diner,Yoga Studio,Farmers Market,Concert Hall,Convenience Store
1,Bandhagen,Light Rail Station,Fast Food Restaurant,Pedestrian Plaza,Bed & Breakfast,Hotel,Flower Shop,Music Venue,Convenience Store,Café,Bus Station
2,Bromma,Bistro,Park,Soccer Field,Lake,Electronics Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Yoga Studio,Flower Shop
3,Enskede,Thai Restaurant,Supermarket,Plaza,Hardware Store,Park,Metro Station,Pizza Place,Bakery,Bus Station,Stadium
4,Enskede Gård,Café,Bus Stop,Scandinavian Restaurant,Plaza,Kebab Restaurant,Forest,Bar,Restaurant,Park,Greek Restaurant


In [350]:
# Now lets view the top ten venues for each place
# Gothenburg
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Place Name']
for ind in range(num_top_venues):
    # Ranks 1-3 use their own suffix, every later rank uses "th". An explicit
    # condition replaces the former bare `except`, which would also have
    # hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per place in gothen_group
gothen_venues_sorted = pd.DataFrame(columns=columns)
gothen_venues_sorted['Place Name'] = gothen_group['Place Name']

# fill the ranking columns of each row with that place's top categories
for ind in range(gothen_group.shape[0]):
    gothen_venues_sorted.iloc[ind, 1:] = return_most_common_venues(gothen_group.iloc[ind, :], num_top_venues)

gothen_venues_sorted.head()

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agnesberg,Construction & Landscaping,Furniture / Home Store,Bus Stop,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden
1,Angered,Construction & Landscaping,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store,Flea Market
2,Askim,Water Park,Athletics & Sports,Stadium,Bus Stop,Furniture / Home Store,Diner,Electronics Store,Fast Food Restaurant,Flea Market,Gift Shop
3,Billdal,Gym / Fitness Center,Water Park,Convenience Store,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store,Flea Market
4,Bohus,Pizza Place,Train Station,Shopping Mall,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Construction & Landscaping,Furniture / Home Store


### 3. Clustering the Neighbourhood with K-Means

In [351]:
# Stockholm
# set number of clusters
skclusters = 5

# Feature matrix: the per-place category means without the name column.
# `columns=` is used because the positional axis argument of drop() is not
# accepted by pandas 2.x.
stock_group_clustering = stock_group.drop(columns='Place Name')

# run k-means clustering; n_init is given explicitly so the number of
# restarts does not depend on the installed scikit-learn version's default
stock_kmeans = KMeans(n_clusters=skclusters, random_state=0, n_init=10).fit(stock_group_clustering)

# Remove a label column left by an earlier run so the cell can be re-executed
# (insert() raises if the column already exists)
if 'Stockholm Labels' in stock_venues_sorted.columns:
    stock_venues_sorted = stock_venues_sorted.drop(columns='Stockholm Labels')
stock_venues_sorted.insert(0, 'Stockholm Labels', stock_kmeans.labels_)

# merge grouped data with initial stockholm data; join() returns a new frame,
# so stdf itself is left untouched
stock_merged = stdf.join(stock_venues_sorted.set_index('Place Name'), on='Place Name')

stock_merged.head()

Unnamed: 0,Place Name,County,Latitude,Longitude,Stockholm Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bagarmossen,Stockholm,59.2556,18.1167,3.0,Gym / Fitness Center,Grocery Store,Pizza Place,Plaza,Bus Station,Diner,Yoga Studio,Farmers Market,Concert Hall,Convenience Store
1,Bandhagen,Stockholm,59.2968,18.0313,3.0,Light Rail Station,Fast Food Restaurant,Pedestrian Plaza,Bed & Breakfast,Hotel,Flower Shop,Music Venue,Convenience Store,Café,Bus Station
2,Bromma,Stockholm,59.35,17.9167,3.0,Bistro,Park,Soccer Field,Lake,Electronics Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Yoga Studio,Flower Shop
3,Enskede,Stockholm,59.2833,18.0667,3.0,Thai Restaurant,Supermarket,Plaza,Hardware Store,Park,Metro Station,Pizza Place,Bakery,Bus Station,Stadium
4,Enskede Gård,Stockholm,59.3,18.05,3.0,Café,Bus Stop,Scandinavian Restaurant,Plaza,Kebab Restaurant,Forest,Bar,Restaurant,Park,Greek Restaurant


In [352]:
# Gothenburg
# set number of clusters
gkclusters = 5

# Feature matrix: the per-place category means without the name column.
# `columns=` is used because the positional axis argument of drop() is not
# accepted by pandas 2.x.
gothen_group_clustering = gothen_group.drop(columns='Place Name')

# run k-means clustering; n_init is given explicitly so the number of
# restarts does not depend on the installed scikit-learn version's default
gothen_kmeans = KMeans(n_clusters=gkclusters, random_state=0, n_init=10).fit(gothen_group_clustering)

# Remove a label column left by an earlier run so the cell can be re-executed
# (insert() raises if the column already exists)
if 'Gothenburg Labels' in gothen_venues_sorted.columns:
    gothen_venues_sorted = gothen_venues_sorted.drop(columns='Gothenburg Labels')
gothen_venues_sorted.insert(0, 'Gothenburg Labels', gothen_kmeans.labels_)

# merge grouped data with initial Gothenburg data; join() returns a new frame,
# so gtdf itself is left untouched
gothen_merged = gtdf.join(gothen_venues_sorted.set_index('Place Name'), on='Place Name')

In [353]:
# Preview the first ten merged Gothenburg rows
gothen_merged.iloc[:10]

Unnamed: 0,Place Name,County,Latitude,Longitude,Gothenburg Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agnesberg,Gothenburg,57.7833,12.0,0.0,Construction & Landscaping,Furniture / Home Store,Bus Stop,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden
1,Angered,Gothenburg,57.7833,12.1,0.0,Construction & Landscaping,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store,Flea Market
2,Askim,Gothenburg,57.6158,11.9456,0.0,Water Park,Athletics & Sports,Stadium,Bus Stop,Furniture / Home Store,Diner,Electronics Store,Fast Food Restaurant,Flea Market,Gift Shop
3,Asperö,Gothenburg,57.6479,11.7937,,,,,,,,,,,
4,Billdal,Gothenburg,57.5667,11.9333,3.0,Gym / Fitness Center,Water Park,Convenience Store,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store,Flea Market
5,Bohus,Gothenburg,57.85,12.0167,0.0,Pizza Place,Train Station,Shopping Mall,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Construction & Landscaping,Furniture / Home Store
6,Brännö,Gothenburg,57.6453,11.7817,0.0,Boat or Ferry,Café,Mountain,Water Park,Furniture / Home Store,Diner,Electronics Store,Fast Food Restaurant,Flea Market,Garden
7,Donsö,Gothenburg,57.6,11.7992,0.0,Pier,Convenience Store,Island,Business Service,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store
8,Gothenburg,Gothenburg,57.7072,11.9668,0.0,Hotel,Burger Joint,Coffee Shop,Café,Pub,Scandinavian Restaurant,Bookstore,Italian Restaurant,Department Store,Hot Dog Joint
9,Gunnilse,Gothenburg,57.8,12.0667,0.0,Shoe Store,Water Park,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store


#### Viewing the resultant Map

In [365]:
# Map of the Stockholm places with their cluster label shown in the popup
stock_clusters = folium.Map(location=[s_lat,s_long], zoom_start=11)

# add one marker per place; the popup reads "<place>, <cluster label>"
for slat,slng,splace,slabel in zip(stock_merged['Latitude'],stock_merged['Longitude'],stock_merged['Place Name'],stock_merged['Stockholm Labels']):
    stock_clusters.add_child(
        folium.CircleMarker(
            location=[slat,slng],
            radius=5,
            popup='{}, {}'.format(splace,slabel),
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False,
        )
    )
stock_clusters

In [371]:
# Map of the Gothenburg places with their cluster label shown in the popup
gothen_clusters = folium.Map(location=[g_lat,g_long], zoom_start=10)

# add one marker per place; the popup reads "<place>, <cluster label>"
for nglat,nglng,ngplace,ngcluster in zip(gothen_merged['Latitude'],gothen_merged['Longitude'],gothen_merged['Place Name'],gothen_merged['Gothenburg Labels']):
    gothen_clusters.add_child(
        folium.CircleMarker(
            location=[nglat,nglng],
            radius=5,
            popup='{}, {}'.format(ngplace,ngcluster),
            color='red',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False,
        )
    )
gothen_clusters



### 4. Examine the clusters

##### Stockholm Cluster 1

In [379]:
# Stockholm clusters
# Places labelled 0: keep the name column (position 0) and the ranked venue
# columns (position 5 onwards)
stock_merged[stock_merged['Stockholm Labels'] == 0].iloc[:, [0] + list(range(5, stock_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Vårby,Gym Pool,Pizza Place,Convenience Store,Metro Station,Cocktail Bar,Concert Hall,Convention Center,Cosmetics Shop,Deli / Bodega,Department Store


##### Stockholm Cluster 2

In [392]:
# Places labelled 2: name column plus the ranked venue columns.
# NOTE(review): the Stockholm cells filter on labels 0, 2, 3 and 4, so label 1
# is never displayed - confirm whether that is intended.
stock_merged[stock_merged['Stockholm Labels'] == 2].iloc[:, [0] + list(range(5, stock_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Sköndal,Assisted Living,Café,Bus Station,Park,Yoga Studio,Flower Shop,Convenience Store,Convention Center,Cosmetics Shop,Deli / Bodega


##### Stockholm Cluster 3

In [382]:
# Places labelled 3: name column plus the ranked venue columns
stock_merged[stock_merged['Stockholm Labels'] == 3].iloc[:, [0] + list(range(5, stock_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bagarmossen,Gym / Fitness Center,Grocery Store,Pizza Place,Plaza,Bus Station,Diner,Yoga Studio,Farmers Market,Concert Hall,Convenience Store
1,Bandhagen,Light Rail Station,Fast Food Restaurant,Pedestrian Plaza,Bed & Breakfast,Hotel,Flower Shop,Music Venue,Convenience Store,Café,Bus Station
2,Bromma,Bistro,Park,Soccer Field,Lake,Electronics Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Yoga Studio,Flower Shop
3,Enskede,Thai Restaurant,Supermarket,Plaza,Hardware Store,Park,Metro Station,Pizza Place,Bakery,Bus Station,Stadium
4,Enskede Gård,Café,Bus Stop,Scandinavian Restaurant,Plaza,Kebab Restaurant,Forest,Bar,Restaurant,Park,Greek Restaurant
5,Enskededalen,Hostel,Steakhouse,Hotel,Café,Modern European Restaurant,Scandinavian Restaurant,Pub,Brewery,Burger Joint,Cheese Shop
7,Hägersten,Pizza Place,Gym / Fitness Center,Pub,Beach,Bathing Area,Bakery,Bus Stop,Café,Supermarket,Golf Course
9,Johanneshov,Hostel,Steakhouse,Hotel,Café,Modern European Restaurant,Scandinavian Restaurant,Pub,Brewery,Burger Joint,Cheese Shop
10,Kista,Gym / Fitness Center,Asian Restaurant,Restaurant,Hotel,Middle Eastern Restaurant,Café,Coffee Shop,Grocery Store,American Restaurant,Kids Store
12,Skärholmen,Clothing Store,Supermarket,Playground,Fast Food Restaurant,Coffee Shop,Shopping Mall,Middle Eastern Restaurant,Plaza,Bagel Shop,Liquor Store


##### Stockholm Cluster 4

In [383]:
# Places labelled 4: name column plus the ranked venue columns
stock_merged[stock_merged['Stockholm Labels'] == 4].iloc[:, [0] + list(range(5, stock_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Farsta,Trail,Smoke Shop,Soccer Field,Forest,Yoga Studio,Diner,Farmers Market,Falafel Restaurant,Electronics Store,Department Store


#### Gothenburg clusters

##### Gothenburg Cluster 1

In [384]:
# Gothenburg clusters
# Places labelled 0: keep the name column (position 0) and the ranked venue
# columns (position 5 onwards)
gothen_merged[gothen_merged['Gothenburg Labels'] == 0].iloc[:, [0] + list(range(5, gothen_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agnesberg,Construction & Landscaping,Furniture / Home Store,Bus Stop,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden
1,Angered,Construction & Landscaping,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store,Flea Market
2,Askim,Water Park,Athletics & Sports,Stadium,Bus Stop,Furniture / Home Store,Diner,Electronics Store,Fast Food Restaurant,Flea Market,Gift Shop
5,Bohus,Pizza Place,Train Station,Shopping Mall,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Construction & Landscaping,Furniture / Home Store
6,Brännö,Boat or Ferry,Café,Mountain,Water Park,Furniture / Home Store,Diner,Electronics Store,Fast Food Restaurant,Flea Market,Garden
7,Donsö,Pier,Convenience Store,Island,Business Service,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store
8,Gothenburg,Hotel,Burger Joint,Coffee Shop,Café,Pub,Scandinavian Restaurant,Bookstore,Italian Restaurant,Department Store,Hot Dog Joint
9,Gunnilse,Shoe Store,Water Park,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store
16,Styrsö,Café,Supermarket,Gift Shop,Restaurant,Scandinavian Restaurant,Water Park,Fast Food Restaurant,Design Studio,Diner,Electronics Store
18,Torslanda,Asian Restaurant,Gym / Fitness Center,Grocery Store,Soccer Field,Convenience Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store


##### Gothenburg Cluster 2

In [385]:
# Places labelled 1: name column plus the ranked venue columns
gothen_merged[gothen_merged['Gothenburg Labels'] == 1].iloc[:, [0] + list(range(5, gothen_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Säve,Motorcycle Shop,Water Park,Hot Dog Joint,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store


##### Gothenburg Cluster 3

In [386]:
# Places labelled 2: name column plus the ranked venue columns
gothen_merged[gothen_merged['Gothenburg Labels'] == 2].iloc[:, [0] + list(range(5, gothen_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Hovås,Massage Studio,Water Park,Hot Dog Joint,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store


##### Gothenburg Cluster 4

In [387]:
# Places labelled 3: name column plus the ranked venue columns
gothen_merged[gothen_merged['Gothenburg Labels'] == 3].iloc[:, [0] + list(range(5, gothen_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Billdal,Gym / Fitness Center,Water Park,Convenience Store,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop,Garden,Furniture / Home Store,Flea Market


In [388]:
# Gothenburg cluster 5 - places labelled 4: name column plus the ranked venue columns
gothen_merged[gothen_merged['Gothenburg Labels'] == 4].iloc[:, [0] + list(range(5, gothen_merged.shape[1]))]

Unnamed: 0,Place Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Hisings Backa,Garden,Restaurant,Diner,Water Park,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop
11,Hisings Kärra,Garden,Restaurant,Diner,Water Park,Convenience Store,Gym / Fitness Center,Grocery Store,Greek Restaurant,Gourmet Shop,Gift Shop
