# Applying ML to Berlin metro dataset

by Tom Fenske

### 1. Importing Libraries and Dataset

In [1]:
#Importing necessary libraries

import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

%matplotlib inline
print('Libraries successfully imported')

Libraries successfully imported


In [2]:
# Load the station table from the CSV file in the datasets folder
df_metro = pd.read_csv('../../II_Datasets/Berlin_metro.csv')
print('Done')

Done


In [3]:
# Preview the first rows of the loaded station table
df_metro.head()

Unnamed: 0.1,Unnamed: 0,Station,lat,lon
0,0,Adlershof,52.434722,13.541389
1,1,Ahrensfelde,52.571667,13.565
2,2,Albrechtshof,52.549444,13.128333
3,3,Alexanderplatz,52.521389,13.411944
4,4,Alt-Reinickendorf,52.577778,13.350556


In [4]:
# Remove the 'Unnamed: 0' column (presumably a saved row index; it only
# repeats the row numbers in the preview above).
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps the cell idempotent: re-running it no longer raises KeyError once the
# column is gone.
df_metro = df_metro.drop(columns='Unnamed: 0', errors='ignore')
df_metro.head()

Unnamed: 0,Station,lat,lon
0,Adlershof,52.434722,13.541389
1,Ahrensfelde,52.571667,13.565
2,Albrechtshof,52.549444,13.128333
3,Alexanderplatz,52.521389,13.411944
4,Alt-Reinickendorf,52.577778,13.350556


In [5]:
# Size of the station table as (rows, columns)
df_metro.shape

(172, 3)

### Creating Foursquare API call to retrieve venues around the stations

In [6]:
#API Credentials
# The Foursquare credentials are read from the environment so that secrets are
# never stored in (or leaked through) the notebook file. The key pair that was
# previously hardcoded here should be regenerated, since it has been exposed.
client_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
client_secret = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not client_ID or not client_secret:
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before calling the API')

version='20200801'  # sent as the `v` query parameter
radius=1500         # sent as the `radius` query parameter
limit=50            # sent as the `limit` query parameter
# Reference coordinates (same values as Berlin_lat / Berlin_lon used for the map)
lat=52.520008
lon=13.404954


In [7]:
# Example explore request for the reference coordinates defined above.
# Note: getNearbyVenues builds its own URL per station and does not use `url`.
url = (
    'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
    .format(client_ID, client_secret, version, lat, lon, radius, limit)
)

In [8]:
#Defining API call for nearby venues around the coordinates of each station

def getNearbyVenues(names, latitudes, longitudes, radius=1500):
    """Collect the venues returned by the Foursquare explore endpoint per station.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Station names and coordinates; they are iterated in parallel.
    radius : int, default 1500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Station',
        'Station Latitude', 'Station Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but still
        has these columns) when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If a request is answered with an HTTP error status.

    Notes
    -----
    Reads the notebook-level ``client_ID``, ``client_secret``, ``version`` and
    ``limit``. Prints every station name as a progress indicator.
    """
    columns = ['Station',
               'Station Latitude',
               'Station Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests assemble and encode the query string
        params = {
            'client_id': client_ID,
            'client_secret': client_secret,
            'v': version,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # a timeout prevents one stalled request from hanging the whole loop;
        # raise_for_status surfaces HTTP errors instead of an opaque KeyError
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue without categories is kept with a missing category
            # instead of aborting the whole run with an IndexError
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards raises a length-mismatch error
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [9]:
# Query the API for every station of the Berlin station table
Station_venues = getNearbyVenues(
    names=df_metro['Station'],
    latitudes=df_metro['lat'],
    longitudes=df_metro['lon'],
)



Adlershof
Ahrensfelde
Albrechtshof
Alexanderplatz
Alt-Reinickendorf
Altglienicke
Anhalter Bahnhof
Attilastraße
Babelsberg
Baumschulenweg
Bellevue
Bergfelde 
Bernau 
Bernau-Friedenstal
Betriebsbahnhof Rummelsburg
Betriebsbahnhof Schöneweide
Beusselstraße
Biesdorf
Birkenstein
Birkenwerder 
Blankenburg
Blankenfelde 
Borgsdorf
Bornholmer Straße
Botanischer Garten
Brandenburger Tor
Buch
Buckower Chaussee
Bundesplatz
Charlottenburg
Dahlewitz
Dreilinden
Düppel
Eichborndamm
Eichkamp
Eichwalde
Erkner
Falkensee
Feuerbachstraße
Flughafen Berlin-Brandenburg
Flughafen Berlin-Schönefeld
Frankfurter Allee
Fredersdorf 
Friedenau
Friedrichsfelde Ost
Friedrichshagen
Friedrichstraße
Frohnau
Gartenfeld
Gehrenseestraße
Gesundbrunnen
Greifswalder Straße
Griebnitzsee
Grünau
Grünbergallee
Grunewald
Hackescher Markt
Halensee
Hauptbahnhof
Heerstraße
Hegermühle
Heidelberger Platz
Heiligensee
Hennigsdorf 
Hennigsdorf Nord
Hermannstraße
Hermsdorf
Hirschgarten
Hohen Neuendorf 
Hohenschönhausen
Hohenschöpping
Hohenz

In [10]:
# (rows, columns) of the collected venue table: one row per returned venue
Station_venues.shape

(8496, 7)

In [11]:
# Preview the first venue rows
Station_venues.head()

Unnamed: 0,Station,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Adlershof,52.434722,13.541389,Olympia Greek Food,52.433982,13.5385,Greek Restaurant
1,Adlershof,52.434722,13.541389,Griechisches Restaurant Athen,52.434901,13.54668,Greek Restaurant
2,Adlershof,52.434722,13.541389,Mia Toscana,52.438327,13.549573,Italian Restaurant
3,Adlershof,52.434722,13.541389,Food - Taste of the world on the road,52.428786,13.538168,Food Truck
4,Adlershof,52.434722,13.541389,mani mogo,52.432893,13.531991,Korean Restaurant


In [12]:
# Number of distinct venue categories across all stations
print('There are {} uniques categories.'.format(Station_venues['Venue Category'].unique().size))

There are 383 uniques categories.


In [13]:
# Number of distinct stations for which venues were returned
print('There are {} uniques stations.'.format(Station_venues['Station'].unique().size))

There are 172 uniques stations.


In [14]:
# One-hot encode the venue categories (one indicator column per category)
# in preparation of the k-means clustering
Station_onehot = pd.get_dummies(
    Station_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# attach the station name of every venue row as an additional (last) column
Station_onehot['Station'] = Station_venues['Station']

# rotate that last column to the front
fixed_columns = Station_onehot.columns.tolist()
fixed_columns = fixed_columns[-1:] + fixed_columns[:-1]
Station_onehot = Station_onehot[fixed_columns]

Station_onehot.head()

Unnamed: 0,Station,ATM,Adult Boutique,African Restaurant,Airport,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Animal Shelter,...,Waterfront,Whisky Bar,Windmill,Wine Bar,Wine Shop,Women's Store,Yemeni Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Adlershof,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Adlershof,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Adlershof,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Adlershof,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Adlershof,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# (rows, columns) of the one-hot encoded venue table
Station_onehot.shape

(8496, 384)

In [16]:
# Aggregate to one row per station: the mean of the one-hot indicators is the
# share of each venue category among that station's venues
Station_grouped = (
    Station_onehot
    .groupby('Station')
    .mean()
    .reset_index()
)
Station_grouped

Unnamed: 0,Station,ATM,Adult Boutique,African Restaurant,Airport,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Animal Shelter,...,Waterfront,Whisky Bar,Windmill,Wine Bar,Wine Shop,Women's Store,Yemeni Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Adlershof,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.000000,0.00,0.0
1,Ahrensfelde,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.000000,0.00,0.0
2,Albrechtshof,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.000000,0.00,0.0
3,Alexanderplatz,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.01,0.00,0.0,0.000000,0.00,0.0
4,Alt-Reinickendorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.000000,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,Zehlendorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.022727,0.00,0.0
168,Zehlendorf Süd,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.025641,0.00,0.0
169,Zepernick,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.000000,0.00,0.0
170,Zeuthen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.000000,0.00,0.0


In [17]:
# (rows, columns) after aggregating to one row per station
Station_grouped.shape

(172, 384)

In [18]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` (the station name in this notebook) is skipped;
    the remaining entries are ordered by value, largest first, and the index
    labels of the first ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [101]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: 'Station' followed by '1st', '2nd', '3rd', '4th', ... labels.
# An explicit bounds check replaces the former bare `except:`, which used an
# IndexError for control flow and would have hidden any other error as well.
columns = ['Station']
for ind in np.arange(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Rank the categories of every station first, then build the frame in one go
# instead of assigning row by row into an initially empty DataFrame.
top_venues = [
    return_most_common_venues(Station_grouped.iloc[row_pos, :], num_top_venues)
    for row_pos in range(Station_grouped.shape[0])
]
Station_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=Station_grouped.index)
Station_venues_sorted.insert(0, 'Station', Station_grouped['Station'])

Station_venues_sorted.head()

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adlershof,Supermarket,Café,Italian Restaurant,Tram Station,Gas Station,Drugstore,Greek Restaurant,Park,Plaza,Hotel
1,Ahrensfelde,Supermarket,Tram Station,Train Station,Skate Park,Animal Shelter,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
2,Albrechtshof,Supermarket,Train Station,Discount Store,Greek Restaurant,Furniture / Home Store,Trattoria/Osteria,Automotive Shop,Bus Stop,Lake,Thai Restaurant
3,Alexanderplatz,Hotel,Clothing Store,Café,Coffee Shop,Indie Movie Theater,Italian Restaurant,Ice Cream Shop,Vietnamese Restaurant,Optical Shop,Boutique
4,Alt-Reinickendorf,Supermarket,Bus Stop,Park,Metro Station,Chinese Restaurant,Grocery Store,Turkish Restaurant,Dry Cleaner,Trattoria/Osteria,Shopping Mall


In [102]:
# set number of clusters
kclusters = 5

# Feature matrix: the per-station category shares without the name column.
# `columns=` replaces the positional axis argument of `drop('Station', 1)`,
# which newer pandas versions no longer accept.
Station_grouped_clustering = Station_grouped.drop(columns='Station')

# run k-means clustering; random_state fixes the initialisation and an explicit
# n_init keeps the number of restarts independent of the scikit-learn
# version's default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(Station_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 1, 4, 0, 4, 4, 0, 4, 4, 4, 0, 1, 0, 2, 0, 0, 0, 4, 4, 1, 4, 2,
       0, 0, 0, 0, 0, 4, 0, 0, 2, 4, 4, 4, 0, 4, 4, 4, 0, 3, 4, 0, 0, 0,
       4, 0, 0, 4, 4, 4, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 1, 0, 4, 4, 2, 0,
       4, 4, 0, 1, 2, 0, 1, 0, 0, 0, 1, 0, 1, 1, 4, 4, 0, 0, 2, 4, 0, 4,
       4, 1, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 1, 0, 4, 4,
       0, 0, 0, 4, 1, 4, 0, 0, 1, 0, 4, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 4,
       4, 4, 0, 4, 0, 4, 4, 4, 0, 0, 0, 0, 2, 0, 0, 4, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 4, 4, 4, 0, 0, 4, 4, 0, 0, 0, 0, 1, 4, 0])

--------------------------------

In [21]:
#Elbow method
#distortions = []
#K = range(1,10)
#for k in K:
    #kmeans = KMeans(n_clusters=k, random_state=0).fit(Station_grouped_clustering)
    #distortions.append(kmeans.inertia_)

In [22]:
#plt.figure(figsize=(16,8))
#plt.plot(K, distortions, 'bx-')
#plt.xlabel('k')
#plt.ylabel('Distortion')
#plt.title('The Elbow Method showing the optimal k')
#plt.show()

--------------------------

In [103]:
# Add the cluster labels to the per-station venue table. The column is
# overwritten when it already exists so the cell can be re-run (insert() alone
# raises ValueError on the second run).
if 'Cluster Labels' in Station_venues_sorted.columns:
    Station_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    Station_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Left-join cluster label and top venues onto the station coordinates.
# Stations that are missing from the venue table come out of the join with
# NaN; they are dropped here (the former stand-alone dropna() call discarded
# its result) and the label column is cast back to int so it can index the
# colour list of the map.
Station_merged = (
    df_metro
    .join(Station_venues_sorted.set_index('Station'), on='Station')
    .dropna(subset=['Cluster Labels'])
    .astype({'Cluster Labels': int})
)

Station_merged.head() 

Unnamed: 0,Station,lat,lon,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adlershof,52.434722,13.541389,0,Supermarket,Café,Italian Restaurant,Tram Station,Gas Station,Drugstore,Greek Restaurant,Park,Plaza,Hotel
1,Ahrensfelde,52.571667,13.565,1,Supermarket,Tram Station,Train Station,Skate Park,Animal Shelter,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
2,Albrechtshof,52.549444,13.128333,4,Supermarket,Train Station,Discount Store,Greek Restaurant,Furniture / Home Store,Trattoria/Osteria,Automotive Shop,Bus Stop,Lake,Thai Restaurant
3,Alexanderplatz,52.521389,13.411944,0,Hotel,Clothing Store,Café,Coffee Shop,Indie Movie Theater,Italian Restaurant,Ice Cream Shop,Vietnamese Restaurant,Optical Shop,Boutique
4,Alt-Reinickendorf,52.577778,13.350556,4,Supermarket,Bus Stop,Park,Metro Station,Chinese Restaurant,Grocery Store,Turkish Restaurant,Dry Cleaner,Trattoria/Osteria,Shopping Mall


In [104]:
# create map
Berlin_lat=52.520008
Berlin_lon=13.404954

map_clusters = folium.Map(location=[Berlin_lat, Berlin_lon], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster (the former x / ys helper arrays only contributed their length)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# The loop variables are deliberately not called lat / lon: those names hold
# the reference coordinates defined with the API settings and would otherwise
# be silently overwritten by the last station's position.
for station_lat, station_lon, poi, cluster in zip(Station_merged['lat'], Station_merged['lon'], Station_merged['Station'], Station_merged['Cluster Labels']):
    # a station without a cluster label (NaN after the left join) cannot be coloured
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ': Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [station_lat, station_lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)


#map_clusters.save('Berlin_metro_k=5.html')
map_clusters

### Interpretation of Clusters 

In [105]:
# Cluster 0: touristic
# Rows labelled 0; keep the first column plus everything from the fourth column on
Cluster_0 = Station_merged[Station_merged['Cluster Labels'] == 0].iloc[
    :, [0] + list(range(3, Station_merged.shape[1]))
]

print('The number of stations in Cluster 0 is ' + str(len(Cluster_0)))
Cluster_0

The number of stations in Cluster 0 is 92


Unnamed: 0,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adlershof,0,Supermarket,Café,Italian Restaurant,Tram Station,Gas Station,Drugstore,Greek Restaurant,Park,Plaza,Hotel
3,Alexanderplatz,0,Hotel,Clothing Store,Café,Coffee Shop,Indie Movie Theater,Italian Restaurant,Ice Cream Shop,Vietnamese Restaurant,Optical Shop,Boutique
6,Anhalter Bahnhof,0,Hotel,Coffee Shop,Modern European Restaurant,Concert Hall,Science Museum,Chocolate Shop,Theater,Spa,Shopping Mall,Café
10,Bellevue,0,Hotel,Café,Zoo Exhibit,Beer Garden,Park,Restaurant,Cocktail Bar,Ice Cream Shop,Italian Restaurant,Waterfront
12,Bernau,0,Café,German Restaurant,Gas Station,Supermarket,Italian Restaurant,Platform,Greek Restaurant,Miscellaneous Shop,Gym,Gym / Fitness Center
...,...,...,...,...,...,...,...,...,...,...,...,...
165,Yorckstraße,0,Café,Italian Restaurant,Pizza Place,Park,Pastry Shop,Ice Cream Shop,Korean Restaurant,Bakery,Turkish Restaurant,Doner Restaurant
166,Yorckstraße,0,Café,Cocktail Bar,Park,Pizza Place,Italian Restaurant,Ice Cream Shop,Turkish Restaurant,Bakery,Middle Eastern Restaurant,Pastry Shop
167,Zehlendorf,0,Supermarket,Italian Restaurant,Café,Bakery,Drugstore,Doner Restaurant,German Restaurant,Mobile Phone Shop,Pet Store,Thai Restaurant
168,Zehlendorf Süd,0,Italian Restaurant,Café,Supermarket,Market,Steakhouse,Bus Stop,Park,Plaza,Thai Restaurant,Sushi Restaurant


In [107]:
# Cluster 1: residential
# Rows labelled 1; keep the first column plus everything from the fourth column on
Cluster_1 = Station_merged[Station_merged['Cluster Labels'] == 1].iloc[
    :, [0] + list(range(3, Station_merged.shape[1]))
]

print('The number of stations in Cluster 1 is ' + str(len(Cluster_1)))
Cluster_1


The number of stations in Cluster 1 is 16


Unnamed: 0,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Ahrensfelde,1,Supermarket,Tram Station,Train Station,Skate Park,Animal Shelter,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
11,Bergfelde,1,Light Rail Station,Forest,Supermarket,Motorcycle Shop,Food Truck,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Fabric Shop
19,Birkenwerder,1,Supermarket,Train Station,Light Rail Station,Café,Hotel,German Restaurant,Farmers Market,Empanada Restaurant,Ethiopian Restaurant,Event Space
54,Grünbergallee,1,Supermarket,Bakery,Food Truck,Hotel,Light Rail Station,Liquor Store,Gas Station,Lake,Discount Store,Diner
60,Hegermühle,1,Supermarket,Drugstore,Electronics Store,Light Rail Station,Gas Station,Shopping Mall,Fast Food Restaurant,Farmers Market,Empanada Restaurant,Ethiopian Restaurant
69,Hohenschönhausen,1,Supermarket,Shopping Mall,Light Rail Station,Tram Station,Electronics Store,Movie Theater,Soccer Field,Farm,Gas Station,Fast Food Restaurant
72,Hoppegarten,1,Supermarket,Italian Restaurant,Racecourse,Light Rail Station,Miscellaneous Shop,Park,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Exhibit
76,Karl-Bonhoeffer-Nervenklinik,1,Supermarket,German Restaurant,Motorcycle Shop,Restaurant,Pet Store,Athletics & Sports,Big Box Store,Drugstore,Trattoria/Osteria,Pool
78,Karow,1,Supermarket,Miscellaneous Shop,Nature Preserve,Plaza,Drugstore,Doner Restaurant,Bakery,Restaurant,Ethiopian Restaurant,Exhibit
79,Kaulsdorf,1,Supermarket,Hotel,Trail,Drugstore,Asian Restaurant,Greek Restaurant,Shopping Mall,Bakery,Light Rail Station,Fast Food Restaurant


In [108]:
# Cluster 2: rural
# Rows labelled 2; keep the first column plus everything from the fourth column on
Cluster_2 = Station_merged[Station_merged['Cluster Labels'] == 2].iloc[
    :, [0] + list(range(3, Station_merged.shape[1]))
]

print('The number of stations in Cluster 2 is ' + str(len(Cluster_2)))
Cluster_2


The number of stations in Cluster 2 is 7


Unnamed: 0,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Bernau-Friedenstal,2,Gas Station,Building,Supermarket,Sushi Restaurant,Auto Dealership,Gym / Fitness Center,Greek Restaurant,Hotel,Light Rail Station,Falafel Restaurant
21,Blankenfelde,2,Train Station,Supermarket,Greek Restaurant,Outdoor Supply Store,Post Office,Gas Station,Shopping Mall,Fish & Chips Shop,Electronics Store,Empanada Restaurant
30,Dahlewitz,2,Train Station,Supermarket,Greek Restaurant,Post Office,Gas Station,Shopping Mall,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant
64,Hennigsdorf Nord,2,Supermarket,Gas Station,Fast Food Restaurant,Hotel,Zoo Exhibit,Field,Empanada Restaurant,Ethiopian Restaurant,Event Space,Exhibit
70,Hohenschöpping,2,Automotive Shop,Building,Gas Station,Playground,Zoo Exhibit,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Fabric Shop
84,Lehnitz,2,Supermarket,Basketball Stadium,Greek Restaurant,Gas Station,Water Park,Zoo Exhibit,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Exhibit
144,Teltow,2,Train Station,Zoo,Supermarket,Gas Station,Playground,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space


In [109]:
# Cluster 3: Airport
# Rows labelled 3; keep the first column plus everything from the fourth column on
Cluster_3 = Station_merged[Station_merged['Cluster Labels'] == 3].iloc[
    :, [0] + list(range(3, Station_merged.shape[1]))
]

print('The number of stations in Cluster 3 is ' + str(len(Cluster_3)))
Cluster_3


The number of stations in Cluster 3 is 1


Unnamed: 0,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
39,Flughafen Berlin-Brandenburg,3,Airport Terminal,Airport,Airport Service,Train Station,Watch Shop,Field,Empanada Restaurant,Ethiopian Restaurant,Event Space,Exhibit


In [106]:
# Cluster 4: residential
# Rows labelled 4; keep the first column plus everything from the fourth column on
Cluster_4 = Station_merged[Station_merged['Cluster Labels'] == 4].iloc[
    :, [0] + list(range(3, Station_merged.shape[1]))
]

print('The number of stations in Cluster 4 is ' + str(len(Cluster_4)))
Cluster_4



The number of stations in Cluster 4 is 56


Unnamed: 0,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Albrechtshof,4,Supermarket,Train Station,Discount Store,Greek Restaurant,Furniture / Home Store,Trattoria/Osteria,Automotive Shop,Bus Stop,Lake,Thai Restaurant
4,Alt-Reinickendorf,4,Supermarket,Bus Stop,Park,Metro Station,Chinese Restaurant,Grocery Store,Turkish Restaurant,Dry Cleaner,Trattoria/Osteria,Shopping Mall
5,Altglienicke,4,Supermarket,Light Rail Station,Italian Restaurant,Discount Store,Greek Restaurant,Shopping Mall,Tram Station,Drugstore,Liquor Store,Automotive Shop
7,Attilastraße,4,Supermarket,Pool,Park,Bus Stop,Italian Restaurant,Skating Rink,Tennis Court,Mini Golf,Taverna,Furniture / Home Store
8,Babelsberg,4,Supermarket,German Restaurant,Ice Cream Shop,Hotel,Café,Pub,Trattoria/Osteria,Fast Food Restaurant,Soccer Field,Scenic Lookout
9,Baumschulenweg,4,Supermarket,Drugstore,Park,Café,Bakery,Bus Stop,Forest,Italian Restaurant,Gas Station,Trattoria/Osteria
17,Biesdorf,4,Supermarket,Italian Restaurant,Drugstore,Greek Restaurant,Light Rail Station,Liquor Store,Trail,Gym,Electronics Store,Shopping Mall
18,Birkenstein,4,Bowling Alley,Supermarket,Automotive Shop,Auto Garage,Greek Restaurant,Business Service,Light Rail Station,Drugstore,Fast Food Restaurant,Exhibit
20,Blankenburg,4,Tram Station,Supermarket,Lake,Hotel,Pet Store,Light Rail Station,Asian Restaurant,Electronics Store,Café,Greek Restaurant
27,Buckower Chaussee,4,Supermarket,Bus Stop,Fast Food Restaurant,Chinese Restaurant,Light Rail Station,Italian Restaurant,Tennis Court,Greek Restaurant,Bakery,Big Box Store


In [110]:
# Cluster 5: __
# NOTE: with kclusters = 5 the labels run from 0 to 4, so no row can match
# label 5 and this selection is always empty.
Cluster_5 = Station_merged[Station_merged['Cluster Labels'] == 5].iloc[
    :, [0] + list(range(3, Station_merged.shape[1]))
]

print('The number of stations in Cluster 5 is ' + str(len(Cluster_5)))
Cluster_5


The number of stations in Cluster 5 is 0


Unnamed: 0,Station,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
