In [3]:
#import packages
#!pip install folium
import folium
import types
import pandas as pd
from ibm_botocore.client import Config
import ibm_boto3
import numpy as np
import requests
import json
from pandas.io.json import json_normalize
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans 
import matplotlib.cm as cm
import matplotlib.colors as colors

## Data Collection

### GeoJSON File of Munich

In [4]:
#Import GeoJSON presenting Munich per postal code
!wget --quiet https://www.dropbox.com/s/29qjo7tsseh318d/Muenchen.geojson?dl=1 -O Muenchen.json
muenchen = r'Muenchen.json'
muenchen

'Muenchen.json'

## Districts and postal codes of Munich

In [5]:
#Import file that contains the Districts (DISTRICT) of Munich and the postalcodes in each district (PLZ)
!wget --quiet https://www.dropbox.com/s/gj2zvr30px2bwta/districtplz.csv?dl=1 -O district.csv
df_district = pd.read_csv("district.csv")
df_district.count()

DISTRICT    75
PLZ         75
dtype: int64

## Criminal activities in Munich per district

In [6]:
#Import file that contains the number of criminal activities in 2017 (CRIME) per district (DISTRICT) (crime data per postal code wasn't available)
!wget --quiet https://www.dropbox.com/s/n275v92fbt135l8/crime.csv?dl=1 -O crime.csv
df_crime = pd.read_csv("crime.csv")
df_crime.count()

DISTRICT    25
CRIME       25
dtype: int64

In [7]:
#merge the two dataframes on 'DISTRICT'
df_district_crime = pd.merge(df_crime, df_district, how='inner', on=['DISTRICT'])
df_district_crime.count()

DISTRICT    75
CRIME       75
PLZ         75
dtype: int64

In [8]:
#bring the columns in the order DISTRICT, PLZ, CRIME and convert the PLZ to a string (that it can afterwards be matched with the GeoJSON file)
df_district_crime = df_district_crime[['DISTRICT', 'PLZ', 'CRIME']]
df_district_crime.PLZ = df_district_crime.PLZ.astype(str)
df_district_crime.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 75 entries, 0 to 74
Data columns (total 3 columns):
DISTRICT    75 non-null object
PLZ         75 non-null object
CRIME       75 non-null int64
dtypes: int64(1), object(2)
memory usage: 2.3+ KB


## People living in each district

In [9]:
#Import file that contains the Districts (DISTRICT) of Munich and the people that live in each district (PEOPLE)
!wget --quiet https://www.dropbox.com/s/uijul6yky5moylk/districtpeople.csv?dl=1 -O people.csv
df_district_people = pd.read_csv("people.csv")
df_district_people.PEOPLE = df_district_people.PEOPLE.astype(int)
df_district_people.count()

DISTRICT    25
PEOPLE      25
dtype: int64

In [10]:
#count columns to check afterwards if the two dataframes match exactly
df_district_crime.count()

DISTRICT    75
PLZ         75
CRIME       75
dtype: int64

In [11]:
#merge the two dataframes on 'DISTRICT'
df_district_crime_people = pd.merge(df_district_crime, df_district_people, how='inner', on=['DISTRICT'])
df_district_crime_people.count()

DISTRICT    75
PLZ         75
CRIME       75
PEOPLE      75
dtype: int64

In [12]:
df_district_crime_people.head()

Unnamed: 0,DISTRICT,PLZ,CRIME,PEOPLE
0,Altstadt-Lehel,80331,7868,20422
1,Altstadt-Lehel,80333,7868,20422
2,Altstadt-Lehel,80335,7868,20422
3,Altstadt-Lehel,80336,7868,20422
4,Altstadt-Lehel,80538,7868,20422


In [13]:
#calculate the crime per person in the district (as some districts are much larger than the rest)
df_district_crime_people['CRIME_PER_PERSON'] = df_district_crime_people['CRIME']/df_district_crime_people['PEOPLE']
df_district_crime_people.head()

Unnamed: 0,DISTRICT,PLZ,CRIME,PEOPLE,CRIME_PER_PERSON
0,Altstadt-Lehel,80331,7868,20422,0.385271
1,Altstadt-Lehel,80333,7868,20422,0.385271
2,Altstadt-Lehel,80335,7868,20422,0.385271
3,Altstadt-Lehel,80336,7868,20422,0.385271
4,Altstadt-Lehel,80538,7868,20422,0.385271


## Price in Euro per square meter in each district of Munich

In [14]:
#Import file that contains the Districts (DISTRICT) of Munich and the price in euro per square meter (EUROPERSQM)
!wget --quiet https://www.dropbox.com/s/4mglpjkisv0k19f/districtprice.csv?dl=1 -O price.csv
df_district_price = pd.read_csv("price.csv")
df_district_price.count()

DISTRICT      26
EUROPERSQM    26
dtype: int64

In [15]:
#merge the two dataframes on 'DISTRICT'
df_district_crime_people_euro = pd.merge(df_district_crime_people, df_district_price, how='inner', on=['DISTRICT'])
df_district_crime_people.count()

DISTRICT            75
PLZ                 75
CRIME               75
PEOPLE              75
CRIME_PER_PERSON    75
dtype: int64

In [16]:
df_district_crime_people_euro.head()

Unnamed: 0,DISTRICT,PLZ,CRIME,PEOPLE,CRIME_PER_PERSON,EUROPERSQM
0,Altstadt-Lehel,80331,7868,20422,0.385271,9208
1,Altstadt-Lehel,80333,7868,20422,0.385271,9208
2,Altstadt-Lehel,80335,7868,20422,0.385271,9208
3,Altstadt-Lehel,80336,7868,20422,0.385271,9208
4,Altstadt-Lehel,80538,7868,20422,0.385271,9208


In [17]:
#Import file that contains the latitude and longitude of a district of Munich
!wget --quiet https://www.dropbox.com/s/128yqau4f798qcr/latlong.csv?dl=1 -O latlong.csv
df_latlong = pd.read_csv("latlong.csv")
df_latlong.head()

Unnamed: 0,DISTRICT,LATITUDE,LONGITUDE
0,Altstadt-Lehel,48.136111,11.572222
1,Schwabing-Freimann,48.180556,11.602778
2,Ludwigsvorstadt-Isarvorstadt,48.127222,11.564722
3,Bogenhausen,48.14776,11.60161
4,Maxvorstadt,48.15,11.569444


In [18]:
#merge the two dataframes on 'DISTRICT'
df_district_crime_people_euro_latlong = pd.merge(df_district_crime_people_euro, df_latlong, how='inner', on=['DISTRICT'])
df_district_crime_people_euro_latlong.head()

Unnamed: 0,DISTRICT,PLZ,CRIME,PEOPLE,CRIME_PER_PERSON,EUROPERSQM,LATITUDE,LONGITUDE
0,Altstadt-Lehel,80331,7868,20422,0.385271,9208,48.136111,11.572222
1,Altstadt-Lehel,80333,7868,20422,0.385271,9208,48.136111,11.572222
2,Altstadt-Lehel,80335,7868,20422,0.385271,9208,48.136111,11.572222
3,Altstadt-Lehel,80336,7868,20422,0.385271,9208,48.136111,11.572222
4,Altstadt-Lehel,80538,7868,20422,0.385271,9208,48.136111,11.572222


## Get data from foursquare about elementary schools in the districts of Munich

In [19]:
# The code was removed by Watson Studio for sharing.

In [20]:
#parameters to search for elementary schools (Grundschule) on foursquare around the city of munich 
search_query = 'Grundschule'
radius = 10000
LIMIT = 100
VERSION = '20190116'
print(search_query + ' .... OK!')
district_latitude = '48.137154'
district_longitude = '11.576124'

Grundschule .... OK!


In [21]:
# The code was removed by Watson Studio for sharing.

'https://api.foursquare.com/v2/venues/search?client_id=SOEHLEM5CFWDPUKGEQYMOYAKT4TRCECAIQCC4UB5OGNPLKVK&client_secret=PONUAWQBFOGXAMMGV4ITPVMCYLI23ZZXZSOOH0C04D52RCIY&ll=48.137154,11.576124&v=20190116&query=Grundschule&radius=10000&limit=100'

In [22]:
results = requests.get(url).json()

In [23]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId
0,"[{'name': 'Elementary School', 'primary': True...",False,4dad30e5a86e7fc66e2bb2c6,Bazeillesstr. 8,DE,München,Deutschland,,1977,"[Bazeillesstr. 8, 81669 München, Deutschland]","[{'lng': 11.59611042562499, 'label': 'display'...",48.125418,11.59611,,81669,Bayern,Grundschule an der Bazeillesstraße,v-1547719717
1,"[{'name': 'Elementary School', 'primary': True...",False,5235685111d27df769ab4448,Dachauer Str. 98,DE,München,Deutschland,,2243,"[Dachauer Str. 98, 80335 München, Deutschland]","[{'lng': 11.556885028086434, 'label': 'display...",48.152688,11.556885,,80335,Bayern,Grundschule an der Dachauer Straße 98,v-1547719717
2,"[{'name': 'Elementary School', 'primary': True...",False,52357a3e11d2261da3c79e0b,Mariahilfpl. 18,DE,München,Deutschland,,1413,"[Mariahilfpl. 18, 81541 München, Deutschland]","[{'lng': 11.580468152165913, 'label': 'display...",48.124794,11.580468,,81541,Bayern,Grundschule am Mariahilfplatz,v-1547719717
3,"[{'name': 'Elementary School', 'primary': True...",False,51c7dfe0498e56f11988beeb,Türkenstr. 68,DE,München,Deutschland,,1539,"[Türkenstr. 68, 80799 München, Deutschland]","[{'lng': 11.576556907709076, 'label': 'display...",48.150982,11.576557,,80799,Bayern,Grundschule an der Türkenstraße,v-1547719717
4,"[{'name': 'Elementary School', 'primary': True...",False,52358afe11d2cc2dec13096b,Pfeuferstr. 1,DE,München,Deutschland,,2893,"[Pfeuferstr. 1, 81373 München, Deutschland]","[{'lng': 11.54015064239502, 'label': 'display'...",48.127186,11.540151,,81373,Bayern,Grundschule an der Pfeuferstraße,v-1547719717


In [24]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,Grundschule an der Bazeillesstraße,Elementary School,Bazeillesstr. 8,DE,München,Deutschland,,1977,"[Bazeillesstr. 8, 81669 München, Deutschland]","[{'lng': 11.59611042562499, 'label': 'display'...",48.125418,11.59611,,81669,Bayern,4dad30e5a86e7fc66e2bb2c6
1,Grundschule an der Dachauer Straße 98,Elementary School,Dachauer Str. 98,DE,München,Deutschland,,2243,"[Dachauer Str. 98, 80335 München, Deutschland]","[{'lng': 11.556885028086434, 'label': 'display...",48.152688,11.556885,,80335,Bayern,5235685111d27df769ab4448
2,Grundschule am Mariahilfplatz,Elementary School,Mariahilfpl. 18,DE,München,Deutschland,,1413,"[Mariahilfpl. 18, 81541 München, Deutschland]","[{'lng': 11.580468152165913, 'label': 'display...",48.124794,11.580468,,81541,Bayern,52357a3e11d2261da3c79e0b
3,Grundschule an der Türkenstraße,Elementary School,Türkenstr. 68,DE,München,Deutschland,,1539,"[Türkenstr. 68, 80799 München, Deutschland]","[{'lng': 11.576556907709076, 'label': 'display...",48.150982,11.576557,,80799,Bayern,51c7dfe0498e56f11988beeb
4,Grundschule an der Pfeuferstraße,Elementary School,Pfeuferstr. 1,DE,München,Deutschland,,2893,"[Pfeuferstr. 1, 81373 München, Deutschland]","[{'lng': 11.54015064239502, 'label': 'display'...",48.127186,11.540151,,81373,Bayern,52358afe11d2cc2dec13096b


In [25]:
#filter for those schools who are within the postal codes of the previous analyses and then count the number of schools per postal code
df_final = df_district_crime_people_euro_latlong.merge(dataframe_filtered, left_on='PLZ', right_on='postalCode' , how='right')
df_final = df_final.groupby(['PLZ']).count()
df_final = df_final.reset_index()

#filter for columns PLZ and and take any column as we used the count() method
df_final = df_final[['PLZ', 'DISTRICT']]

#rename the column to COUNT_SCHOOLS and merge again with the previous dataset
df_final = df_final.rename(columns={'DISTRICT': 'COUNT_SCHOOLS'})
df_final = pd.merge(df_district_crime_people_euro_latlong, df_final, how='left', on=['PLZ'])
df_final.count()

DISTRICT            75
PLZ                 75
CRIME               75
PEOPLE              75
CRIME_PER_PERSON    75
EUROPERSQM          75
LATITUDE            75
LONGITUDE           75
COUNT_SCHOOLS       23
dtype: int64

In [26]:
#--> in 23 out of 75 postal codesexist no elementary schools; replace nan values with 0
df_final['COUNT_SCHOOLS'].fillna(0, inplace=True)

In [27]:
df_final.count()

DISTRICT            75
PLZ                 75
CRIME               75
PEOPLE              75
CRIME_PER_PERSON    75
EUROPERSQM          75
LATITUDE            75
LONGITUDE           75
COUNT_SCHOOLS       75
dtype: int64

In [28]:
df_final.head()

Unnamed: 0,DISTRICT,PLZ,CRIME,PEOPLE,CRIME_PER_PERSON,EUROPERSQM,LATITUDE,LONGITUDE,COUNT_SCHOOLS
0,Altstadt-Lehel,80331,7868,20422,0.385271,9208,48.136111,11.572222,0.0
1,Altstadt-Lehel,80333,7868,20422,0.385271,9208,48.136111,11.572222,0.0
2,Altstadt-Lehel,80335,7868,20422,0.385271,9208,48.136111,11.572222,1.0
3,Altstadt-Lehel,80336,7868,20422,0.385271,9208,48.136111,11.572222,0.0
4,Altstadt-Lehel,80538,7868,20422,0.385271,9208,48.136111,11.572222,0.0


## Modeling (Clustering based on crime, price in euro per square meter and number of schools in Munich

In [29]:
#now I am aggregating to get the data per district. for CRIME and EUROPERSQM I will calculate the average (mean) as these are identical for each district anyway
#for COUNT_SCHOOLS I will use count() to aggregate all schools from the PLZs to district level
df_final_mean = df_final.groupby(['DISTRICT']).mean()
df_final_mean = df_final_mean.reset_index()
df_final_count = df_final.groupby(['DISTRICT']).count()
df_final_count = df_final_count.reset_index()
df_final_mean = df_final_mean[['DISTRICT', 'CRIME', 'EUROPERSQM', 'COUNT_SCHOOLS']]
df_final_mean['COUNT_SCHOOLS'] = df_final_count['COUNT_SCHOOLS']
df_for_cluster = df_final_mean
df_for_cluster.head()

Unnamed: 0,DISTRICT,CRIME,EUROPERSQM,COUNT_SCHOOLS
0,Allach-Untermenzing,889,5699,5
1,Altstadt-Lehel,7868,9208,6
2,Au-Haidhausen,3407,7872,7
3,Aubing-Lochhausen-Langwied,1533,5396,2
4,Berg am Laim,2579,5921,3


In [30]:
#normalize data for modeling
X = df_for_cluster.values[:,1:]
X = np.nan_to_num(X)
cluster_dataset = StandardScaler().fit_transform(X)
cluster_dataset



array([[-1.03940219, -0.71868426,  1.33630621],
       [ 2.03109487,  2.15831567,  2.00445931],
       [ 0.06842294,  1.06294038,  2.67261242],
       [-0.75606646, -0.96711144, -0.6681531 ],
       [-0.29586586, -0.5366683 ,  0.        ],
       [-0.44369321,  1.49502329,  0.6681531 ],
       [-0.51892676, -1.43608948, -0.6681531 ],
       [-0.74638728, -0.47927588,  0.        ],
       [-0.52728605, -0.89086151, -0.6681531 ],
       [ 3.76894605,  1.54831625, -0.6681531 ],
       [ 0.47054851,  1.14164998, -0.6681531 ],
       [ 0.25320712, -0.56536451,  0.        ],
       [-0.20919329, -0.76459819,  0.        ],
       [ 0.21097073,  0.69152944,  0.6681531 ],
       [-0.41949528, -0.99334797, -0.6681531 ],
       [-0.00505077, -0.42188347, -0.6681531 ],
       [ 0.58933833, -0.80805245,  0.        ],
       [ 0.88939264,  1.71967362,  1.33630621],
       [-0.49692865,  0.86288681,  0.        ],
       [-0.7076706 ,  0.31847873, -1.33630621],
       [-0.64563591, -0.122623  ,  0.   

In [68]:
#model data based on price in euro per square meter, crime and number of schools per district
num_clusters = 4
k_means = KMeans(init = "k-means++", n_clusters = num_clusters, n_init = 12)
k_means.fit(X)
labels = k_means.labels_

print(labels)

[1 2 0 1 3 0 1 1 1 2 0 3 3 0 1 3 3 0 0 1 1 1 3 3 1]


In [69]:
# add clustering labels
df_for_cluster['Cluster Labels'] = k_means.labels_

df_for_cluster.head()

Unnamed: 0,DISTRICT,CRIME,EUROPERSQM,COUNT_SCHOOLS,Cluster Labels
0,Allach-Untermenzing,889,5699,5,1
1,Altstadt-Lehel,7868,9208,6,2
2,Au-Haidhausen,3407,7872,7,0
3,Aubing-Lochhausen-Langwied,1533,5396,2,1
4,Berg am Laim,2579,5921,3,3


In [70]:
#merge the counted schools and cluster to the original dataset
df_for_cluster_to_merge = df_for_cluster[['DISTRICT', 'Cluster Labels', 'COUNT_SCHOOLS']]
df_district_crime_people_euro_latlong_cluster =  pd.merge(df_district_crime_people_euro_latlong, df_for_cluster_to_merge, how='inner', on=['DISTRICT'])
df_district_crime_people_euro_latlong_cluster.count()

DISTRICT            75
PLZ                 75
CRIME               75
PEOPLE              75
CRIME_PER_PERSON    75
EUROPERSQM          75
LATITUDE            75
LONGITUDE           75
Cluster Labels      75
COUNT_SCHOOLS       75
dtype: int64

In [71]:
df_district_crime_people_euro_latlong_cluster.head()

Unnamed: 0,DISTRICT,PLZ,CRIME,PEOPLE,CRIME_PER_PERSON,EUROPERSQM,LATITUDE,LONGITUDE,Cluster Labels,COUNT_SCHOOLS
0,Altstadt-Lehel,80331,7868,20422,0.385271,9208,48.136111,11.572222,2,6
1,Altstadt-Lehel,80333,7868,20422,0.385271,9208,48.136111,11.572222,2,6
2,Altstadt-Lehel,80335,7868,20422,0.385271,9208,48.136111,11.572222,2,6
3,Altstadt-Lehel,80336,7868,20422,0.385271,9208,48.136111,11.572222,2,6
4,Altstadt-Lehel,80538,7868,20422,0.385271,9208,48.136111,11.572222,2,6


## Visualization of results

In [72]:
#calculate the threshold values for the legend in the choropleth map
threshold_scale = np.linspace(df_district_crime_people_euro_latlong_cluster['EUROPERSQM'].min(),
                              df_district_crime_people_euro_latlong_cluster['EUROPERSQM'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # make sure that the last value of the list is greater than the maximum immigration
threshold_scale

[4824, 5700, 6577, 7454, 8331, 9209]

In [73]:
#create choropleth map visualizing the price in euro per square meter
muc_map = folium.Map(location=[48.137154, 11.576124], zoom_start=12)
muc_map.choropleth(
    geo_data=muenchen,
    data=df_district_crime_people_euro_latlong_cluster,
    columns=['PLZ', 'EUROPERSQM'],
    key_on='feature.properties.plz',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Price per square meter in Euro',
    reset=True
)



In [74]:
#add markers for each district, different colors show the differnt clusters, detailed data about crime, price and schools are labeled
x = np.arange(num_clusters)
ys = [i+x+(i*x)**2 for i in range(num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, name, cluster, crime, schools in zip(df_district_crime_people_euro_latlong_cluster['LATITUDE'], df_district_crime_people_euro_latlong_cluster['LONGITUDE'], df_district_crime_people_euro_latlong_cluster['DISTRICT'], df_district_crime_people_euro_latlong_cluster['Cluster Labels'], df_district_crime_people_euro_latlong_cluster['CRIME'], df_district_crime_people_euro_latlong_cluster['COUNT_SCHOOLS']):
    label = 'District: {}, Crime:{}, Schools: {}'.format(name, crime, schools)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(muc_map)

muc_map

## Details of Cluster to differentiate among them

In [75]:
#details on cluster 1
df_for_cluster.loc[df_for_cluster['Cluster Labels'] == 0]

Unnamed: 0,DISTRICT,CRIME,EUROPERSQM,COUNT_SCHOOLS,Cluster Labels
2,Au-Haidhausen,3407,7872,7,0
5,Bogenhausen,2243,8399,4,0
10,Maxvorstadt,4321,7968,2,0
13,Neuhausen-Nymphenburg,3731,7419,4,0
17,Schwabing-Freimann,5273,8673,5,0
18,Schwabing-West,2122,7628,3,0


In [76]:
#details on cluster 2
df_for_cluster.loc[df_for_cluster['Cluster Labels'] == 1]

Unnamed: 0,DISTRICT,CRIME,EUROPERSQM,COUNT_SCHOOLS,Cluster Labels
0,Allach-Untermenzing,889,5699,5,1
3,Aubing-Lochhausen-Langwied,1533,5396,2,1
6,Feldmoching-Hasenbergl,2072,4824,2,1
7,Hadern,1555,5991,3,1
8,Laim,2053,5489,2,1
14,Obergiesing-Fasangarten,2298,5364,2,1
19,Schwanthalerhoehe,1643,6964,1,1
20,Sendling,1784,6426,3,1
21,Sendling-Westpark,2165,6008,1,1
24,Untergiesing-Harlaching,1640,6095,1,1


In [77]:
#details on cluster 3
df_for_cluster.loc[df_for_cluster['Cluster Labels'] == 2]

Unnamed: 0,DISTRICT,CRIME,EUROPERSQM,COUNT_SCHOOLS,Cluster Labels
1,Altstadt-Lehel,7868,9208,6,2
9,Ludwigsvorstadt-Isarvorstadt,11818,8464,2,2


In [78]:
#details on cluster 4
df_for_cluster.loc[df_for_cluster['Cluster Labels'] == 3]

Unnamed: 0,DISTRICT,CRIME,EUROPERSQM,COUNT_SCHOOLS,Cluster Labels
4,Berg am Laim,2579,5921,3,3
11,Milbertshofen-Am Hart,3827,5886,3,3
12,Moosach,2776,5643,3,3
15,Pasing-Obermenzing,3240,6061,2,3
16,Ramersdorf-Perlach,4591,5590,3,3
22,Thalkirchen-Obersendling-Fuerstenried-Forstenr...,2952,5852,4,3
23,Trudering-Riem,2907,5549,2,3


In [80]:
#details on cluster 5
#df_for_cluster.loc[df_for_cluster['Cluster Labels'] == 4]