## Libraries

In [0]:
# library to handle data in a vectorized manner
import numpy as np

# library for data analysis
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# library to handle JSON files
import urllib.request, json 

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Extract Json url

In [0]:
#Download the Wuppertal district file and parse its JSON content into a Python object
WUPPERTAL_DISTRICTS_URL = "https://offenedaten-wuppertal.de/sites/default/files/Stadtbezirke_EPSG4326_JSON.json"
with urllib.request.urlopen(WUPPERTAL_DISTRICTS_URL) as url:
    raw_payload = url.read()
# decode the downloaded bytes (default codec) before handing the text to the JSON parser
wuppertal_data = json.loads(raw_payload.decode())

In [0]:
#Cleaning: keep only the entries stored under the top-level 'features' key;
# the extraction loop further down reads 'properties' and 'geometry' from each of them
neighborhoods_data = wuppertal_data['features']

In [0]:
#Start from an empty dataframe; `results` will hold the boundary coordinates of every neighborhood
results = pd.DataFrame()

In [0]:
#Extract what we want from every feature (neighborhood name, latitude and longitude)
# Each feature gets its own small dataframe and all of them are concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
# Building `results` from scratch also makes the cell safe to re-run (no duplicated rows).
boundary_frames = []
for data in neighborhoods_data:

    #Determine which data ('NAME' and coordinates) we are looking for in our json file
    neighborhood_name = data['properties']['NAME']

    #'coordinates' is a list of rings and every ring is a list of [longitude, latitude] pairs.
    #Only the first ring is used -- assumes every feature is a Polygon geometry, TODO confirm for new data.
    boundary_points = data['geometry']['coordinates'][0]

    #Split the pairs into 2 columns; the json file stores longitude first, then latitude
    temp_df = pd.DataFrame(boundary_points, columns=['Longitude','Latitude'])

    #Tag every boundary point with the neighborhood it belongs to
    temp_df['Neighborhood'] = neighborhood_name

    boundary_frames.append(temp_df)

#One concatenation with a fresh 0..n-1 index
results = pd.concat(boundary_frames, ignore_index=True)

results.head()

Unnamed: 0,Longitude,Latitude,Neighborhood
0,7.251019,51.291708,Langerfeld-Beyenburg
1,7.250556,51.29227,Langerfeld-Beyenburg
2,7.250052,51.292716,Langerfeld-Beyenburg
3,7.2495,51.293046,Langerfeld-Beyenburg
4,7.24902,51.293275,Langerfeld-Beyenburg


## Cleaning Data

In [0]:
#Put the neighborhood name first, followed by latitude and then longitude
column_order = ['Neighborhood', 'Latitude', 'Longitude']
df_wuppertal = results.loc[:, column_order]
df_wuppertal.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Langerfeld-Beyenburg,51.291708,7.251019
1,Langerfeld-Beyenburg,51.29227,7.250556
2,Langerfeld-Beyenburg,51.292716,7.250052
3,Langerfeld-Beyenburg,51.293046,7.2495
4,Langerfeld-Beyenburg,51.293275,7.24902


In [0]:
#Collapse every neighborhood to a single point: the mean latitude and longitude of its boundary rows
# (as_index=False keeps 'Neighborhood' as a regular column instead of the index)
df_wuppertal = df_wuppertal.groupby('Neighborhood', as_index=False).mean()
df_wuppertal

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Barmen,51.267846,7.184963
1,Cronenberg,51.205583,7.142629
2,Elberfeld,51.246547,7.14785
3,Elberfeld West,51.248559,7.109591
4,Heckinghausen,51.260607,7.211397
5,Langerfeld-Beyenburg,51.245742,7.267937
6,Oberbarmen,51.292888,7.231723
7,Ronsdorf,51.229937,7.205519
8,Uellendahl-Katernberg,51.284183,7.133142
9,Vohwinkel,51.235858,7.066885


## Mapping the data

### Define Your Coordinate

In [0]:
# Look up the coordinates of the city with the Nominatim geocoder
address = 'Wuppertal'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; stop here with a clear message
# instead of failing with an AttributeError on the attribute reads below
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Wuppertal are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Wuppertal are 51.264018, 7.1780374.


### Create a Map

In [0]:
# base map centred on the latitude and longitude values found above
map_wuppertal = folium.Map(location=[latitude, longitude], zoom_start=11)

# one blue circle per neighborhood, with the neighborhood name as its popup
marker_columns = df_wuppertal[['Latitude', 'Longitude', 'Neighborhood']]
for lat, lng, neighborhood in marker_columns.itertuples(index=False, name=None):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}'.format(neighborhood), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_wuppertal)

map_wuppertal

### Define Foursquare Credentials and Version 

In [0]:
# Foursquare credentials are read from the environment (or typed in without echo) so that
# they never appear in the notebook source or in its saved output.
# NOTE(review): the key pair that was previously hard-coded in this cell has been exposed and should be rotated.
import os
from getpass import getpass

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# confirm that the credentials are available without printing their values
print('Foursquare credentials loaded.')

Your credentails:
CLIENT_ID: [redacted]
CLIENT_SECRET:[redacted]


In [0]:
# Build the Foursquare 'explore' request URL around the city coordinates
radius = 1500
LIMIT = 100

explore_url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
# values in the order in which the placeholders appear in the template
url_fields = (CLIENT_ID, CLIENT_SECRET, VERSION, latitude, longitude, radius, LIMIT)
url = explore_url_template.format(*url_fields)

In [0]:
# Send the explore request and decode the JSON body of the reply
import requests

api_reply = requests.get(url)
results_test = api_reply.json()
results_test

{'meta': {'code': 200, 'requestId': '5cb5cc749fb6b7771994601d'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4faff71fe4b03d52278968eb-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/greek_',
          'suffix': '.png'},
         'id': '53d6c1b0e4b02351e88a83d6',
         'name': 'Grilled Meat Restaurant',
         'pluralName': 'Grilled Meat Restaurants',
         'primary': True,
         'shortName': 'Grilled Meat Restaurants'}],
       'id': '4faff71fe4b03d52278968eb',
       'location': {'address': 'Loher Str. 12',
        'cc': 'DE',
        'city': 'Wuppertal',
        'country': 'Deutschland',
        'distance': 336,
        'formattedAddress': ['Loher Str. 12',
         '42283 Wuppertal',
         'Deutschland'],
        'labeledLatLngs': [{'label': 'di

In [0]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue, or None.

    `row` is indexed by key: the category list is read from 'categories'
    and, when that key is absent, from 'venue.categories'.

    Returns None when the category list is empty or None.
    Raises KeyError when neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; any other error propagates
        categories_list = row['venue.categories']

    # covers an empty list as well as a None value
    if not categories_list:
        return None
    return categories_list[0]['name']

In [0]:
# json_normalize is exposed at the top level since pandas 1.0; the pandas.io.json path
# is only kept as a fallback for older installations
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

venues = results_test['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns (explicit copy, so the column assignment below works on an independent frame)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the text after the last dot of every column name
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Loher Grill,Grilled Meat Restaurant,51.265498,7.182242
1,akzenta,Supermarket,51.268108,7.190262
2,Opernhaus,Opera House,51.267435,7.193147
3,Hardt-Anlage,Park,51.26166,7.159438
4,Botanischer Garten,Botanical Garden,51.260751,7.160379


In [0]:
# number of rows in the flattened venue table
print('{} venues were returned by Foursquare.'.format(len(nearby_venues)))

20 venues were returned by Foursquare.


In [0]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500):
    """Collect the venues Foursquare returns around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        One entry per neighborhood; they are iterated in parallel.
    radius : number, default 1500
        Value sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue: the neighborhood name and coordinates,
        the venue name and coordinates, and the name of the venue's first
        category (None when the venue lists no category). When no venue is
        returned the frame is empty but still has all seven columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Reads the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout keeps one stalled call from hanging the notebook,
        # and an error status is raised instead of surfacing later as a KeyError
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        groups = response.json().get('response', {}).get('groups', [])
        results = groups[0]['items'] if groups else []

        # keep only relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come without any category; record None instead of raising IndexError
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works when no venue was collected
    return pd.DataFrame(venue_rows, columns=column_names)

In [0]:
# Query Foursquare once per neighborhood, using the per-neighborhood coordinates in df_wuppertal
wuppertal_venues = getNearbyVenues(
    df_wuppertal['Neighborhood'],
    df_wuppertal['Latitude'],
    df_wuppertal['Longitude'],
)



Barmen
Cronenberg
Elberfeld
Elberfeld West
Heckinghausen
Langerfeld-Beyenburg
Oberbarmen
Ronsdorf
Uellendahl-Katernberg
Vohwinkel


In [0]:
# Size of the combined venue table as (rows, columns), followed by a preview of its first rows
print(wuppertal_venues.shape)
wuppertal_venues.head()

(207, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Barmen,51.267846,7.184963,Loher Grill,51.265498,7.182242,Grilled Meat Restaurant
1,Barmen,51.267846,7.184963,akzenta,51.268108,7.190262,Supermarket
2,Barmen,51.267846,7.184963,Opernhaus,51.267435,7.193147,Opera House
3,Barmen,51.267846,7.184963,Nordbahntrasse,51.270425,7.179859,Trail
4,Barmen,51.267846,7.184963,Extrablatt,51.271479,7.200124,Café


In [0]:
#Let's check how many venues were returned for each neighborhood
# (count() tallies the non-null entries of every column within each group)
wuppertal_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barmen,32,32,32,32,32,32
Cronenberg,7,7,7,7,7,7
Elberfeld,99,99,99,99,99,99
Elberfeld West,22,22,22,22,22,22
Heckinghausen,16,16,16,16,16,16
Langerfeld-Beyenburg,3,3,3,3,3,3
Oberbarmen,4,4,4,4,4,4
Ronsdorf,11,11,11,11,11,11
Uellendahl-Katernberg,4,4,4,4,4,4
Vohwinkel,9,9,9,9,9,9


In [0]:
#Analyze each neighborhood
# one hot encoding: one indicator column per venue category, named after the category itself
category_frame = wuppertal_venues[['Venue Category']]
wuppertal_onehot = pd.get_dummies(category_frame, prefix="", prefix_sep="")

# add neighborhood column back to dataframe
wuppertal_onehot['Neighborhood'] = wuppertal_venues['Neighborhood']

# rotate the last column to the first position
column_names = wuppertal_onehot.columns.tolist()
fixed_columns = column_names[-1:] + column_names[:-1]
wuppertal_onehot = wuppertal_onehot[fixed_columns]

wuppertal_onehot.head()

Unnamed: 0,Neighborhood,Antique Shop,Art Museum,Asian Restaurant,Bakery,Bar,Bistro,Bookstore,Brewery,Burger Joint,Bus Stop,Business Service,Cable Car,Café,Camera Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Stadium,Convenience Store,Department Store,Diner,Discount Store,Drugstore,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Furniture / Home Store,Gas Station,Gastropub,General Entertainment,German Restaurant,Greek Restaurant,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Historic Site,Hotel,IT Services,Ice Cream Shop,Insurance Office,Intersection,Italian Restaurant,Korean Restaurant,Light Rail Station,Market,Mexican Restaurant,Mobile Phone Shop,Modern European Restaurant,Multiplex,Music Venue,Nightclub,Opera House,Organic Grocery,Paper / Office Supplies Store,Park,Pharmacy,Photography Studio,Pizza Place,Plaza,Pool,Pub,Racetrack,Rest Area,Restaurant,Sandwich Place,Shopping Mall,Snack Place,Soccer Stadium,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Taverna,Thai Restaurant,Town Hall,Trail,Turkish Restaurant,Zoo
0,Barmen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Barmen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,Barmen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Barmen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,Barmen,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [0]:
#Let's examine the dataframe size as (rows, columns): one row per venue, one column per category plus 'Neighborhood'
wuppertal_onehot.shape

(207, 86)

In [0]:
#Next, let's group rows by neighborhood and take the mean of every category column,
#which gives the frequency of occurrence of each category within the neighborhood

wuppertal_grouped = wuppertal_onehot.groupby('Neighborhood', as_index=False).mean()
wuppertal_grouped

Unnamed: 0,Neighborhood,Antique Shop,Art Museum,Asian Restaurant,Bakery,Bar,Bistro,Bookstore,Brewery,Burger Joint,Bus Stop,Business Service,Cable Car,Café,Camera Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Stadium,Convenience Store,Department Store,Diner,Discount Store,Drugstore,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Furniture / Home Store,Gas Station,Gastropub,General Entertainment,German Restaurant,Greek Restaurant,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Historic Site,Hotel,IT Services,Ice Cream Shop,Insurance Office,Intersection,Italian Restaurant,Korean Restaurant,Light Rail Station,Market,Mexican Restaurant,Mobile Phone Shop,Modern European Restaurant,Multiplex,Music Venue,Nightclub,Opera House,Organic Grocery,Paper / Office Supplies Store,Park,Pharmacy,Photography Studio,Pizza Place,Plaza,Pool,Pub,Racetrack,Rest Area,Restaurant,Sandwich Place,Shopping Mall,Snack Place,Soccer Stadium,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Taverna,Thai Restaurant,Town Hall,Trail,Turkish Restaurant,Zoo
0,Barmen,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.03125,0.0,0.0,0.03125,0.03125,0.03125,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15625,0.0,0.0,0.0,0.03125,0.0,0.0
1,Cronenberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Elberfeld,0.0,0.010101,0.010101,0.020202,0.040404,0.010101,0.010101,0.0,0.020202,0.0,0.0,0.0,0.070707,0.0,0.010101,0.040404,0.020202,0.030303,0.010101,0.010101,0.010101,0.0,0.0,0.040404,0.010101,0.0,0.010101,0.0,0.020202,0.0,0.0,0.0,0.020202,0.010101,0.020202,0.0,0.0,0.010101,0.010101,0.010101,0.010101,0.010101,0.0,0.060606,0.010101,0.030303,0.010101,0.0,0.050505,0.010101,0.0,0.0,0.020202,0.010101,0.010101,0.010101,0.0,0.0,0.0,0.010101,0.010101,0.020202,0.0,0.0,0.020202,0.010101,0.010101,0.020202,0.0,0.0,0.020202,0.010101,0.010101,0.0,0.0,0.010101,0.010101,0.010101,0.050505,0.010101,0.010101,0.010101,0.0,0.020202,0.0
3,Elberfeld West,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.045455,0.045455,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.045455
4,Heckinghausen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.125,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0
5,Langerfeld-Beyenburg,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Oberbarmen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
7,Ronsdorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0
8,Uellendahl-Katernberg,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Vohwinkel,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
#Let's confirm the new size as (rows, columns): after grouping there is one row per neighborhood
wuppertal_grouped.shape

(10, 86)

In [0]:
#Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in wuppertal_grouped['Neighborhood']:
    print("----"+hood+"----")

    # transpose the neighborhood's row into a two-column (venue, freq) table
    hood_rows = wuppertal_grouped.loc[wuppertal_grouped['Neighborhood'] == hood]
    freq_table = hood_rows.T.reset_index()
    freq_table.columns = ['venue', 'freq']

    # skip the first entry (the neighborhood name), then make the frequencies numeric and round them
    freq_table = freq_table.iloc[1:].astype({'freq': float}).round({'freq': 2})

    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Barmen----
                  venue  freq
0           Supermarket  0.16
1                 Plaza  0.06
2  Fast Food Restaurant  0.06
3             Cable Car  0.06
4                  Café  0.06


----Cronenberg----
                venue  freq
0            Bus Stop  0.29
1               Plaza  0.14
2  Italian Restaurant  0.14
3   Food & Drink Shop  0.14
4           Drugstore  0.14


----Elberfeld----
                venue  freq
0                Café  0.07
1               Hotel  0.06
2  Italian Restaurant  0.05
3         Supermarket  0.05
4      Clothing Store  0.04


----Elberfeld West----
                  venue  freq
0                 Hotel  0.09
1           Supermarket  0.09
2                   Zoo  0.05
3    Chinese Restaurant  0.05
4  Fast Food Restaurant  0.05


----Heckinghausen----
                  venue  freq
0  Fast Food Restaurant  0.12
1             Drugstore  0.12
2                  Café  0.12
3  Gym / Fitness Center  0.06
4              Bus Stop  0.06


----Langerfeld-Be

In [0]:
#let's put into df
#First, let's write a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the `num_top_venues` largest entries of `row`.

    The first entry of `row` is skipped (in this notebook it holds the
    neighborhood name); the remaining entries are ranked in descending order.
    Fewer labels are returned when `row` has fewer remaining entries.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [0]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # the first three ranks take their own suffix, every later rank takes 'th'
    # (an explicit test instead of a bare `except:` around the list lookup)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# collect one complete row per neighborhood, then create the dataframe in a single step
top_venue_rows = []
for ind in range(wuppertal_grouped.shape[0]):
    hood_row = wuppertal_grouped.iloc[ind, :]
    top_venues = return_most_common_venues(hood_row, num_top_venues)
    top_venue_rows.append([hood_row['Neighborhood']] + list(top_venues))

# same row index as wuppertal_grouped
neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns, index=wuppertal_grouped.index)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barmen,Supermarket,Fast Food Restaurant,Café,Cable Car,Plaza,Market,Opera House,Discount Store,Music Venue,Drugstore
1,Cronenberg,Bus Stop,Gas Station,Drugstore,Plaza,Italian Restaurant,Food & Drink Shop,Furniture / Home Store,Electronics Store,Event Space,Falafel Restaurant
2,Elberfeld,Café,Hotel,Supermarket,Italian Restaurant,Clothing Store,Bar,Drugstore,Coffee Shop,Ice Cream Shop,Fast Food Restaurant
3,Elberfeld West,Supermarket,Hotel,Zoo,Sandwich Place,Pizza Place,Cocktail Bar,Intersection,Cable Car,Racetrack,Light Rail Station
4,Heckinghausen,Fast Food Restaurant,Café,Drugstore,Gym / Fitness Center,Bus Stop,Diner,Clothing Store,Cable Car,Opera House,German Restaurant
5,Langerfeld-Beyenburg,Antique Shop,Farmers Market,Camera Store,Furniture / Home Store,Drugstore,Electronics Store,Event Space,Falafel Restaurant,Fast Food Restaurant,Food & Drink Shop
6,Oberbarmen,Historic Site,Photography Studio,Supermarket,Rest Area,Food & Drink Shop,Discount Store,Drugstore,Electronics Store,Event Space,Falafel Restaurant
7,Ronsdorf,Supermarket,Fast Food Restaurant,Drugstore,Pizza Place,IT Services,Ice Cream Shop,Plaza,Gas Station,Bus Stop,Light Rail Station
8,Uellendahl-Katernberg,Business Service,Bakery,Park,Pharmacy,Grocery Store,Fast Food Restaurant,Diner,Discount Store,Drugstore,Electronics Store
9,Vohwinkel,Supermarket,Gym / Fitness Center,Café,Hotel,Bakery,Event Space,Gas Station,Cable Car,Electronics Store,Falafel Restaurant


## Cluster Neighborhoods

Run k-means to cluster the neighborhoods into 4 clusters.

In [0]:
# set number of clusters
kclusters = 4

# keep only the frequency columns; `columns=` replaces the positional axis argument,
# which pandas 2.0 no longer accepts
wuppertal_grouped_clustering = wuppertal_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 (the default of older scikit-learn releases) so that the result
# does not depend on the default of the installed version
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(wuppertal_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 2, 2, 0, 3, 2, 1, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [0]:
# add clustering labels
# A 'Cluster Labels' column left over from an earlier run of this cell is dropped first,
# because DataFrame.insert raises ValueError when the column already exists.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge the neighborhood coordinates (df_wuppertal) with the cluster label and top venues of each neighborhood
wuppertal_merged = df_wuppertal.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

wuppertal_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barmen,51.267846,7.184963,2,Supermarket,Fast Food Restaurant,Café,Cable Car,Plaza,Market,Opera House,Discount Store,Music Venue,Drugstore
1,Cronenberg,51.205583,7.142629,2,Bus Stop,Gas Station,Drugstore,Plaza,Italian Restaurant,Food & Drink Shop,Furniture / Home Store,Electronics Store,Event Space,Falafel Restaurant
2,Elberfeld,51.246547,7.14785,2,Café,Hotel,Supermarket,Italian Restaurant,Clothing Store,Bar,Drugstore,Coffee Shop,Ice Cream Shop,Fast Food Restaurant
3,Elberfeld West,51.248559,7.109591,2,Supermarket,Hotel,Zoo,Sandwich Place,Pizza Place,Cocktail Bar,Intersection,Cable Car,Racetrack,Light Rail Station
4,Heckinghausen,51.260607,7.211397,2,Fast Food Restaurant,Café,Drugstore,Gym / Fitness Center,Bus Stop,Diner,Clothing Store,Cable Car,Opera House,German Restaurant


In [0]:
#To save the dataframe, the notebook is connected to Google Drive; mounting asks for an authorization code
from google.colab import drive
# mount the Drive under the relative folder 'drive'
drive.mount('drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at drive


In [0]:
#Now the dataframe can be saved: first write it to a CSV file in the working directory ...
wuppertal_merged.to_csv('wuppertal_merged.csv')
# ... then copy that file into the mounted Drive folder with a shell command
!cp wuppertal_merged.csv drive/My\ Drive/

In [0]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: kclusters evenly spaced colors of the rainbow colormap, as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(wuppertal_merged['Latitude'], wuppertal_merged['Longitude'], wuppertal_merged['Neighborhood'], wuppertal_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # index cluster-1 is kept so the colors stay as before: label 0 maps to index -1,
    # i.e. the last color, so every label still gets its own color
    cluster_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Map of Wuppertal neighborhoods with top 10 clustered venues

In [0]:
# create map of Wuppertal using the latitude and longitude values printed by the Nominatim lookup
latitude= 51.264018
longitude= 7.1780374


# NOTE(review): must match the number of clusters used in the k-means cell
kclusters=4
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: kclusters evenly spaced colors of the rainbow colormap, as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one large marker per neighborhood, colored by its cluster
for lat, lon, poi, cluster in zip(wuppertal_merged['Latitude'], wuppertal_merged['Longitude'], wuppertal_merged['Neighborhood'], wuppertal_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # index cluster-1 is kept so the colors stay as before: label 0 maps to index -1,
    # i.e. the last color, so every label still gets its own color
    cluster_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=20,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

# add a small blue marker at each neighborhood's coordinates on top of the cluster markers
for lat, lng, neighborhood in zip(df_wuppertal['Latitude'], df_wuppertal['Longitude'], df_wuppertal['Neighborhood']):
    label = folium.Popup(neighborhood, parse_html=True)
    # parse_html belongs to the Popup above, so it is no longer passed to CircleMarker as well
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_clusters)


map_clusters