# The Battle of Neighborhoods

## 1. Setting up the environment

In [1]:
!pip install beautifulsoup4
!pip install lxml
!pip install html5lib
!pip install requests
!conda install -c conda-forge geopy --yes
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

import requests
import pandas as pd
import numpy as np
#import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
print('Libraraies imported')

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.2
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.

Libraraies imported



## 2. Scraping Top EU Cities Table

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_cities_in_the_European_Union_by_population_within_city_limits'
page = requests.get(url).text
soup = BeautifulSoup(page, 'lxml') # get the page html

table = soup.find_all('table')[1] # pick second table in the page
df = pd.read_html(str(table)) # read table as a list of DataFrame objects
df = df[0] # Change list of objects to a DF by picking the 1st object


# Change type from object to int/str
df = df.astype({'City': str, 'Member State': str, 'Officialpopulation': int})

df.head()

Unnamed: 0,City,Member State,Officialpopulation,Date of census,Reference,Photography
0,Berlin,Germany,3748148,31 December 2018,[1],
1,Madrid,Spain,3348536,1 February 2020,[2],
2,Rome,Italy,2856133,31 December 2018,[3],
3,Bucharest,Romania,2151665,1 January 2020,[4],
4,Paris,France,2140526,1 January 2019,[5][6],


In [3]:
## Keeping only relevant columns

df1=df[['City','Member State','Officialpopulation']]
df1.head()

Unnamed: 0,City,Member State,Officialpopulation
0,Berlin,Germany,3748148
1,Madrid,Spain,3348536
2,Rome,Italy,2856133
3,Bucharest,Romania,2151665
4,Paris,France,2140526


In [4]:
df1.shape

(93, 3)

#### We will select only those cities that have a population of more than half a million people.

In [5]:
df1 = df1.loc[df1['Officialpopulation'] > 500000] # Selecting cities with population > 0.5 million
df1

Unnamed: 0,City,Member State,Officialpopulation
0,Berlin,Germany,3748148
1,Madrid,Spain,3348536
2,Rome,Italy,2856133
3,Bucharest,Romania,2151665
4,Paris,France,2140526
5,Vienna,Austria,1911728
6,Hamburg,Germany,1841179
7,Warsaw,Poland,1790658
8,Budapest,Hungary,1752286
9,Barcelona,Spain,1620343


In [6]:
df1.shape

(53, 3)

In [7]:
# Merging City and State name and placing this information adding by a new column

df1['Location'] = df1['City'] + ', ' + df1['Member State']
df1.head()

Unnamed: 0,City,Member State,Officialpopulation,Location
0,Berlin,Germany,3748148,"Berlin, Germany"
1,Madrid,Spain,3348536,"Madrid, Spain"
2,Rome,Italy,2856133,"Rome, Italy"
3,Bucharest,Romania,2151665,"Bucharest, Romania"
4,Paris,France,2140526,"Paris, France"


## 3. Scraping Top UK Cities Table

In [8]:
url = 'https://en.wikipedia.org/wiki/List_of_cities_in_the_United_Kingdom'
page = requests.get(url).text
soup = BeautifulSoup(page, 'lxml') # get the page html

table = soup.find_all('table')[0] # pick second table in the page
df2 = pd.read_html(str(table)) # read table as a list of DataFrame objects
df2 = df2[0] # Change list of objects to a DF by picking the 1st object

df2.head()

Unnamed: 0,City[1],Year grantedor confirmed,Cathedral (pre-1889 England & Wales only),City council,Nation/Region,Image,Population
0,Aberdeen32(Scots: Aiberdeen)(Scottish Gaelic: ...,31(Burgh: 1179),not applicable,Local government district(Council area),Scotland,,18912038
1,Armagh11(Irish: Ard Mhacha)(Ulster-Scots: Airm...,,not applicable,"None. Represented on Armagh City, Banbridge an...",Northern Ireland,,"14,777 (2011)[13]"
2,Bangor1,time immemorial,Cathedral Church of St Deiniol,Community,Wales,,"18,808 (2011)[14]"
3,Bath1,,Abbey Church of SS Peter & Paul4,Charter trustees,"South West, England",,"88,859 (2011)[15]97,311 (urban area, 2010)[16]"
4,Belfast(Irish: Béal Feirste)(Ulster-Scots: Bil...,,not applicable,Local government district,Northern Ireland,,"333,871 (2011)[17]"


#### Let's start data cleaning

In [9]:
#renaming the columns
df2=df2.rename(columns={"City[1]": "City", "Nation/Region": "Member State", "Population": "Officialpopulation"})

# Keeping only relevant columns
df2=df2[['City','Member State','Officialpopulation']]

df2.head(20)

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen32(Scots: Aiberdeen)(Scottish Gaelic: ...,Scotland,18912038
1,Armagh11(Irish: Ard Mhacha)(Ulster-Scots: Airm...,Northern Ireland,"14,777 (2011)[13]"
2,Bangor1,Wales,"18,808 (2011)[14]"
3,Bath1,"South West, England","88,859 (2011)[15]97,311 (urban area, 2010)[16]"
4,Belfast(Irish: Béal Feirste)(Ulster-Scots: Bil...,Northern Ireland,"333,871 (2011)[17]"
5,Birmingham2,"West Midlands, England","1,092,330 (2013)[18]"
6,Bradford1,"Yorkshire and the Humber, England","522,452 (2011)[19]"
7,Brighton & Hove15,"South East, England","273,369 (2011)[19]"
8,Bristol1,"South West, England","428,234 (2011)[19]"
9,Cambridge3,"East, England","123,867 (2011)[19]"


#### Cleaning the unncessary years and numbers from 'Oficialpopulation' column

In [10]:

# Importing re package for using regular expressions 
import re 
  
# Function to clean the names 
def Clean_names(Column_name): 
    # Search for opening bracket in the name followed by 
    # any characters repeated any number of times 
    if re.search('\(.*', Column_name): 
  
        # Extract the position of beginning of pattern 
        pos = re.search('\(.*', Column_name).start() 
  
        # return the cleaned name 
        return Column_name[:pos] 
  
    else: 
        # if clean up needed return the same name 
        return Column_name 
          
# Updated the city columns 
df2['Officialpopulation'] = df2['Officialpopulation'].apply(Clean_names) 
df2['City'] =df2['City'].apply(Clean_names) 
# Print the updated dataframe 
df2.head()

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen32,Scotland,18912038
1,Armagh11,Northern Ireland,14777
2,Bangor1,Wales,18808
3,Bath1,"South West, England",88859
4,Belfast,Northern Ireland,333871


#### Extracting the second part from 'Member State'

In [11]:
# Function to clean the names 
def Clean_names(Column_name): 
    # Search for opening bracket in the name followed by 
    # any characters repeated any number of times 
    if re.search('\,.*', Column_name): 
  
        # Extract the position of beginning of pattern 
        pos = re.search('\,.*', Column_name).start() 
  
        # return the cleaned name 
        return Column_name[pos+2:] 
  
    else: 
        # if clean up needed return the same name 
        return Column_name 
          
# Updated the city columns 
df2['Member State'] =df2['Member State'].apply(Clean_names) 
# Print the updated dataframe 
df2.head()

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen32,Scotland,18912038
1,Armagh11,Northern Ireland,14777
2,Bangor1,Wales,18808
3,Bath1,England,88859
4,Belfast,Northern Ireland,333871


#### Cleaning the unnecessary numbers from 'City' Column

In [12]:
#Function to clean the names 
def Clean_names(Column_name): 
    # Search for opening bracket in the name followed by 
    # any characters repeated any number of times 
    if re.search('\d.*', Column_name): 
  
        # Extract the position of beginning of pattern 
        pos = re.search('\d.*', Column_name).start() 
  
        # return the cleaned name 
        return Column_name[:pos] 
  
    else: 
        # if clean up needed return the same name 
        return Column_name 
          
# Updated the city columns 
df2['City'] =df2['City'].apply(Clean_names) 
# Print the updated dataframe 
df2.head()

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen,Scotland,18912038
1,Armagh,Northern Ireland,14777
2,Bangor,Wales,18808
3,Bath,England,88859
4,Belfast,Northern Ireland,333871


#### Cleaning the rest of the unnecessary comma and bracket from 'Officialpopulation'

In [15]:
# Function to clean the names 
def Clean_names(Column_name): 
    # Search for opening bracket in the name followed by 
    # any characters repeated any number of times 
    if re.search('\,.*', Column_name): 
  
        # Extract the position of beginning of pattern 
        pos = re.search('\,.*', Column_name).start() 
  
        # return the cleaned name 
        return Column_name[:pos] + Column_name[pos+1:]
    
    if re.search('\[.*', Column_name): 
  
        # Extract the position of beginning of pattern 
        pos = re.search('\[.*', Column_name).start() 
  
        # return the cleaned name 
        return Column_name[:pos] 
  
    else: 
        # if clean up needed return the same name 
        return Column_name 
          
# Updated the city columns 
df2['Officialpopulation'] =df2['Officialpopulation'].apply(Clean_names) 
# Print the updated dataframe 
df2.head(10)

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen,Scotland,18912038
1,Armagh,Northern Ireland,14777
2,Bangor,Wales,18808
3,Bath,England,88859
4,Belfast,Northern Ireland,333871
5,Birmingham,England,1092330
6,Bradford,England,522452
7,Brighton & Hove,England,273369
8,Bristol,England,428234
9,Cambridge,England,123867


In [16]:
#Change type from object to int/str
df2 = df2.astype({'City': str, 'Member State': str, 'Officialpopulation': int})

# Selecting cities with population > 0.5 million
df3 = df2.loc[df2['Officialpopulation'] > 500000] 
df3.head()

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen,Scotland,18912038
5,Birmingham,England,1092330
6,Bradford,England,522452
19,Dundee,Scotland,15399038
21,Edinburgh,Scotland,46872038


In [17]:
df3.shape

(11, 3)

In [18]:
df3=df3.reset_index(drop=True)
df3

Unnamed: 0,City,Member State,Officialpopulation
0,Aberdeen,Scotland,18912038
1,Birmingham,England,1092330
2,Bradford,England,522452
3,Dundee,Scotland,15399038
4,Edinburgh,Scotland,46872038
5,Glasgow,Scotland,60308038
6,Leeds,England,751485
7,Manchester,England,503127
8,Perth,Scotland,4577038
9,Sheffield,England,552698


In [19]:
# Merging City and State name and placing this information adding by a new column

df3['Location'] = df3['City'] + ', ' + df3['Member State']
df3.head()

Unnamed: 0,City,Member State,Officialpopulation,Location
0,Aberdeen,Scotland,18912038,"Aberdeen, Scotland"
1,Birmingham,England,1092330,"Birmingham, England"
2,Bradford,England,522452,"Bradford, England"
3,Dundee,Scotland,15399038,"Dundee, Scotland"
4,Edinburgh,Scotland,46872038,"Edinburgh, Scotland"


### Merging EU & UK Dataframe

In [20]:
df_EU_UK = pd.concat([df1, df3])
df_EU_UK.tail()

Unnamed: 0,City,Member State,Officialpopulation,Location
6,Leeds,England,751485,"Leeds, England"
7,Manchester,England,503127,"Manchester, England"
8,Perth,Scotland,4577038,"Perth, Scotland"
9,Sheffield,England,552698,"Sheffield, England"
10,Stirling,Scotland,3479038,"Stirling, Scotland"


In [21]:
df_new=df_EU_UK.reset_index(drop=True)
df_new.tail()

Unnamed: 0,City,Member State,Officialpopulation,Location
59,Leeds,England,751485,"Leeds, England"
60,Manchester,England,503127,"Manchester, England"
61,Perth,Scotland,4577038,"Perth, Scotland"
62,Sheffield,England,552698,"Sheffield, England"
63,Stirling,Scotland,3479038,"Stirling, Scotland"


In [22]:
df_new.shape

(64, 4)

## 4. Adding Geographic Coordinates

In [23]:
df_new['Latitude'] = np.nan
df_new['Longitude'] = np.nan

for index, row in df_new.iterrows():
    address = df_new.Location[index]
    geolocator = Nominatim(user_agent="EU")
    loc = geolocator.geocode(address, timeout=10)
    df_new.at[index,'Latitude'] = loc.latitude
    df_new.at[index,'Longitude'] = loc.longitude

df_new.head()

Unnamed: 0,City,Member State,Officialpopulation,Location,Latitude,Longitude
0,Berlin,Germany,3748148,"Berlin, Germany",52.517037,13.38886
1,Madrid,Spain,3348536,"Madrid, Spain",40.416705,-3.703582
2,Rome,Italy,2856133,"Rome, Italy",41.89332,12.482932
3,Bucharest,Romania,2151665,"Bucharest, Romania",44.436141,26.10272
4,Paris,France,2140526,"Paris, France",48.856697,2.351462


## 5. Adding Venues for Each City

In [26]:
# FourSqaure credentials and parameters

CLIENT_ID = 'RFI3LYZIDBCYB5CVFHZ1RREZYWPJZ2NBQIHUYZU0DPZ3GJGK'
CLIENT_SECRET = '5ISB02OTKAUG1HL0QDQXZ143TV33GLN1KLMW5BMSA0EDAELS'
VERSION = '20180605'

LIMIT = 100

### Function to get venues in all neighbourhoods

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=20000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'City Latitude', 
                  'City Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [28]:
EU_UK_venues = getNearbyVenues(names=df_new['Location'],
                                   latitudes=df_new['Latitude'],
                                   longitudes=df_new['Longitude']
                                  )
EU_UK_venues.head()

Berlin, Germany
Madrid, Spain
Rome, Italy
Bucharest, Romania
Paris, France
Vienna, Austria
Hamburg, Germany
Warsaw, Poland
Budapest, Hungary
Barcelona, Spain
Munich, Germany
Milan, Italy
Prague, Czech Republic
Sofia, Bulgaria
Cologne, Germany
Stockholm, Sweden
Naples, Italy
Turin, Italy
Amsterdam, Netherlands
Marseille, France
Zagreb, Croatia
Copenhagen, Denmark
Valencia, Spain
Kraków, Poland
Frankfurt, Germany
Seville, Spain
Łódź, Poland
Zaragoza, Spain
Athens, Greece
Palermo, Italy
Rotterdam, Netherlands
Helsinki, Finland
Wrocław, Poland
Stuttgart, Germany
Riga, Latvia
Düsseldorf, Germany
Leipzig, Germany
Dortmund, Germany
Essen, Germany
Gothenburg, Sweden
Genoa, Italy
Málaga, Spain
Bremen, Germany
Vilnius, Lithuania
Dresden, Germany
Dublin, Ireland
The Hague, Netherlands
Hanover, Germany
Poznań, Poland
Antwerp, Belgium
Nuremberg, Germany
Lyon, France
Lisbon, Portugal
Aberdeen, Scotland
Birmingham, England
Bradford, England
Dundee, Scotland
Edinburgh, Scotland
Glasgow, Scotland
Leeds

Unnamed: 0,City,City Latitude,City Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Berlin, Germany",52.517037,13.38886,Dussmann English Bookshop,52.518223,13.389239,Bookstore
1,"Berlin, Germany",52.517037,13.38886,Dussmann das KulturKaufhaus,52.518343,13.388965,Bookstore
2,"Berlin, Germany",52.517037,13.38886,Gendarmenmarkt,52.51357,13.39272,Plaza
3,"Berlin, Germany",52.517037,13.38886,Konzerthaus Berlin,52.513639,13.391795,Concert Hall
4,"Berlin, Germany",52.517037,13.38886,Lafayette Gourmet,52.514385,13.389569,Gourmet Shop


In [29]:
EU_UK_venues.shape

(6300, 7)

In [30]:
print('There are {} uniques categories.'.format(len(EU_UK_venues['Venue Category'].unique())))

There are 361 uniques categories.


## 6. Analyzing Venues in Each City

In [31]:
# one hot encoding
EU_UK_onehot = pd.get_dummies(EU_UK_venues[['Venue Category']], prefix="", prefix_sep="")

# add city column back to dataframe
EU_UK_onehot['City'] = EU_UK_venues['City'] 

# move city column to the first column
fixed_columns = [EU_UK_onehot.columns[-1]] + list(EU_UK_onehot.columns[:-1])
EU_UK_onehot = EU_UK_onehot[fixed_columns]

EU_UK_onehot.head()

Unnamed: 0,City,Accessories Store,Advertising Agency,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Antique Shop,Aquarium,Arcade,...,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Berlin, Germany",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Berlin, Germany",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Berlin, Germany",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Berlin, Germany",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Berlin, Germany",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
# Group rows by city and take mean frequency of occurence of each category

EU_UK_grouped = EU_UK_onehot.groupby('City').mean().reset_index()
EU_UK_grouped.head()

Unnamed: 0,City,Accessories Store,Advertising Agency,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Antique Shop,Aquarium,Arcade,...,Waterfall,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Aberdeen, Scotland",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Amsterdam, Netherlands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0
2,"Antwerp, Belgium",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
3,"Athens, Greece",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0
4,"Barcelona, Spain",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0


### Function to sort venues in descending order

In [33]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Creating dataframe with Top 10 venues for each neighborhood

In [34]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = EU_UK_grouped['City']

for ind in np.arange(EU_UK_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(EU_UK_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Aberdeen, Scotland",Bar,Beer Bar,Hotel,Beach,Coffee Shop,Restaurant,Café,Park,Department Store,Seafood Restaurant
1,"Amsterdam, Netherlands",Hotel,Coffee Shop,Cocktail Bar,Yoga Studio,Bar,Plaza,Breakfast Spot,Café,Indie Movie Theater,Restaurant
2,"Antwerp, Belgium",Coffee Shop,Cocktail Bar,Bar,Italian Restaurant,Restaurant,French Restaurant,Plaza,Clothing Store,Asian Restaurant,Seafood Restaurant
3,"Athens, Greece",Café,Coffee Shop,Historic Site,Bar,Hotel,Wine Bar,Meze Restaurant,History Museum,Boutique,Park
4,"Barcelona, Spain",Tapas Restaurant,Hotel,Coffee Shop,Plaza,Bookstore,Ice Cream Shop,Pizza Place,Cocktail Bar,Spanish Restaurant,Spa


## 7. Clustering Cities

In [35]:
# set number of clusters
kclusters = 11

EU_UK_grouped_clustering = EU_UK_grouped.drop('City', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(EU_UK_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([10,  0, 10, 10,  5,  0,  6,  6,  9,  3], dtype=int32)

### Creating dataframe that includes cluster as well as top 10 venues for each city

In [36]:
# adding clustering labels
city_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

city_venues_sorted.rename(columns={'City':'Location'}, inplace=True)

city_merged = df_new

# merge city_grouped with city_data to add latitude/longitude for each city
city_merged = city_merged.join(city_venues_sorted.set_index('Location'), on='Location')

# Change cluster float to integer
city_merged['Cluster Labels'] = city_merged['Cluster Labels'].fillna(0.0).astype(int)

city_merged.head()

Unnamed: 0,City,Member State,Officialpopulation,Location,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berlin,Germany,3748148,"Berlin, Germany",52.517037,13.38886,0,Coffee Shop,Park,Bookstore,Ice Cream Shop,Café,Bakery,Hotel,Wine Bar,Monument / Landmark,Gourmet Shop
1,Madrid,Spain,3348536,"Madrid, Spain",40.416705,-3.703582,5,Plaza,Spanish Restaurant,Hotel,Restaurant,Art Museum,Coffee Shop,Tapas Restaurant,Park,Ice Cream Shop,Monument / Landmark
2,Rome,Italy,2856133,"Rome, Italy",41.89332,12.482932,8,Plaza,Historic Site,Monument / Landmark,Ice Cream Shop,Italian Restaurant,Church,Sandwich Place,Fountain,Pizza Place,Hotel
3,Bucharest,Romania,2151665,"Bucharest, Romania",44.436141,26.10272,3,Coffee Shop,Dessert Shop,Hotel,Park,Café,Burger Joint,Tea Room,Bar,Pizza Place,Beer Garden
4,Paris,France,2140526,"Paris, France",48.856697,2.351462,7,Plaza,Hotel,Wine Bar,Bookstore,Cocktail Bar,Ice Cream Shop,Bakery,Italian Restaurant,Art Museum,Historic Site


## 8. Examining Clusters

#### Cluster 0

In [53]:
clust_0= city_merged.loc[city_merged['Cluster Labels'] == 0, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_0

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berlin,Germany,0,Coffee Shop,Park,Bookstore,Ice Cream Shop,Café,Bakery,Hotel,Wine Bar,Monument / Landmark,Gourmet Shop
6,Hamburg,Germany,0,Hotel,Café,Coffee Shop,Park,Vietnamese Restaurant,Cocktail Bar,Pizza Place,Theater,Steakhouse,Bookstore
8,Budapest,Hungary,0,Coffee Shop,Bakery,Pizza Place,Ice Cream Shop,Hotel,Dessert Shop,Italian Restaurant,Café,Park,Indie Movie Theater
11,Milan,Italy,0,Hotel,Plaza,Ice Cream Shop,Monument / Landmark,Pizza Place,Boutique,Cocktail Bar,Italian Restaurant,Café,Hostel
12,Prague,Czech Republic,0,Hotel,Park,Café,Ice Cream Shop,Coffee Shop,Burger Joint,Yoga Studio,Scenic Lookout,Garden,Indie Movie Theater
18,Amsterdam,Netherlands,0,Hotel,Coffee Shop,Cocktail Bar,Yoga Studio,Bar,Plaza,Breakfast Spot,Café,Indie Movie Theater,Restaurant
23,Kraków,Poland,0,Hotel,Café,Plaza,Park,Bar,Ice Cream Shop,Beer Bar,Bakery,Italian Restaurant,Cocktail Bar
43,Vilnius,Lithuania,0,Hotel,Coffee Shop,Café,Scenic Lookout,Wine Bar,Bar,Gym,Dessert Shop,Cocktail Bar,Plaza
52,Lisbon,Portugal,0,Plaza,Portuguese Restaurant,Scenic Lookout,Vegetarian / Vegan Restaurant,Hotel,Garden,Ice Cream Shop,Restaurant,Pizza Place,Café


In [54]:
clust_0.shape

(9, 13)

#### Cluster 1

In [55]:
clust_1=city_merged.loc[city_merged['Cluster Labels'] == 1, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_1

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Sofia,Bulgaria,1,Park,Bakery,Cocktail Bar,Coffee Shop,Restaurant,Vegetarian / Vegan Restaurant,Theater,Ice Cream Shop,Bar,Italian Restaurant
21,Copenhagen,Denmark,1,Coffee Shop,Beer Bar,Café,Park,Bakery,Wine Bar,Scandinavian Restaurant,Breakfast Spot,Cocktail Bar,Wine Shop
46,The Hague,Netherlands,1,Park,Bar,Café,Restaurant,Coffee Shop,Breakfast Spot,Italian Restaurant,Sandwich Place,Seafood Restaurant,Theater
56,Dundee,Scotland,1,Park,Café,Hotel,Coffee Shop,Beach,Golf Course,Restaurant,Bar,Fish & Chips Shop,Pub


In [56]:
clust_1.shape

(4, 13)

#### Cluster 2

In [57]:
clust_2=city_merged.loc[city_merged['Cluster Labels'] == 2, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_2

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Naples,Italy,2,Pizza Place,Plaza,Café,Italian Restaurant,Ice Cream Shop,Historic Site,Hotel,Wine Bar,Park,Bar
29,Palermo,Italy,2,Italian Restaurant,Ice Cream Shop,Pizza Place,Historic Site,Café,Bar,Plaza,Mediterranean Restaurant,Park,Beach
40,Genoa,Italy,2,Italian Restaurant,Pizza Place,Plaza,Ice Cream Shop,Hotel,Beach,Historic Site,Bar,Bakery,Scenic Lookout


In [58]:
clust_2.shape

(3, 13)

#### Cluster 3

In [59]:
clust_3=city_merged.loc[city_merged['Cluster Labels'] == 3, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_3

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Bucharest,Romania,3,Coffee Shop,Dessert Shop,Hotel,Park,Café,Burger Joint,Tea Room,Bar,Pizza Place,Beer Garden
60,Manchester,England,3,Pub,Coffee Shop,Hotel,Beer Bar,Pizza Place,Indian Restaurant,Café,Restaurant,Tapas Restaurant,Soccer Stadium
63,Stirling,Scotland,3,Coffee Shop,Pub,Supermarket,Italian Restaurant,Hotel,Café,Gastropub,Park,Bar,Indian Restaurant


In [60]:
clust_3.shape

(3, 13)

#### Cluster 4

In [61]:
clust_4=city_merged.loc[city_merged['Cluster Labels'] == 4, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_4

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Zagreb,Croatia,4,Café,Bar,Plaza,Restaurant,BBQ Joint,Dessert Shop,Hotel,Coffee Shop,Bistro,Pub
61,Perth,Scotland,4,Café,Bar,Coffee Shop,Restaurant,American Restaurant,Park,French Restaurant,Movie Theater,Hotel,Palace


In [62]:
clust_4.shape

(2, 13)

#### Cluster 5

In [63]:
clust_5= city_merged.loc[city_merged['Cluster Labels'] == 5, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_5

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Madrid,Spain,5,Plaza,Spanish Restaurant,Hotel,Restaurant,Art Museum,Coffee Shop,Tapas Restaurant,Park,Ice Cream Shop,Monument / Landmark
9,Barcelona,Spain,5,Tapas Restaurant,Hotel,Coffee Shop,Plaza,Bookstore,Ice Cream Shop,Pizza Place,Cocktail Bar,Spanish Restaurant,Spa
22,Valencia,Spain,5,Hotel,Tapas Restaurant,Park,Plaza,Aquarium,Garden,Paella Restaurant,Ice Cream Shop,Café,Spanish Restaurant
25,Seville,Spain,5,Tapas Restaurant,Spanish Restaurant,Plaza,Hotel,Mediterranean Restaurant,Restaurant,Ice Cream Shop,Monument / Landmark,Winery,Museum
27,Zaragoza,Spain,5,Tapas Restaurant,Restaurant,Hotel,Spanish Restaurant,Plaza,Bar,Brewery,Coffee Shop,Park,Burger Joint
41,Málaga,Spain,5,Hotel,Café,Plaza,Tapas Restaurant,Art Museum,Beach,Park,Beer Bar,Coffee Shop,Ice Cream Shop


In [64]:
clust_5.shape

(6, 13)

#### Cluster 6

In [65]:
clust_6=city_merged.loc[city_merged['Cluster Labels'] == 6, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_6

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,Dublin,Ireland,6,Café,Pub,Coffee Shop,Park,Restaurant,Plaza,Hotel,Brewery,Gastropub,Soccer Stadium
54,Birmingham,England,6,Pub,Bar,Coffee Shop,Indian Restaurant,Park,Department Store,Beer Bar,Shopping Mall,Cocktail Bar,Italian Restaurant
55,Bradford,England,6,Pub,Bar,Coffee Shop,Park,Beer Bar,Indian Restaurant,Brewery,Italian Restaurant,Café,Restaurant
62,Sheffield,England,6,Pub,Park,Coffee Shop,Café,Bar,Bakery,Climbing Gym,Beer Store,Theater,Grocery Store


In [66]:
clust_6.shape

(4, 13)

#### Cluster 7

In [67]:
clust_7=city_merged.loc[city_merged['Cluster Labels'] == 7, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_7

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Paris,France,7,Plaza,Hotel,Wine Bar,Bookstore,Cocktail Bar,Ice Cream Shop,Bakery,Italian Restaurant,Art Museum,Historic Site
5,Vienna,Austria,7,Austrian Restaurant,Plaza,Hotel,Ice Cream Shop,Park,Restaurant,Italian Restaurant,Art Museum,Café,Bar
19,Marseille,France,7,French Restaurant,Hotel,Plaza,Bar,Provençal Restaurant,Historic Site,Park,Church,Italian Restaurant,Coffee Shop
51,Lyon,France,7,Bar,Plaza,French Restaurant,Lyonese Bouchon,Café,Cocktail Bar,Historic Site,Church,Burger Joint,Garden


In [68]:
clust_7.shape

(4, 13)

#### Cluster 8

In [69]:
clust_8= city_merged.loc[city_merged['Cluster Labels'] == 8, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_8

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Rome,Italy,8,Plaza,Historic Site,Monument / Landmark,Ice Cream Shop,Italian Restaurant,Church,Sandwich Place,Fountain,Pizza Place,Hotel
17,Turin,Italy,8,Plaza,Ice Cream Shop,Piedmontese Restaurant,Historic Site,Pizza Place,Café,Hotel,Japanese Restaurant,Park,Monument / Landmark


In [70]:
clust_8.shape

(2, 13)

#### Cluster 9

In [71]:
clust_9= city_merged.loc[city_merged['Cluster Labels'] == 9, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_9

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Warsaw,Poland,9,Park,Cocktail Bar,Café,Hotel,Vegetarian / Vegan Restaurant,Coffee Shop,Polish Restaurant,Plaza,Beer Bar,Ice Cream Shop
10,Munich,Germany,9,Café,Plaza,German Restaurant,Hotel,Cocktail Bar,Ice Cream Shop,Beer Garden,Italian Restaurant,Coffee Shop,Beach
14,Cologne,Germany,9,Park,Café,Hotel,Coffee Shop,Pedestrian Plaza,Scenic Lookout,Plaza,Italian Restaurant,Ice Cream Shop,Burger Joint
15,Stockholm,Sweden,9,Scandinavian Restaurant,Café,Hotel,Park,Bakery,Bookstore,Coffee Shop,Falafel Restaurant,Cocktail Bar,Plaza
24,Frankfurt,Germany,9,Café,Hotel,Park,Japanese Restaurant,Coffee Shop,Bar,Italian Restaurant,Art Museum,Ice Cream Shop,Plaza
26,Łódź,Poland,9,Hotel,Park,Coffee Shop,Italian Restaurant,Café,Bar,Theater,Restaurant,Pub,Performing Arts Venue
31,Helsinki,Finland,9,Coffee Shop,Scandinavian Restaurant,Café,Park,Hotel,French Restaurant,Art Museum,Restaurant,Asian Restaurant,Music Venue
32,Wrocław,Poland,9,Café,Park,Ice Cream Shop,Hotel,Beer Bar,Pizza Place,Pub,Bar,Coffee Shop,Vegetarian / Vegan Restaurant
33,Stuttgart,Germany,9,Bar,Park,Café,Ice Cream Shop,German Restaurant,Cocktail Bar,Italian Restaurant,Plaza,Japanese Restaurant,Scenic Lookout
34,Riga,Latvia,9,Park,Bar,Plaza,Hotel,Eastern European Restaurant,Restaurant,Beer Store,Gym,Wine Bar,Spa


In [72]:
clust_9.shape

(21, 13)

#### Cluster 10

In [73]:
clust_10=city_merged.loc[city_merged['Cluster Labels'] == 10, city_merged.columns[[0] + [1] + list(range(6, city_merged.shape[1]))]]
clust_10

Unnamed: 0,City,Member State,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,Athens,Greece,10,Café,Coffee Shop,Historic Site,Bar,Hotel,Wine Bar,Meze Restaurant,History Museum,Boutique,Park
30,Rotterdam,Netherlands,10,Hotel,Bar,Coffee Shop,Café,Ice Cream Shop,Park,Vegetarian / Vegan Restaurant,French Restaurant,Sandwich Place,Bakery
49,Antwerp,Belgium,10,Coffee Shop,Cocktail Bar,Bar,Italian Restaurant,Restaurant,French Restaurant,Plaza,Clothing Store,Asian Restaurant,Seafood Restaurant
53,Aberdeen,Scotland,10,Bar,Beer Bar,Hotel,Beach,Coffee Shop,Restaurant,Café,Park,Department Store,Seafood Restaurant
58,Glasgow,Scotland,10,Bar,Coffee Shop,Italian Restaurant,Café,Park,Indian Restaurant,Cocktail Bar,Pizza Place,Hotel,Beer Bar
59,Leeds,England,10,Bar,Coffee Shop,Pub,Café,Italian Restaurant,Cocktail Bar,Beer Bar,Shopping Mall,Thai Restaurant,Park


In [74]:
clust_10.shape

(6, 13)