In [2]:
import numpy as np 

import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 

import requests 
from pandas.io.json import json_normalize


import matplotlib.cm as cm
import matplotlib.colors as colors


from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

print('Libraries imported.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Libraries imported.


In [3]:
# function that extracts the category of the venue from Forsquare Json fille
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
# Create URL for Forsquare API

CLIENT_ID = 'xxx' # your Foursquare ID
CLIENT_SECRET = 'xxx' # your Foursquare Secret
ACCESS_TOKEN = 'xxx' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 5000
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: xxx
CLIENT_SECRET:xxx


In [5]:
#Get coordinations
address = 'Athens, GR' # Change this

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

37.9839412 23.7283052


In [6]:
# Get Resorts and Hotels
radius = 2000

search_query = 'Resort'
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude,ACCESS_TOKEN, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()
ath1 = results['response']['venues']
ath1 = pd.json_normalize(ath1)

search_query = 'Hotel'
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude,ACCESS_TOKEN, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()
ath2 = results['response']['venues']
ath2 = pd.json_normalize(ath2)

# Make them 1 dataframe
df=pd.concat([ath1, ath2])

# Clean column labels
df.drop(["referralId",'hasPerk','location.labeledLatLngs','location.distance','location.cc',"location.neighborhood","location.address","location.formattedAddress","location.postalCode","location.city","venuePage.id","location.state","location.crossStreet","location.crossStreet","location.country"],axis=1,inplace=True)
df.reset_index(inplace=True)
df.drop(columns='index',inplace=True)
df.rename(columns={'location.lat': "lat","location.lng": "lng"},inplace=True)
df.head()

# Get categorie from the dictionary in the dataframme
df['categories'] = df.apply(get_category_type, axis=1)
df.columns = [column.split('.')[-1] for column in df.columns]

#Clean it from Dog Resorts and Sex Hotels :)
df.drop(df[df.categories != "Hotel"].index,inplace=True)
df.reset_index(inplace=True)
df.drop(columns='index',inplace=True)

df.head()

Unnamed: 0,id,name,categories,lat,lng
0,4fc8e136e4b031d74b98e3c6,Hapimag Resort Athens,Hotel,37.972182,23.732455
1,4b2971d7f964a5200a9f24e3,Polis Grand Hotel,Hotel,37.985458,23.72974
2,4adcdadef964a520ac5721e3,Titania Hotel,Hotel,37.982936,23.730753
3,57beeff9498ef6d5e7385d9d,Ambrosia Hotel & Suites ****,Hotel,37.985092,23.72697
4,4adcdadef964a520b85721e3,Dorian Inn Hotel,Hotel,37.982877,23.72541


In [7]:
# Explore the area to get venues
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
results = requests.get(url).json()
k= results['response']['groups'][0]['items']
venues = pd.json_normalize(k)
venues.head()

#Clean it
venues=venues[["venue.id","venue.name","venue.categories","venue.location.lat","venue.location.lng"]]
venues.rename(columns={'venue.id': "id","venue.name": "name","venue.categories": "categories","venue.location.lat": "lat","venue.location.lng": "lng"},inplace=True)
venues['categories'] = venues.apply(get_category_type, axis=1)
venues.columns = [column.split('.')[-1] for column in venues.columns]
venues.head()

Unnamed: 0,id,name,categories,lat,lng
0,4e822369f5b91d2e46d16f27,National Theatre of Greece (Εθνικό Θέατρο),Theater,37.984742,23.725541
1,4c038ca13f03b713b6fb5141,Στάνη Γαλακτοπωλείο,Dessert Shop,37.985463,23.727908
2,4b64103df964a520759d2ae3,Taf Coffee,Coffee Shop,37.983451,23.730853
3,56ed54a1498e1bc48d70301d,Mr Bean Coffee Brewers,Coffee Shop,37.98361,23.73125
4,4c0a3d99bbc676b0c57549d5,Πρωτοπορία,Bookstore,37.984841,23.732511


In [8]:
df_map = folium.Map(location=[latitude, longitude], zoom_start=14) # generate map centred around Athens

# add a red circle marker to the Centre
folium.CircleMarker(
    [latitude, longitude],
    radius=5,
    color='red',
    popup='Centre',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(df_map)

# add Hotels as blue circle markers
for lat, lng, label in zip(df.lat, df.lng,df.name):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(df_map)

for lat, lng, label in zip(venues.lat, venues.lng,venues.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='green',
        popup=label,
        fill = True,
        fill_color='green',
        fill_opacity=0.6
    ).add_to(df_map)

    
# display map
df_map

In [9]:
# More cleaning
hotels=df[["name","lat","lng"]]
places=venues[["name","lat","lng"]]

hotels.head()

Unnamed: 0,name,lat,lng
0,Hapimag Resort Athens,37.972182,23.732455
1,Polis Grand Hotel,37.985458,23.72974
2,Titania Hotel,37.982936,23.730753
3,Ambrosia Hotel & Suites ****,37.985092,23.72697
4,Dorian Inn Hotel,37.982877,23.72541


In [10]:
# tolist for distance calculation and df recreation

hotels_lat=hotels.lat.tolist()
hotels_lng=hotels.lng.tolist()

places_lat=places.lat.tolist()
places_lng=places.lng.tolist()

hotel_names=hotels["name"].tolist()
place_names=places["name"].tolist()

In [11]:
#calculate distance
from math import sin, cos, sqrt, atan2, radians

dinstance_array=np.zeros((len(hotels_lat), len(places_lat))) 

for i in range(len(hotels_lat)):
    
    lat1 = radians(hotels_lat[i])
    lon1 = radians(hotels_lng[i])
    
    for j in range(len(places_lat)):
        
        lat2 = radians(places_lat[j])
        lon2 = radians(places_lng[j])
        
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
        c = 2000 * atan2(sqrt(a), sqrt(1 - a)) #distance in meters
        dinstance_array[i][j]=c # i=hotels, j=place


In [12]:
#create df with the distances
dist_df=pd.DataFrame(dinstance_array)
dist_df.columns=place_names
dist_df.insert(0,"hotel name",hotel_names)
dist_df.head()

Unnamed: 0,hotel name,National Theatre of Greece (Εθνικό Θέατρο),Στάνη Γαλακτοπωλείο,Taf Coffee,Mr Bean Coffee Brewers,Πρωτοπορία,Της Θεάτρου Το Στέκι,Καραμανλίδικα του Φάνη,Μιράν,mama tierra,Anäna,I Cake You,2 γουλιές & 2 μπουκιές,Falafel Mohammad Ali,The Handlebar,Politeia Bookstore (Βιβλιοπωλείο Πολιτεία),The Clumsies,Montagu (Μόντακιου),Falafellas,Feyrouz,Dope Roasting Co.,Harvest,Πριγκιπώ,Crème Royale,Crust,Μπλε Παπαγάλος,Smak,Warehouse Speciality Blends,Zara,St. Eirini Square (Πλατεία Αγίας Ειρήνης),Just Made 33,Lukumades (LUKUMAΔES),Kuko's The Bar,Ιπποπόταμος,Πορτατίφ,Food Str,Εκδόσεις Ίκαρος,The Dark Side Of Chocolate,Kalimeres,Scala Vinoteca,Coffee Berry,Kaya,Choureal,Match Point,Τα Καραμανλίδικα του Φάνη - Μεζετζίδικο,Full Spoon,Grain - Espresso Bar,Αφοι Ασημακόπουλοι,Toyroom Athens,Louis Vuitton,Άριστον,360°,The Seven Jokers,Indian Masala,Kapnikareas Square (Πλατεία Καπνικαρέας),Hotel Grande Bretagne,Le Greche,Λαϊκή Αγορά Καλλιδρομίου,Χυμοποιείο,ΡΑΚΟΡ,GB Roof Garden Restaurant,Music Factory,CityLink,Temple of Hephaistos (Ναός Ηφαίστου),Désiré,Tower of the Winds - Horologion of Andronicos (Αέρηδες - Ωρολόγιο του Κυρρήστου),THIRD PLACE.,sushimou,Ancient Agora (Αρχαία Αγορά),Elvis (Έλβις),Attica,The James Joyce Irish Pub & Restaurant,Hans & Gretel,Taresso,Roman Agora (Ρωμαϊκή Αγορά),Άλσος Λυκαβηττού,Amandine Bagels & Gourmandises,Οινότυπο,Carpo,Oinoscent,Yiasemi (Γιασεμί),Ο Κώστας 1950,it,Kuzina,Flâneur Souvenirs & Supplies,Blue Fox,Syntagma Square (Πλατεία Συντάγματος),Vasilakou Theatre (Θέατρο Βασιλάκου),Κιμωλία Art Cafe,Η Ταράτσα Του Φοίβου,Nora's Deli,Ciné Paris,Άλ3ξ,Nespresso Boutique,λούης,Erechtheion (Ερέχθειο),Parthenon (Παρθενώνας),MI-RO,National Garden (Εθνικός Κήπος),Πλατεία Δεξαμενής,Yoleni's
0,Hapimag Resort Athens,0.238965,0.240088,0.197914,0.200139,0.220952,0.177631,0.167101,0.165903,0.213374,0.130848,0.223643,0.129971,0.284118,0.15054,0.171773,0.124042,0.128157,0.12496,0.12173,0.131815,0.140555,0.100992,0.144354,0.141261,0.251638,0.097131,0.217558,0.130709,0.106083,0.101367,0.106104,0.092644,0.187692,0.174915,0.10079,0.08566,0.155597,0.161605,0.182412,0.194526,0.08296,0.072418,0.340232,0.1577,0.166124,0.152751,0.244126,0.104593,0.110932,0.080551,0.119314,0.083024,0.169619,0.08874,0.080064,0.053606,0.260999,0.10877,0.255459,0.079052,0.144972,0.091983,0.160287,0.136856,0.078869,0.048448,0.039135,0.143892,0.273211,0.102719,0.156825,0.121639,0.150824,0.10376,0.240735,0.046771,0.259641,0.121009,0.032693,0.066651,0.049695,0.149528,0.162032,0.047318,0.247573,0.067737,0.307201,0.023822,0.268888,0.156621,0.021795,0.164266,0.136142,0.279932,0.083308,0.078182,0.166821,0.0703,0.178201,0.131678
1,Polis Grand Hotel,0.059097,0.025197,0.038234,0.038379,0.03961,0.102286,0.107356,0.106961,0.038927,0.109278,0.036954,0.105974,0.062711,0.120065,0.094905,0.112936,0.109191,0.124603,0.130971,0.131114,0.104179,0.136302,0.118073,0.14214,0.141232,0.140365,0.100833,0.10611,0.144327,0.139687,0.152033,0.153708,0.112224,0.127565,0.142818,0.152291,0.131187,0.145621,0.132095,0.097105,0.157618,0.162336,0.108935,0.174818,0.176521,0.137574,0.116532,0.160037,0.163786,0.156896,0.16857,0.154757,0.178373,0.16029,0.179197,0.182497,0.119726,0.171826,0.195306,0.180104,0.183105,0.15807,0.206267,0.159696,0.202519,0.188967,0.196537,0.2067,0.198218,0.154,0.177857,0.1815,0.173267,0.200527,0.159359,0.191311,0.13285,0.188517,0.202025,0.210239,0.185018,0.177196,0.190337,0.212647,0.149497,0.186005,0.211069,0.212029,0.230204,0.201782,0.237056,0.202627,0.211018,0.213337,0.24043,0.243574,0.214721,0.22568,0.202961,0.185396
2,Titania Hotel,0.078316,0.058955,0.009089,0.013603,0.041119,0.083383,0.083999,0.08301,0.032771,0.068568,0.042207,0.062491,0.10421,0.091283,0.060001,0.069988,0.063027,0.086756,0.093496,0.097147,0.066776,0.092691,0.086496,0.112419,0.147224,0.096725,0.086898,0.059938,0.104684,0.097425,0.113705,0.112238,0.084953,0.095969,0.101235,0.106136,0.09422,0.122094,0.102762,0.072848,0.111451,0.116739,0.151846,0.149833,0.153494,0.100222,0.111334,0.115611,0.120302,0.11077,0.134786,0.108612,0.156101,0.119043,0.133594,0.136563,0.120819,0.135983,0.193243,0.134472,0.155016,0.112351,0.180389,0.11991,0.162924,0.144292,0.150706,0.177606,0.20049,0.108997,0.152571,0.148573,0.136072,0.164568,0.14523,0.145264,0.130224,0.146898,0.156465,0.169288,0.13953,0.13974,0.165707,0.169708,0.139196,0.140024,0.220231,0.166239,0.22604,0.164874,0.192501,0.16689,0.171012,0.214765,0.201088,0.203561,0.178808,0.180224,0.169564,0.145147
3,Ambrosia Hotel & Suites ****,0.020583,0.014439,0.060606,0.064317,0.076348,0.079613,0.087532,0.087681,0.07341,0.106915,0.074183,0.108373,0.046629,0.103438,0.122821,0.113709,0.121329,0.116978,0.122353,0.118521,0.098241,0.136638,0.10429,0.124505,0.10337,0.14053,0.137304,0.117948,0.137579,0.13727,0.143055,0.149174,0.144511,0.157933,0.139071,0.161432,0.158064,0.12249,0.16371,0.1303,0.167909,0.167113,0.126196,0.151373,0.151509,0.164177,0.154735,0.177819,0.183355,0.165362,0.153422,0.163607,0.152737,0.155068,0.193328,0.188506,0.158351,0.1588,0.158857,0.194103,0.161895,0.172048,0.18247,0.18412,0.19317,0.189742,0.201573,0.185582,0.160605,0.170464,0.154529,0.164896,0.200108,0.186542,0.196486,0.198023,0.17136,0.210755,0.205568,0.20274,0.188661,0.203823,0.166047,0.208668,0.187263,0.198036,0.172483,0.216515,0.194072,0.228917,0.236412,0.230707,0.235165,0.175808,0.229755,0.233823,0.242689,0.239873,0.232715,0.209299
4,Dorian Inn Hotel,0.032604,0.056727,0.075536,0.081353,0.103524,0.037946,0.047507,0.048114,0.097605,0.08213,0.102522,0.088633,0.08459,0.064792,0.1316,0.091402,0.110801,0.085479,0.089413,0.082238,0.070889,0.111418,0.068094,0.083968,0.073724,0.114814,0.159329,0.107516,0.105282,0.109187,0.108429,0.11823,0.158101,0.167632,0.109538,0.14376,0.162572,0.079071,0.175106,0.146349,0.1511,0.144374,0.170338,0.107597,0.107415,0.167918,0.18157,0.169181,0.176635,0.14669,0.113628,0.14547,0.108567,0.123163,0.17952,0.165958,0.188444,0.120319,0.122512,0.180105,0.118753,0.159198,0.138492,0.183731,0.156476,0.16189,0.177479,0.142298,0.127928,0.160659,0.110765,0.123939,0.202426,0.146776,0.218321,0.175802,0.199745,0.206241,0.179828,0.167237,0.163676,0.205679,0.122027,0.175892,0.211462,0.181604,0.146736,0.191345,0.156761,0.230653,0.205862,0.233828,0.232399,0.142574,0.191458,0.196095,0.245297,0.224394,0.238436,0.207089


In [13]:
# set number of clusters
kclusters = 3

dist_df_clustering = dist_df.drop('hotel name', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dist_df_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:len(hotels_lat)-1] 

array([2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 2, 0, 1, 1, 0, 1, 2, 0, 2, 1, 2, 2, 2, 1])

In [14]:
hotels.insert(1, 'cluster', kmeans.labels_)

hotels

Unnamed: 0,name,cluster,lat,lng
0,Hapimag Resort Athens,2,37.972182,23.732455
1,Polis Grand Hotel,0,37.985458,23.72974
2,Titania Hotel,0,37.982936,23.730753
3,Ambrosia Hotel & Suites ****,0,37.985092,23.72697
4,Dorian Inn Hotel,0,37.982877,23.72541
5,Hotel Ritsi,0,37.98406,23.731205
6,Athens Center Square Hotel,0,37.980447,23.726258
7,Iniohos Hotel,0,37.985746,23.727555
8,Diros Hotel,0,37.984466,23.725862
9,Hotel Grande Bretagne,2,37.976178,23.735312


In [20]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


for lat, lng, label in zip(venues.lat, venues.lng,venues.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        #color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.2
    ).add_to(map_clusters)

    
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(hotels['lat'], hotels['lng'], hotels['name'], hotels['cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.9).add_to(map_clusters)
       
map_clusters

In [16]:
cluster0= hotels[hotels['cluster']==0]["name"].tolist()
cluster1= hotels[hotels['cluster']==1]["name"].tolist()
cluster2= hotels[hotels['cluster']==2]["name"].tolist()

In [17]:
cluster0 # In between hotels

['Polis Grand Hotel',
 'Titania Hotel',
 'Ambrosia Hotel & Suites ****',
 'Dorian Inn Hotel',
 'Hotel Ritsi',
 'Athens Center Square Hotel',
 'Iniohos Hotel',
 'Diros Hotel',
 'Parnon Hotel',
 'Vienna Hotel',
 'Fresh Hotel',
 'Elite Hotel',
 'Ionis Hotel',
 'Epidavros Hotel',
 'Delphi Art Hotel',
 'Elysium Hotel',
 'CHIC Hotel',
 'Joker Hotel',
 'Marina Hotel',
 'Achillion Hotel',
 'Best Western My Athens Hotel',
 'Sparta Team Hotel',
 'Stalis Hotel',
 'Exarchion Hotel',
 'King Jason Hotel']

In [18]:
cluster1 # Off the beaten path

['Katerina Hotel',
 'Apollo Hotel',
 'Best Western Museum Hotel',
 'Art Hotel',
 'Moka Hotel',
 'Nafsika Hotel',
 'Novus City Hotel',
 'Delta Athens Hotel',
 'Radisson Blu Park Hotel',
 'Rio Hotel',
 'Oscar Hotel']

In [19]:
cluster2 # On the beaten path

['Hapimag Resort Athens',
 'Hotel Grande Bretagne',
 'NJV Athens Plaza Hotel',
 'Cecil Hotel',
 'Amalia Hotel',
 'Central Athens Hotel',
 'Carolina Hotel',
 'Tempi Hotel']