In [9]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library
import wget

print('Libraries imported.')

Libraries imported.


## 1. Download and Explore Dataset

#### Load  the data

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent <em>mad_explorer</em>, as shown below.

In [10]:
address = 'Madrid'

geolocator = Nominatim(user_agent="mad_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Madrid are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Madrid are 40.4167047, -3.7035825.


#### Define Foursquare Credentials and Version

In [11]:
CLIENT_ID = 'HT0JPXUKRX3VMKPHIDMUOUH0R5ERT20UJ0UGNZHO1G0ZT3WK' # your Foursquare ID
CLIENT_SECRET = '2SWM32GS2354FUW4QG2ZRV2EE3ZFGOPSMAAVIGH3KHHFU4GS' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: HT0JPXUKRX3VMKPHIDMUOUH0R5ERT20UJ0UGNZHO1G0ZT3WK
CLIENT_SECRET:2SWM32GS2354FUW4QG2ZRV2EE3ZFGOPSMAAVIGH3KHHFU4GS


#### Let's explore the first neighborhood in our dataframe.

First, let's create the GET request URL. Name your URL **url**.

In [36]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 2000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=HT0JPXUKRX3VMKPHIDMUOUH0R5ERT20UJ0UGNZHO1G0ZT3WK&client_secret=2SWM32GS2354FUW4QG2ZRV2EE3ZFGOPSMAAVIGH3KHHFU4GS&v=20180604&ll=40.4167047,-3.7035825&radius=2000&limit=100'

Send the GET request and examine the resutls

In [37]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e917e2bc94979001bdf1320'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Sol',
  'headerFullLocation': 'Sol, Madrid',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 238,
  'suggestedBounds': {'ne': {'lat': 40.43470471800001,
    'lng': -3.6799843833873767},
   'sw': {'lat': 40.39870468199998, 'lng': -3.7271806166126233}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4adcda37f964a5201f3c21e3',
       'name': 'Puerta del Sol',
       'location': {'address': 'Pl. Puerta del Sol',
        'lat': 40.4170267569777,
        'lng': -3.703442763596807,
        'distance': 37,
        'postalCode': '28013',
        'cc': 'ES',
    

The information is in the *items* key. 

In [38]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a *pandas* dataframe.

In [39]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Puerta del Sol,Plaza,40.417027,-3.703443
1,Club del Gourmet Corte Ingles,Gourmet Shop,40.417497,-3.704686
2,Rosi La Loca,Tapas Restaurant,40.415821,-3.702955
3,La Pulpería de Victoria,Seafood Restaurant,40.416506,-3.701709
4,Apple Puerta del Sol,Electronics Store,40.416898,-3.702065


We check what type of venues we have

In [40]:
nearby_venues['categories'].unique()

array(['Plaza', 'Gourmet Shop', 'Tapas Restaurant', 'Seafood Restaurant',
       'Electronics Store', 'Mexican Restaurant', 'Cosmetics Shop',
       'Cocktail Bar', 'Spanish Restaurant', 'Argentinian Restaurant',
       'Gym', 'Pastry Shop', 'Hotel', 'Other Nightlife',
       'Italian Restaurant', 'Market', 'Hostel', 'Chocolate Shop',
       'Restaurant', 'Theater', 'Café', 'Indie Movie Theater',
       'Ice Cream Shop', 'Bistro', 'Historic Site',
       'Gym / Fitness Center', 'Bookstore', 'Monument / Landmark',
       'Performing Arts Venue', 'Pub', 'Roof Deck', 'Sushi Restaurant',
       'Opera House', 'Movie Theater', 'Art Museum', 'Coffee Shop',
       'Miscellaneous Shop', 'Church', 'Pizza Place',
       'Mediterranean Restaurant', 'Clothing Store', 'Other Event',
       'Food & Drink Shop', 'Beer Bar', 'Beer Store',
       'American Restaurant', 'Vegetarian / Vegan Restaurant', 'Road',
       'Candy Store', 'Wine Bar', 'Garden', 'Palace',
       'Peruvian Restaurant', 'Dessert S

We get a data set for the hotels in Madrid

In [41]:
hotels = nearby_venues[nearby_venues.categories == 'Hotel']
hotels

Unnamed: 0,name,categories,lat,lng
15,Hotel NH Collection Madrid Palacio de Tepa,Hotel,40.4138,-3.701676
16,The Hat Madrid,Hotel,40.414343,-3.70712
25,Gran Vía Capital,Hotel,40.420693,-3.70646
33,Gran Meliá Palacio de los Duques *****,Hotel,40.419835,-3.709494
55,Room Mate Óscar Hotel,Hotel,40.420664,-3.69917
91,Only YOU Hotel&Lounge,Hotel,40.422227,-3.695762


Now we get another data set for cultural spaces

In [42]:
List = ['Plaza', 'Theater', 'History Museum','Performing Arts Venue','Historic Site','Monument / Landmark', 'Museum','Opera House']
cultural_sites = nearby_venues.loc[nearby_venues['categories'].isin(List)]
cultural_sites

Unnamed: 0,name,categories,lat,lng
0,Puerta del Sol,Plaza,40.417027,-3.703443
5,Plaza de Santa Ana,Plaza,40.414631,-3.701033
8,Plaza Mayor,Plaza,40.415527,-3.707506
17,Plaza del Callao,Plaza,40.420145,-3.705763
26,Teatro de La Zarzuela,Theater,40.417184,-3.697055
32,Plaza de la Villa,Historic Site,40.415409,-3.710391
39,Círculo de Bellas Artes,Monument / Landmark,40.418486,-3.696612
40,Casa Patas,Performing Arts Venue,40.412677,-3.701737
46,Teatro Real de Madrid,Opera House,40.418226,-3.711064
51,Plaza de Oriente,Plaza,40.418326,-3.712196


We show the venues we have in a map

In [48]:
# create map of Madrid using latitude and longitude values
map_madrid = folium.Map(location=[latitude, longitude], zoom_start=15)

# add hotel markers to map
for lat, lng, name, in zip(hotels['lat'], hotels['lng'], hotels['name']):
    label1 = '{}'.format(name)
    label1 = folium.Popup(label1, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label1,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_madrid)  
# add cultural venues markers to map
for lat, lng, ven_names, venues in zip (cultural_sites['lat'], cultural_sites['lng'],cultural_sites['name'], cultural_sites['categories']):
    label2 = '{}'.format(ven_names, venues)
    label2 = folium.Popup(label2, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label2,
        color='red',
        fill=True,
        fill_color='#FFB0B0',
        fill_opacity=0.7,
        parse_html=False).add_to(map_madrid)  
    
map_madrid