# Tourist Advisor
Tool that helps with the Tourist on the trip plan.  Takes the City to be visited as the input.  Finds the list of popular places to visit in the City.  Chart out a day to day itinerary.

In [None]:
import requests
import pandas as pd
pd.set_option('display.max_rows', 500)

!conda install -c conda-forge geocoder -y
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

print('Libraries imported.')

In [2]:
CLIENT_ID = 'LSC31VKD4CEXLCRBGKZXS1ZGXYVWG2ANHWRG0XKS1WEPBZQG' # your Foursquare ID
CLIENT_SECRET = '11RZFCKAHBOEKJCWFJHDQALNAIJFNBCURFYL5MZAAP40PUXA' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: LSC31VKD4CEXLCRBGKZXS1ZGXYVWG2ANHWRG0XKS1WEPBZQG
CLIENT_SECRET:11RZFCKAHBOEKJCWFJHDQALNAIJFNBCURFYL5MZAAP40PUXA


### Step 0:  Input the City being visited and the number of days of stay

In [3]:
address = 'Bangalore, India'
duration = 3   # 3 days

### Step 1:  Using geocoders Nominatim MAP API find the latitude and longitude of the City

In [4]:
geolocator = Nominatim()
location = geolocator.geocode(address)
lat = location.latitude
lng = location.longitude
print('The geograpical coordinate of Bangalore are {}, {}.'.format(lat, lng))



The geograpical coordinate of Bangalore are 12.9791198, 77.5912997.


### Step 2: Using Foursqaure Venues/Categories API get the list of supported venue categories.

In [5]:
cat_url = 'https://api.foursquare.com/v2/venues/categories?&client_id={}&client_secret={}&v={}'.format(CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION)
cat_result = requests.get(cat_url).json()

### Step 3:  Filter tourist attractive categories from the response

In [42]:
tourism_categories = ['Theme Park', 'Park', 'National Park', 'Botanical Garden', 'Museum', 'Palace', 'Temple', 
                      'Aquarium', 'Planetarium', 'Zoo', 'Monument / Landmark', 'Capitol Building', 'Spiritual Center']  # Only a limited list is taken for this demo

In [61]:
fs_tourism_cat_list = []
fs_tourism_cat_id = []
for c1 in cat_result['response']['categories']:
    if c1['name'] in tourism_categories:
        fs_tourism_cat_list.append([c1['name'],c1['id']])
        fs_tourism_cat_id.append(c1['id'])
    for c2 in c1['categories']:
        if c2['name'] in tourism_categories:
                fs_tourism_cat_list.append([c2['name'],c2['id']])
                fs_tourism_cat_id.append(c2['id'])
        if (len(c2['categories']) != 0):
            for c3 in c2['categories']:
                if c3['name'] in tourism_categories:
                    fs_tourism_cat_list.append([c3['name'],c3['id']])
                    fs_tourism_cat_id.append(c3['id'])
                

### Step 3:  List the tourism categories, so that the user can select the categories of interested to him

#### This step is ommitted in this implementation.  Instead all the filtered tourism categories are used.

### Step 4: Using the Foursqaure Venues/explore API get the list of Venues matching the tourism categories.

In [62]:
# Parameters for the foursquare APIs
LIMIT=100
radius = 500000  # 500km

In [64]:
results = []
for index, cid in enumerate(fs_tourism_cat_id):
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            cid,
            radius, 
            LIMIT)
    results.append(requests.get(url).json())

#### Step 5: Convert the response received in the json format to Dataframe with columns, Venue Name, ID, Category, Latitude and Longitude. 

In [65]:
venues_list=[]
for res in results:
    if (len(res['response'])  != 0):
        for v in res['response']['venues']:
            # return only relevant information for each nearby venue
            if (len(v['categories']) != 0):
                venues_list.append((
                    v['name'],         
                    v['location']['lat'], 
                    v['location']['lng'],
                    v['categories'][0]['name'],
                    v['categories'][0]['id']))

In [67]:
travel_venues = pd.DataFrame(venues_list)
travel_venues.columns = ['Name', 'Latitude', 'Longtitude', 'Category', 'Id']

### Step 6: Using the Foursquare Venues/VenueID API get the details of each travel venue.  

### Step 7: Add the columns 'Rating' and 'Likes' to the dataframe based on the values received from the Venue details

### Step 8: Sort the dataframe based on the Rating and Likes.  Pick up top N travel venues.  

##### The above steps are ommitted for this implementation.  As they require multiple calls to the venue/venueID api and venue/venueID calls are limitted 50 per day in Sandbox account. Hence the top travel venues are picked up based on the below list.

In [68]:
top_travel_venues = ['Cubbon Park', 'Lalbagh Botanical Garden', 'Bangalore Fort', "Tipu's Summer Palace", 'Bangalore Palace', 'Vidhana Soudha', 'Government Museum', 
                    'Heritage Centre & Aerospace Museum', 'Visvesvaraya Industrial and Technological Museum', 'Jawaharlal Nehru Planetarium', 'Bannerghatta National Park', 'Butterfly Park',
                    'Bangalore Aquarium', 'Wonder la Amusement Park', 'Innovative Film City', 'ISKCON Bangalore', 'Bull Temple', 'Art of Living International Center']

In [69]:
top_venues = pd.DataFrame(columns=['Name', 'Category', 'Id', 'Latitude', 'Longtitude'])
for index, v in travel_venues.iterrows():
    if v['Name'] in top_travel_venues:
        top_venues = top_venues.append(v, ignore_index=True)
top_venues.drop_duplicates('Name', keep='first', inplace=True)
top_venues.reset_index(inplace=True)
top_venues

Unnamed: 0,index,Name,Category,Id,Latitude,Longtitude
0,0,Government Museum,History Museum,4bf58dd8d48988d190941735,12.97228,77.60425
1,1,Visvesvaraya Industrial and Technological Museum,Science Museum,4bf58dd8d48988d191941735,12.974884,77.596526
2,2,Heritage Centre & Aerospace Museum,History Museum,4bf58dd8d48988d190941735,12.95481,77.680989
3,3,Jawaharlal Nehru Planetarium,Planetarium,4bf58dd8d48988d192941735,12.98457,77.590163
4,5,Wonder la Amusement Park,Theme Park Ride / Attraction,5109983191d435c0d71c2bb1,12.834399,77.414302
5,6,Innovative Film City,Theme Park,4bf58dd8d48988d182941735,12.929113,77.518061
6,8,Bannerghatta National Park,Zoo,4bf58dd8d48988d17b941735,12.801465,77.576233
7,9,Lalbagh Botanical Garden,Botanical Garden,52e81612bcbc57f1066b7a22,12.948948,77.586951
8,11,Cubbon Park,Park,4bf58dd8d48988d163941735,12.977042,77.595277
9,12,Tipu's Summer Palace,Park,4bf58dd8d48988d163941735,12.959561,77.573744


### Step 9: Form a new dataframe by dropping Id, Category, Rating and Likes columns.

In [70]:
top_venues_clustering = top_venues.drop(['index', 'Name', 'Category', 'Id'], 1)
top_venues_clustering

Unnamed: 0,Latitude,Longtitude
0,12.97228,77.60425
1,12.974884,77.596526
2,12.95481,77.680989
3,12.98457,77.590163
4,12.834399,77.414302
5,12.929113,77.518061
6,12.801465,77.576233
7,12.948948,77.586951
8,12.977042,77.595277
9,12.959561,77.573744


### Step 10:  Use K-Means clustering to segment venues based on the Latitude and Longitude values

In [74]:
# set number of clusters
kclusters = 3

# run k-means clustering
kmeans = KMeans(init = "k-means++", n_clusters=kclusters, n_init = 12).fit(top_venues_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

In [76]:
venues_grouped = top_venues.drop(['index', 'Id'], axis=1)

# add clustering labels
venues_grouped['Cluster Labels'] = kmeans.labels_
venues_grouped 

Unnamed: 0,Name,Category,Latitude,Longtitude,Cluster Labels
0,Government Museum,History Museum,12.97228,77.60425,0
1,Visvesvaraya Industrial and Technological Museum,Science Museum,12.974884,77.596526,0
2,Heritage Centre & Aerospace Museum,History Museum,12.95481,77.680989,0
3,Jawaharlal Nehru Planetarium,Planetarium,12.98457,77.590163,0
4,Wonder la Amusement Park,Theme Park Ride / Attraction,12.834399,77.414302,2
5,Innovative Film City,Theme Park,12.929113,77.518061,0
6,Bannerghatta National Park,Zoo,12.801465,77.576233,1
7,Lalbagh Botanical Garden,Botanical Garden,12.948948,77.586951,0
8,Cubbon Park,Park,12.977042,77.595277,0
9,Tipu's Summer Palace,Park,12.959561,77.573744,0


### Step 11:  Display the list of venues (Itinerary for each day) 

#### Day1 itinerary

In [77]:
venues_grouped.loc[venues_grouped['Cluster Labels'] == 0, ['Name', 'Category']]

Unnamed: 0,Name,Category
0,Government Museum,History Museum
1,Visvesvaraya Industrial and Technological Museum,Science Museum
2,Heritage Centre & Aerospace Museum,History Museum
3,Jawaharlal Nehru Planetarium,Planetarium
5,Innovative Film City,Theme Park
7,Lalbagh Botanical Garden,Botanical Garden
8,Cubbon Park,Park
9,Tipu's Summer Palace,Park
11,Vidhana Soudha,Capitol Building
12,Bangalore Palace,Monument / Landmark


#### Day2 itinerary

In [78]:
venues_grouped.loc[venues_grouped['Cluster Labels'] == 1, ['Name', 'Category']]

Unnamed: 0,Name,Category
6,Bannerghatta National Park,Zoo
10,Butterfly Park,Park


#### Day3 itinerary

In [79]:
venues_grouped.loc[venues_grouped['Cluster Labels'] == 2, ['Name', 'Category']]

Unnamed: 0,Name,Category
4,Wonder la Amusement Park,Theme Park Ride / Attraction


### Step 12: Use Folium to visualize the travel venues on the City map. 
#### (Day1 = Red , Day2 = Purple, Day3 = Green)

In [57]:
# create map
map_clusters = folium.Map(location=[lat, lng], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(venues_grouped['Latitude'], venues_grouped['Longtitude'], venues_grouped['Name'], venues_grouped['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters