# "Advisor for one day travel"
### Applied Data Science Capstone project
### *by Anton Dziavitsyn 2019*

### This is the implementation of project. See project description here: [Project report](https://github.com/Muritiku/Coursera_Capstone/blob/master/%5Breport%5D%20advisor%20for%20one%20day%20travel.ipynb "Project report")

## Input parameters

In [1]:
Request = {
    'Location': {
        'Latitude': 53.8838069, # Minsk (Republic of Belarus) coordinates
        'Longitude': 27.5796468
    },
    'MaxPrice': 3,
    'Interests': ['restaurant', 'museum']
}

API_data = {
    'ID': 'PFQ2RIUSGT21J15YNJPAKGXGZNZZVSA1JML4CN4JQFHBG5D1',
    'Secret': '5KZDMCVZ0GS31EF5XOQQVUPGCT0FI0IIWMYJLSPBVAUAGBN5'
}

## Step 1 (get venues dataframe)

### get filtered categiries

In [2]:
import requests
import pandas as pd

url = 'https://api.foursquare.com/v2/venues/categories?&client_id={}&client_secret={}&v={}'.format(
            API_data['ID'], 
            API_data['Secret'], 
            '20190101')
# call API to get categories tree            
results = requests.get(url).json()["response"]['categories']

def str_contains(string, search):
    return string.lower().find(search.lower()) > -1

def tree_search(interest, tree):
    categories_list = []
    for category in tree:
        if str_contains(category['name'], interest) or str_contains(category['pluralName'],interest) or str_contains(category['shortName'], interest):
                categories_list.append({
                    'id': category['id'],
                    'name': category['name']
                })
        categories_list = categories_list + tree_search(interest, category['categories'])
    return categories_list

#get categories based on our parameters
categories_data = []
for interest in Request['Interests']:
    categories_data = categories_data + tree_search(interest, results)
df_categories = pd.DataFrame(categories_data)

# Categories shape
print('Categories shape: {0}'.format(df_categories.shape))
# Show top 5 finded categories
df_categories.head()

Categories shape: (253, 2)


Unnamed: 0,id,name
0,503288ae91d4c4b30a586d67,Afghan Restaurant
1,4bf58dd8d48988d1c8941735,African Restaurant
2,4bf58dd8d48988d10a941735,Ethiopian Restaurant
3,4bf58dd8d48988d14e941735,American Restaurant
4,4bf58dd8d48988d157941735,New American Restaurant


### get venues by categiries

In [3]:
# we have 253 categories - this is too much for request string
# will be calling by 50 categories

results = []

for i in range(0, df_categories.shape[0], 50):
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}'.format(
            API_data['ID'], 
            API_data['Secret'], 
            '20190101',
            Request['Location']['Latitude'],
            Request['Location']['Longitude'],
            ','.join(df_categories['id'].values.tolist()[i:50])
            )
    # call API to get venues
    results = results + requests.get(url).json()["response"]['venues']

df_venues = pd.DataFrame({
    'Id': v['id'],
    'Name': v['name']
} for v in results)

# Venues shape
print('Venues shape: {0}'.format(df_venues.shape))
# Show top 5 finded venues
df_venues.head()

Venues shape: (180, 2)


Unnamed: 0,Id,Name
0,592f04efb1538e78d49d55d4,WOK
1,4b980432f964a5204c2635e3,TGI Friday's
2,5a5daf6912c8f042a4e69998,на элитных креслах 61
3,5a1d6ddfdb1d81317738faf2,McDonald's
4,579ddede498ee8329a757137,Шикари


In [4]:
### Now we should get detailed info about venue, with price and rating and filter by price (only 50 becuse of premium calls limit)

In [5]:
data = []

for v in df_venues['Id'].values.tolist()[0:50]: # We can not make more premium calls, but only 50
    url = 'https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}'.format(
            v,
            API_data['ID'], 
            API_data['Secret'], 
            '20190101')
    # call API to get venue details
    v_details = requests.get(url).json()["response"]['venue']
    
    data.append({
            'Id': v_details['id'],
            'Name': v_details['name'],
            'Latitude': v_details['location']['lat'],
            'Longitude': v_details['location']['lng'],
            'Rating': v_details['rating'] if 'rating' in v_details.keys() else 0,
            'PriceTier': v_details['price']['tier'] if 'price' in v_details else 0,
            'CheckinsCount': v_details['stats']['checkinsCount'] if 'checkinsCount' in v_details['stats'].keys() else 0,
            'UsersCount': v_details['stats']['usersCount']  if 'usersCount' in v_details['stats'].keys() else 0,
            'TipCount': v_details['stats']['tipCount']  if 'tipCount' in v_details['stats'].keys() else 0,
            'VisitsCount': v_details['stats']['visitsCount']  if 'visitsCount' in v_details['stats'].keys() else 0
    })

df_venues = pd.DataFrame(data)

# Venues shape
print('Venues shape: {0}'.format(df_venues.shape))
# Show top 5 finded venues
df_venues.head()

Venues shape: (50, 10)


Unnamed: 0,CheckinsCount,Id,Latitude,Longitude,Name,PriceTier,Rating,TipCount,UsersCount,VisitsCount
0,0,592f04efb1538e78d49d55d4,53.889836,27.556876,WOK,1,0.0,2,0,0
1,0,4b980432f964a5204c2635e3,53.900611,27.559529,TGI Friday's,2,6.6,281,0,0
2,0,5a5daf6912c8f042a4e69998,53.93112,27.657958,на элитных креслах 61,0,0.0,0,0,0
3,0,5a1d6ddfdb1d81317738faf2,53.908582,27.472353,McDonald's,1,6.8,0,0,0
4,0,579ddede498ee8329a757137,53.899094,27.556203,Шикари,2,6.2,48,0,0


In [6]:
# filter dataframe by PriceTier Parameter

df_venues = df_venues[df_venues['PriceTier'] <= Request['MaxPrice']]
# Venues shape
print('Venues shape: {0}'.format(df_venues.shape))
# Show top 5 finded venues
df_venues.head()

Venues shape: (50, 10)


Unnamed: 0,CheckinsCount,Id,Latitude,Longitude,Name,PriceTier,Rating,TipCount,UsersCount,VisitsCount
0,0,592f04efb1538e78d49d55d4,53.889836,27.556876,WOK,1,0.0,2,0,0
1,0,4b980432f964a5204c2635e3,53.900611,27.559529,TGI Friday's,2,6.6,281,0,0
2,0,5a5daf6912c8f042a4e69998,53.93112,27.657958,на элитных креслах 61,0,0.0,0,0,0
3,0,5a1d6ddfdb1d81317738faf2,53.908582,27.472353,McDonald's,1,6.8,0,0,0
4,0,579ddede498ee8329a757137,53.899094,27.556203,Шикари,2,6.2,48,0,0


## Step 2 (get top rated venues and then top active venues)

In [8]:
# we just get 30 top rated and then 20 top tips count - becuse we have only 50 records, and 30% will be very few
top_venues = df_venues.sort_values('Rating', ascending=False).head(30).sort_values('TipCount', ascending=False).head(20)

# Venues shape
print('TOP Venues shape: {0}'.format(top_venues.shape))
# Show top 5 finded venues
top_venues.head()

TOP Venues shape: (20, 10)


Unnamed: 0,CheckinsCount,Id,Latitude,Longitude,Name,PriceTier,Rating,TipCount,UsersCount,VisitsCount
1,0,4b980432f964a5204c2635e3,53.900611,27.559529,TGI Friday's,2,6.6,281,0,0
29,0,4bb66e716edc76b06bee301c,53.897257,27.545912,Doodah King,0,7.4,166,0,0
6,0,4d273619849f3704c5b96641,53.902241,27.556918,Планета Суши,2,6.6,162,0,0
8,0,4d835d30e83fa1433dd34aa1,53.928695,27.587135,Il Patio / Планета Суши,2,5.1,140,0,0
22,0,515c49b9e4b015618fe45d1a,53.901697,27.553229,Йо! Суши,2,5.3,114,0,0


## Step 3 (K-Mean clustering by coordinates)

In [48]:
# import k-means
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

top_venues_clustering = top_venues[['Latitude','Longitude']]

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(top_venues_clustering)

top_venues['Cluster'] = kmeans.labels_

# Venues shape
print('TOP Venues shape with Clusters: {0}'.format(top_venues.shape))
# Show top 5 finded venues
top_venues.head()

TOP Venues shape with Clusters: (20, 11)


Unnamed: 0,CheckinsCount,Id,Latitude,Longitude,Name,PriceTier,Rating,TipCount,UsersCount,VisitsCount,Cluster
1,0,4b980432f964a5204c2635e3,53.900611,27.559529,TGI Friday's,2,6.6,281,0,0,0
29,0,4bb66e716edc76b06bee301c,53.897257,27.545912,Doodah King,0,7.4,166,0,0,0
6,0,4d273619849f3704c5b96641,53.902241,27.556918,Планета Суши,2,6.6,162,0,0,0
8,0,4d835d30e83fa1433dd34aa1,53.928695,27.587135,Il Patio / Планета Суши,2,5.1,140,0,0,4
22,0,515c49b9e4b015618fe45d1a,53.901697,27.553229,Йо! Суши,2,5.3,114,0,0,0


## Step 4 (find cluster with max venues count and show on map)

In [49]:
# Clusters with venues ID count
top_venues.groupby('Cluster').count()['Id']

Cluster
0    11
1     2
2     1
3     1
4     5
Name: Id, dtype: int64

In [50]:
import folium

# filter by TOP count cluster (Cluster 0)
map_venues = top_venues[top_venues['Cluster'] == 0]

# create map
map_result = folium.Map(location=[Request['Location']['Latitude'], Request['Location']['Longitude']], zoom_start=10)

# add df_toronto markers to map
for lat, lng, name in zip(map_venues['Latitude'], map_venues['Longitude'], map_venues['Name']):
    label = name
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_result)  

# show result map
map_result

## Now our User may see MAP with the TOP rated venues which satisfy his request. And they placed near each other, and may be visited in one day.
## Thank You! Best regards, Anton Dziavitsyn 2019