In [26]:
import json
from dotenv import load_dotenv
from os.path import join, dirname
import os
import requests
import pandas as pd
import numpy as np
from collections import Counter
import watson_developer_cloud
import watson_developer_cloud.natural_language_understanding.features.v1 as features

In [27]:
load_dotenv('.env')

True

# User History

Our appmakes a User/VenueHistory API call on behalf of both users to create a profile of the places they like. This will inform the recommendations later on.

We accomplish this by using Watson's Natural Language Understanding API to find "concepts" among everything in each user's history.

We use the Main Category insight learned from the Foursquare API tutorial to make the list relevant to the end goal.

In [1]:
from User import Users

A = Users('a', 'Food')
A.eval_user()
A.concepts_for_all_relevant_visits()
A.all_concepts()
ra = A.pool_concepts()
ma = A.mean_concepts(ra)

B = Users('y', 'Food')
B.eval_user()
B.concepts_for_all_relevant_visits()
B.all_concepts()
rb = B.pool_concepts()
mb = B.mean_concepts(rb)

`ma` and `mb` are dicts with an aggregate topic analysis of all places user A and user B have visited.

In [3]:
for i in ma:
    print("{}\t{}".format(i, ma[i]))
print()

for i in mb:
    print("{}\t{}".format(i, mb[i]))


Flatbreads	0.03442984210526316
Apple	0.04742078947368421
Hamburger	0.05144894736842105
Iced coffee	0.04958947368421053
Mister Donut	0.030257052631578945
Kringle	0.040217368421052634
New Mexican cuisine	0.039545473684210526
Juice	0.037463315789473685
Mayonnaise	0.041639368421052626
Pretzel	0.04419631578947368
The Skinny	0.025386789473684213
Bartender	0.026027894736842107
Jack Daniel's	0.025390105263157894
Latte	0.03423473684210526
Barista	0.02056557894736842
Chicken	0.043589368421052634
Soft drink	0.050869
Coffeehouse	0.095212
Dunkin' Donuts	0.045095052631578945
German cuisine	0.03911036842105263
Maize	0.04847305263157895
Coors Brewing Company	0.025368105263157893
Fast food	0.07426363157894737
Appetizers	0.04930021052631579
Bagel	0.102008
Waiting staff	0.026100473684210524
Bag-In-Box	0.038551894736842104
Taste	0.032445842105263155
Meat	0.04627768421052631
Sandwich	0.05175457894736842
Ice cream	0.02725284210526316
Pie	0.04557631578947369
Debut albums	0.053903
Breakfast	0.0344878947368421

### Define Midpoint and return Venues

In [19]:
from __future__ import print_function
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
import folium
import matplotlib.cm as cm
#from shapely.geometry import MultiPoint

# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

In [20]:
address_U1 = "176 34th Street Brooklyn, NY"
address_U2 = "1660 Madison Avenue New York, NY"# Geolocator translate addrss into lat, lon
geolocator = Nominatim()
location_U1 = geolocator.geocode(address_U1)
latitude_U1 = location_U1.latitude
longitude_U1 = location_U1.longitude
location_U2 = geolocator.geocode(address_U2)
latitude_U2 = location_U2.latitude
longitude_U2 = location_U2.longitude
print (latitude_U1, longitude_U1, latitude_U2, longitude_U2)

40.6555065 -74.0035751 40.7965898 -73.9476099084148


In [None]:
# Multipoint has a function called centroid, that find the midpoint among any number of points listed
points = MultiPoint([(latitude_U1, longitude_U1), (latitude_U2, longitude_U2)])
latitude = points.centroid.x
longitude = points.centroid.y
print (points.centroid) #True centroid, not necessarily an existing point on map

In [22]:
latitude = 40.72604815

In [21]:
longitude = -73.97559250420741

In [25]:
map_nyc = folium.Map(location=[40.765937,-73.977304], zoom_start=11)
folium.CircleMarker([latitude_U1, longitude_U1], color = 'blue',
                   radius = 7).add_to(map_nyc)
folium.CircleMarker([latitude_U2, longitude_U2], color = 'blue',
                   radius = 7).add_to(map_nyc)
folium.CircleMarker([latitude, longitude], color = 'green',
                   radius = 7).add_to(map_nyc)
map_nyc

In [28]:
foursquare_client_id=os.environ.get('FOURSQUARE_CLIENT_ID')

In [29]:
foursquare_client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET')

In [30]:
CLIENT_ID = foursquare_client_id;
CLIENT_SECRET = foursquare_client_secret;
VERSION = "20170511"
LIMIT = 30

In [31]:
# these ids help us to find the class of places we want to
food_id = "4d4b7105d754a06374d81259"
bars_id = "4d4b7105d754a06376d81259"

In [34]:
radius = 500 # range in meters
category_id = food_id
price = "2,3" # price range. 1 very cheap, 4 very expensivecategory_id = food_id
url="https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&categoryId={}&limit={}&price={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, category_id, LIMIT, price)
results = requests.get(url).json()

In [36]:
items = results["response"]["groups"][0]["items"]
items[0]["venue"].keys()

dict_keys(['id', 'name', 'contact', 'location', 'categories', 'verified', 'stats', 'url', 'price', 'hasMenu', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'hours', 'photos', 'venuePage', 'storeId', 'hereNow'])

In [37]:
# function that extracts the category of the venue
def get_category_type(row):
   try:
       categories_list = row["categories"]
   except:
       categories_list = row["venue.categories"]
       
   if len(categories_list) == 0:
       return None
   else:
       return categories_list[0]["name"].encode('ascii',errors='ignore')

In [39]:
dataframe = json_normalize(items) # flatten JSON# filter columns
filtered_columns = ['venue.name', 'venue.url', 'venue.categories'] + ["venue.rating"] + \
                  ["venue.id"] + ['venue.hours.isOpen'] + ['venue.price.tier'] + \
                  [col for col in dataframe.columns if col.startswith('venue.location.')]
                 
dataframe_filtered = dataframe.ix[:, filtered_columns]# filter the category for each row
dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)# clean columns
dataframe_filtered.columns = [col.split(".")[-1] for col in dataframe_filtered.columns]# filter just open places
open_places = dataframe_filtered[dataframe_filtered['isOpen'] == True]
dataframe_filtered.head(10)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  after removing the cwd from sys.path.


Unnamed: 0,name,url,categories,rating,id,isOpen,tier,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state
0,Esperanto,http://esperantony.com,b'Brazilian Restaurant',8.9,458d06def964a520ff3f1fe3,True,2,145 Avenue C,US,New York,United States,at E 9th St.,217,"[145 Avenue C (at E 9th St.), New York, NY 100...","[{'label': 'display', 'lat': 40.72545004678564...",40.72545,-73.978051,10009,NY
1,Kafana,http://www.kafananyc.com,b'Eastern European Restaurant',8.9,49e8896cf964a5204e651fe3,True,3,116 Avenue C,US,New York,United States,at E 8th St,312,"[116 Avenue C (at E 8th St), New York, NY 1000...","[{'label': 'display', 'lat': 40.72445009594483...",40.72445,-73.978638,10009,NY
2,Zum Schneider,http://www.zumschneider.com,b'German Restaurant',9.0,3fd66200f964a52022e51ee3,False,2,107 Avenue C,US,New York,United States,at E 7th St,346,"[107 Avenue C (at E 7th St), New York, NY 1000...","[{'label': 'display', 'lat': 40.72417, 'lng': ...",40.72417,-73.978871,10009,NY
3,Virginia's,http://www.virginiasnyc.com,b'American Restaurant',7.8,5549770d498ebdb938cbbfdb,False,3,647 E 11th St,US,New York,United States,btwn Avenue B & Avenue C,184,"[647 E 11th St (btwn Avenue B & Avenue C), New...","[{'label': 'display', 'lat': 40.72663915585264...",40.726639,-73.97764,10009,NY
4,Bobwhite Counter,http://www.bobwhitecounter.com/,b'Fried Chicken Joint',9.4,4f00dea9f9abd5b3917d422c,True,2,94 Avenue C,US,New York,United States,btwn E 6th & E 7th St,395,"[94 Avenue C (btwn E 6th & E 7th St), New York...","[{'label': 'display', 'lat': 40.72376488808975...",40.723765,-73.979189,10009,NY
5,Edi & The Wolf,http://www.ediandthewolf.com,b'German Restaurant',8.7,4ca0d958e9a7ef3be3085416,True,3,102 Avenue C,US,New York,United States,btwn 6th & 7th,385,"[102 Avenue C (btwn 6th & 7th), New York, NY 1...","[{'label': 'display', 'lat': 40.72371374468727...",40.723714,-73.978972,10009,NY
6,Donostia,http://www.donostianyc.com,b'Spanish Restaurant',8.5,528bf7bb498eb53e3fc85459,False,2,155 Avenue B,US,New York,United States,at 10th St,382,"[155 Avenue B (at 10th St), New York, NY 10009...","[{'label': 'display', 'lat': 40.72678946581906...",40.726789,-73.980021,10009,NY
7,Joselito,,b'Spanish Restaurant',7.5,4b81d8ccf964a520efc030e3,True,2,125 Avenue D,US,New York,United States,btwn 8th & 9th,237,"[125 Avenue D (btwn 8th & 9th), New York, NY 1...","[{'label': 'display', 'lat': 40.72392069000751...",40.723921,-73.975751,10009,NY
8,Gnocco,http://gnocco.com,b'Italian Restaurant',8.3,49c6a988f964a5206e571fe3,True,2,337 E 10th St,US,New York,United States,Avenue B,430,"[337 E 10th St (Avenue B), New York, NY 10009,...","[{'label': 'display', 'lat': 40.7272018229237,...",40.727202,-73.980457,10009,NY
9,Gruppo,http://www.gruppothincrust.com/,b'Pizza Place',8.8,3fd66200f964a5203ce51ee3,True,2,98 Avenue B,US,New York,United States,btwn 6th & 7th Sts,499,"[98 Avenue B (btwn 6th & 7th Sts), New York, N...","[{'label': 'display', 'lat': 40.72494964561905...",40.72495,-73.981333,10009,NY


In [40]:
list_id = list(dataframe_filtered['id'].astype(str))
print ('number of places found:', len(list_id))
print ('radius: %d m' %radius)
print ('price class:', price)
list_id[:10]

number of places found: 30
radius: 500 m
price class: 2,3


['458d06def964a520ff3f1fe3',
 '49e8896cf964a5204e651fe3',
 '3fd66200f964a52022e51ee3',
 '5549770d498ebdb938cbbfdb',
 '4f00dea9f9abd5b3917d422c',
 '4ca0d958e9a7ef3be3085416',
 '528bf7bb498eb53e3fc85459',
 '4b81d8ccf964a520efc030e3',
 '49c6a988f964a5206e571fe3',
 '3fd66200f964a5203ce51ee3']

In [41]:
venues_id = list_id

### Recommendation

In [35]:
ma

{'2005 albums': 0.024021526315789476,
 '2006 albums': 0.07026657894736843,
 'Albany, New York': 0.028263842105263157,
 'Alcoholic beverage': 0.03396836842105263,
 'American cuisine': 0.03288747368421053,
 'American films': 0.027467631578947372,
 'Appetizers': 0.04930021052631579,
 'Apple': 0.04742078947368421,
 'Bacon': 0.030486157894736843,
 'Bag-In-Box': 0.038551894736842104,
 'Bagel': 0.102008,
 'Barista': 0.02056557894736842,
 'Bartender': 0.026027894736842107,
 'Beef': 0.039538789473684215,
 'Block': 0.02600021052631579,
 'Bread': 0.06575463157894737,
 'Breakfast': 0.034487894736842106,
 'Breakfast foods': 0.04515447368421053,
 'Buffalo wings': 0.07243063157894737,
 'Cashier': 0.025363315789473685,
 'Chicken': 0.043589368421052634,
 'Coca-Cola': 0.0476398947368421,
 'Coca-Cola Black Cherry Vanilla': 0.03817884210526316,
 'Coffee': 0.2663545263157895,
 'Coffee culture': 0.05156984210526316,
 'Coffeehouse': 0.095212,
 'Coors Brewing Company': 0.025368105263157893,
 'Customer': 0.051

In [17]:
mb

{'American cuisine': 0.677592,
 'Beer style': 0.484654,
 'Burger': 0.505451,
 'Debut albums': 0.553642,
 'Fast food': 0.733611,
 'Hamburger': 0.957893,
 'Onions': 0.57151,
 'Potato': 0.490209}

In [42]:
# Function to pull venues JSON using venue id
def pull_foursquare_json(venue_id):
    VERSION = "20170511"
    foursquare_client_id=os.environ.get('FOURSQUARE_CLIENT_ID')
    foursquare_client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET')
    url="https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit=150".format(venue_id, foursquare_client_id, foursquare_client_secret, VERSION)
    json_f = requests.get(url).json()
    return json_f


In [43]:
# Helper function to return list of tips from restaurant's JSON file
def tips_list(json_file):
    try:
        num_tips = json_file['response']['tips']['count']
        return [json_file['response']['tips']['items'][k]['text'] for k in range(num_tips)]
    except:
        return [""]

In [46]:
# Combine two user pref vecs
def combine_u_prefs(u1_dict, u2_dict, k):
    u1_counter = Counter(u1_dict)
    u2_counter = Counter(u2_dict)
    u_prefs = u1_counter + u2_counter
    # Pull k highest values
    u_prefs_top = dict(u_prefs.most_common(k))
    vallist = [val for val in u_prefs_top.values()]
    factor = np.median(vallist)
    normed_val = [val/factor for val in vallist]
    topic_idx = [key for key in u_prefs_top.keys()]
    pref_vec = pd.Series(data=normed_val, index=topic_idx)
    return pref_vec, topic_idx

In [56]:
pref_vec, topic_idx = combine_u_prefs(ma, mb, 20)

In [47]:
# Construct empty matrix

def construct_matrix(venue_ids, topic_idx):
    empty_matrix = pd.DataFrame(index=venue_ids, columns=topic_idx)
    return empty_matrix

In [53]:
empty_mat = construct_matrix(venues_id, topic_idx)

In [48]:
# Call Watson API to get sentiment score of a single tip
def sentiment(tips):
    # Helper function to return text sentiment analysis
    # Load Watson credentials
    username=os.environ.get('NLU_USERNAME')
    password = os.environ.get('NLU_PASSWORD')
    nlu = watson_developer_cloud.NaturalLanguageUnderstandingV1(version='2017-02-27',
        username=username, password=password)
    output = nlu.analyze(text=tips, features=[features.Sentiment()])
    return output['sentiment']['document']['score']

In [49]:
# Fill empty matrix with sentiment score
# Median sentiment score of all tips including a particular topic

def fill_sentiment_matrix(mat):
    for j in range(mat.shape[0]):
        venue_id = mat.index[j]
        json_f = pull_foursquare_json(venue_id)
        tips = tips_list(json_f)
        for k in range(mat.shape[1]):
            topic = mat.columns[k]
            score = np.median([sentiment(tip) for tip in tips if topic in tip])
            mat.loc[venue_id, topic] = score
    return mat.fillna(0)

In [55]:
sent_mat = fill_sentiment_matrix(empty_mat)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [50]:
# Matrix multiply sentiment matrix and user pref vec
# Returns numpy array`
def recommend(score_mat, user_vec, venues_ids, top_n):
    score_vec = pd.Series(np.dot(score_mat, user_vec), index=venues_ids)
    return score_vec.sort_values(ascending=False)[:top_n].index.values

In [57]:
recommend(sent_mat, pref_vec, venues_id, 5)

array(['3fd66200f964a5203ce51ee3', '3fd66200f964a5203fe51ee3',
       '515392c4e4b0f03a43c69fce', '4ae1b55df964a520df8621e3',
       '49e8896cf964a5204e651fe3'], dtype=object)