# Connected Recommenders

In [1]:
import pandas as pd
import pymongo

def _connect_mongo():
    """ Connects to Mongo """
    global conn
    try:
        #use your database name, user and password here:
        #mongodb://<dbuser>:<dbpassword>@<mlab_url>.mlab.com:57066/<database_name>
        with open("credentials.txt", 'r') as f:
            [name,password,url,dbname]=f.read().splitlines()
        conn=pymongo.MongoClient("mongodb://{}:{}@{}/{}".format(name,password,url,dbname))
        
        print ("Connected successfully to", dbname)

    except pymongo.errors.ConnectionFailure as e:
        print ("Could not connect to MongoDB: %s" % e) 
        
    print(conn)
    db=conn["agile_data_science_group_3"]
    
    return db


def read_collection_as_df(collection_name, no_id=True):
    """ Read from Mongo and Store into DataFrame """

    # Connect to MongoDB
    db = _connect_mongo()
    
    collection = db[collection_name]
    
    # Make a query to the specific DB and Collection
    cursor = collection.find()

    # Expand the cursor and construct the DataFrame
    df =  pd.DataFrame(list(cursor)) 

    conn.close()
    
    return df

def read_collection_as_dict(collection_name):
    """ Read from Mongo and Store into dict """

    # Connect to MongoDB
    db = _connect_mongo()
    
    collection = db[collection_name]

    l=list()
    for d in collection.find():
        l.append(d)
      
    dic=dict()
    for d in l:
        for key, value in d.items():
            if key!='_id':
                dic[key] = value
    
    conn.close()
    
    return dic

In [2]:
users_recipes_ratings_df = read_collection_as_df("users_recipes_ratings")
recipes_ingredients_dic = read_collection_as_dict("recipes_ingredients")

('Connected successfully to', 'agile_data_science_group_3')
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)
('Connected successfully to', 'agile_data_science_group_3')
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [3]:
users_recipes_ratings_df.head()

Unnamed: 0,_id,rating,recipe,user
0,5a298de1166d3310bc8207d1,4,236853,user_1
1,5a298de1166d3310bc8207d2,5,241469,user_1
2,5a298de1166d3310bc8207d3,5,85980,user_1
3,5a298de1166d3310bc8207d4,5,224046,user_2
4,5a298de1166d3310bc8207d5,5,297457,user_2


In [4]:
[{key:val} for key, val in recipes_ingredients_dic.items()][:5]

[{u'chicken-pumpkin-curry': [u'ghee',
   u'oil',
   u'onion',
   u'pack',
   u'asda',
   u'chicken',
   u'thigh',
   u'fillet',
   u'skin',
   u'garlic',
   u'clove',
   u'ginger',
   u'turmeric',
   u'cumin',
   u'asda',
   u'garam',
   u'masala',
   u'chillie',
   u'can',
   u'coconut',
   u'milk',
   u'pumpkin',
   u'skin',
   u'fibre',
   u'cornflour',
   u'cold',
   u'water',
   u'make',
   u'paste',
   u'coriander',
   u'rice',
   u'serve']},
 {u'masterclass-pulled-ham-creamy-herb-salad': [u'butcher\u2019',
   u'gammon',
   u'joint',
   u'cider',
   u'bag',
   u'asda',
   u'tender',
   u'leaf',
   u'salad',
   u'caper',
   u'frozen',
   u'pea',
   u'minute',
   u'radishe',
   u'chosen',
   u'gherkin',
   u'cider',
   u'vinegar',
   u'olive',
   u'oil',
   u'fat',
   u'cr\xe8me',
   u'fra\xeeche',
   u'dijon',
   u'mustard',
   u'parsley']},
 {u'tuna-with-couscous': [u'couscou',
   u'lemon',
   u'olive',
   u'oil',
   u'olive',
   u'onion',
   u'tomatoe',
   u'parsley',
   u'tuna'

### Search Engine

In [5]:
import operator
def search_engine(user_input, recipes_dict = read_collection_as_dict("recipes_ingredients")):
    """
    Given a list of ingredients from user and the "recipes: [ingredients]" dictionary, order the recipes by a score of suitability.
    It also provides those matching ingredients and missing ingredients for each recommendation. 
    
    Input: 
    - user_input as list. Example:["ingedient1", "ingredient2", ...]
    - recipes_dict as dictionary. Example: "{recipe1: [ingredients], recipe2: [ingredients] recipe3: [ingredients], ...}"
    
    The output are: 
    - recomendations as list. Example: [recipe134, recipe43234, recipe544, ...]
    - matchings as list of sets. Example: [{ingedient1, ingedient2}, {ingedient1}, ...]
    - missings as list of sets. Example: [{}, {ingredient43}, {ingredient32, ingredient45}, ...]
    """
    
    matching = dict()
    missing = dict()
    score = dict()
    
    user_ingredients = set(user_input)
    
    for recipe in recipes_dict.keys():
        
        recom_ingredients = set(recipes_dict[recipe])
        
        matching[recipe] = user_ingredients.intersection(recom_ingredients)   # ingredients in user_input & in recommendation  
        missing[recipe] = recom_ingredients.difference(user_ingredients)   # ingredients from recommendation that user_input not have
        score[recipe] = 10*len(matching[recipe])/(len(missing[recipe])+ len(matching[recipe]))
    
    recommendations_sorted = sorted(score.items(), key=operator.itemgetter(1), reverse=True)
    matching_sorted = [matching[recipe] for recipe, score in dict(recommendations_sorted).items()]
    missing_sorted = [missing[recipe] for recipe, score in dict(recommendations_sorted).items()]
    recommendations_sorted = [recipe_score[0] for recipe_score in recommendations_sorted]
    
    return recommendations_sorted, matching_sorted, missing_sorted

('Connected successfully to', 'agile_data_science_group_3')
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [6]:
grocery = ["egg", "chicken"]

recom_list, matching_ingredients, missing_ingredients = search_engine(grocery, recipes_ingredients_dic)

In [7]:
recom_list[:10]

[u'basic-royal-icing',
 u'icing-recipe',
 u'oven-cooked-new-potatoes',
 u'miguel-barclays-green-shakshuka',
 u'sausage-mummies',
 u'meringue-bones-and-blood-dip',
 u'clementine-lemon-curd',
 u'paprika-chicken',
 u'meringue-nests',
 u'chicken-noodle-stir-fry']

In [8]:
matching_ingredients[:10]

[{'chicken'},
 set(),
 {'egg'},
 {'egg'},
 set(),
 set(),
 set(),
 {'egg'},
 {'egg'},
 set()]

In [9]:
missing_ingredients[:10]

[{u'asda',
  u'can',
  u'chillie',
  u'clove',
  u'coconut',
  u'cold',
  u'coriander',
  u'cornflour',
  u'cumin',
  u'fibre',
  u'fillet',
  u'garam',
  u'garlic',
  u'ghee',
  u'ginger',
  u'make',
  u'masala',
  u'milk',
  u'oil',
  u'onion',
  u'pack',
  u'paste',
  u'pumpkin',
  u'rice',
  u'serve',
  u'skin',
  u'thigh',
  u'turmeric',
  u'water'},
 {u'asda',
  u'bag',
  u'butcher\u2019',
  u'caper',
  u'chosen',
  u'cider',
  u'cr\xe8me',
  u'dijon',
  u'fat',
  u'fra\xeeche',
  u'frozen',
  u'gammon',
  u'gherkin',
  u'joint',
  u'leaf',
  u'minute',
  u'mustard',
  u'oil',
  u'olive',
  u'parsley',
  u'pea',
  u'radishe',
  u'salad',
  u'tender',
  u'vinegar'},
 {u'banana',
  u'beaten',
  u'brown',
  u'butter',
  u'cinnamon',
  u'date',
  u'flour',
  u'light',
  u'pecan',
  u'piece',
  u'plu',
  u'powder',
  u'raisin-size',
  u'ripe',
  u'salt',
  u'soft',
  u'sugar',
  u'weight'},
 {u'asda',
  u'asparagu',
  u'beaten',
  u'berio',
  u'breadcrumb',
  u'butter',
  u'can',
  u'

Testing:

In [10]:
set(recipes_ingredients_dic['icing-recipe'])

{u'egg', u'sugar'}

In [11]:
set(recipes_ingredients_dic['basic-royal-icing'])

{u'egg', u'sugar'}

### Best Rated

In [12]:
import numpy as np
import pandas as pd
import csv

In [13]:
def best_rated(data=users_recipes_ratings_df, n=10):
    """
    Return top n recipes by maximum mean rating. In case of draw, then by minimum standard deviation rating.
    """
    # top rated
    recipe_rating_mean = users_recipes_ratings_df.groupby(['recipe'])['rating'].mean()
    recipe_rating_std = users_recipes_ratings_df.groupby(['recipe'])['rating'].std()
    recipe_rating = pd.concat([recipe_rating_mean, recipe_rating_std], axis=1)
    recipe_rating.columns=["mean", "std"]
    recs = recipe_rating.sort(["mean", "std"], ascending=[0,1])
    
    return list(recs.index.values)[:n]

In [14]:
best_rated(100)



[u'106881',
 u'107166',
 u'107668',
 u'107680',
 u'108930',
 u'110600',
 u'111322',
 u'112629',
 u'113252',
 u'113789']

### Collaborative filtering

In [15]:
import numpy as np
import pandas as pd
import csv
import graphlab as gl

#### Option 1: Built the recommender + recommend
Execution time: 3.13 s

In [16]:
def collaborative_filtering(data=users_recipes_ratings_df, user=["user_1"], n=10):
    
    t1 = time
    # select only user, recipe, ratings (what we need)
    df = users_recipes_ratings_df[["user","recipe", "rating"]]
    df["rating"] = df["rating"].astype(float)
    
    sf = gl.SFrame(df)
                                       
    collaborative_filtering = gl.item_similarity_recommender.create(sf, user_id='user', item_id='recipe', target='rating',                                                similarity_type='pearson')
    pred_itemitem_pearson = collaborative_filtering.predict(sf)
    recs_itemitem_pearson = collaborative_filtering.recommend(users=user, k=n)     # top 10 collaborative filtering recomendations

    return list(recs_itemitem_pearson["recipe"])

In [17]:
import time
start = time.time()
recs = collaborative_filtering()

print("Execution time:", time.time()-start)
print(recs)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


This non-commercial license of GraphLab Create for academic use is assigned to gonzaloespinosaduelo@gmail.com and will expire on November 08, 2018.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1512764733.log


('Execution time:', 5.351465940475464)
['96181', '205980', '295802', '300909', 'sticky-toffee-puddings', '296256', '296621', '80843', '268707', '300037']


#### Option 2: Load recommender and recommend in two parts
Recommendation time execution: 0.61s

In [18]:
def load_collaborative_filtering(data=users_recipes_ratings_df):
    
    # select only user, recipe, ratings (what we need)
    df = users_recipes_ratings_df[["user","recipe", "rating"]]
    df["rating"] = df["rating"].astype(float)
    
    global sf
    sf = gl.SFrame(df)
                                       
    collaborative_filtering = gl.item_similarity_recommender.create(sf, user_id='user', item_id='recipe', target='rating',                                                similarity_type='pearson')
    return collaborative_filtering

In [19]:
recom = load_collaborative_filtering(data=users_recipes_ratings_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [20]:
def recommend_collaborative_filtering(recommender_object, user=["user_1"], n=10):

    pred_itemitem_pearson = recommender_object.predict(sf)
    recs_itemitem_pearson = recommender_object.recommend(users=user, k=n)     # top 10 collaborative filtering recomendations

    return list(recs_itemitem_pearson["recipe"])

In [21]:
import time
start = time.time()
recs = recommend_collaborative_filtering(recom, user=["user_1"], n=10)

print("Execution time:", time.time()-start)

print(recs)

('Execution time:', 0.6640620231628418)
['96181', '205980', '295802', '300909', 'sticky-toffee-puddings', '296256', '296621', '80843', '268707', '300037']


### Content-based Recommender

recipes_ingredients_dic