# Connected Recommenders

In [1]:
import pandas as pd
import pymongo

def _connect_mongo():
    """ Connects to Mongo """
    global conn
    try:
        #use your database name, user and password here:
        #mongodb://<dbuser>:<dbpassword>@<mlab_url>.mlab.com:57066/<database_name>
        with open("credentials.txt", 'r') as f:
            [name,password,url,dbname]=f.read().splitlines()
        conn=pymongo.MongoClient("mongodb://{}:{}@{}/{}".format(name,password,url,dbname))
        
        print ("Connected successfully to", dbname)

    except pymongo.errors.ConnectionFailure as e:
        print ("Could not connect to MongoDB: %s" % e) 
        
    print(conn)
    db=conn["agile_data_science_group_3"]
    
    return db


def read_collection_as_df(collection_name, no_id=True):
    """ Read from Mongo and Store into DataFrame """

    # Connect to MongoDB
    db = _connect_mongo()
    
    collection = db[collection_name]
    
    # Make a query to the specific DB and Collection
    cursor = collection.find()

    # Expand the cursor and construct the DataFrame
    df =  pd.DataFrame(list(cursor)) 

    conn.close()
    
    return df

def read_collection_as_dict(collection_name):
    """ Read from Mongo and Store into dict """

    # Connect to MongoDB
    db = _connect_mongo()
    
    collection = db[collection_name]

    l=list()
    for d in collection.find():
        l.append(d)
      
    dic=dict()
    for d in l:
        for key, value in d.items():
            if key!='_id':
                dic[key] = value
    
    conn.close()
    
    return dic

In [2]:
users_recipes_ratings_df = read_collection_as_df("users_recipes_ratings")
recipes_ingredients_dic = read_collection_as_dict("recipes_ingredients")

('Connected successfully to', 'agile_data_science_group_3')
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)
('Connected successfully to', 'agile_data_science_group_3')
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [3]:
users_recipes_ratings_df.head()

Unnamed: 0,_id,rating,recipe,user
0,5a298de1166d3310bc8207d1,4,236853,user_1
1,5a298de1166d3310bc8207d2,5,241469,user_1
2,5a298de1166d3310bc8207d3,5,85980,user_1
3,5a298de1166d3310bc8207d4,5,224046,user_2
4,5a298de1166d3310bc8207d5,5,297457,user_2


In [4]:
recipes_ingredients_dic

{u'chicken-pumpkin-curry': [u'ghee',
  u'oil',
  u'onion',
  u'pack',
  u'asda',
  u'chicken',
  u'thigh',
  u'fillet',
  u'skin',
  u'garlic',
  u'clove',
  u'ginger',
  u'turmeric',
  u'cumin',
  u'asda',
  u'garam',
  u'masala',
  u'chillie',
  u'can',
  u'coconut',
  u'milk',
  u'pumpkin',
  u'skin',
  u'fibre',
  u'cornflour',
  u'cold',
  u'water',
  u'make',
  u'paste',
  u'coriander',
  u'rice',
  u'serve'],
 u'masterclass-pulled-ham-creamy-herb-salad': [u'butcher\u2019',
  u'gammon',
  u'joint',
  u'cider',
  u'bag',
  u'asda',
  u'tender',
  u'leaf',
  u'salad',
  u'caper',
  u'frozen',
  u'pea',
  u'minute',
  u'radishe',
  u'chosen',
  u'gherkin',
  u'cider',
  u'vinegar',
  u'olive',
  u'oil',
  u'fat',
  u'cr\xe8me',
  u'fra\xeeche',
  u'dijon',
  u'mustard',
  u'parsley'],
 u'tuna-with-couscous': [u'couscou',
  u'lemon',
  u'olive',
  u'oil',
  u'olive',
  u'onion',
  u'tomatoe',
  u'parsley',
  u'tuna',
  u'steak'],
 u'easy-peasy-fish-cakes': [u'pack',
  u'smash',
  u'i

### Search Engine

In [5]:
import operator
def search_engine(user_input, recipes_dict = read_collection_as_dict("recipes_ingredients")):
    """
    Given a list of ingredients from user and the "recipes: [ingredients]" dictionary, order the recipes by a score of suitability.
    It also provides those matching ingredients and missing ingredients for each recommendation. 
    
    Input: 
    - user_input as list. Example:["ingedient1", "ingredient2", ...]
    - recipes_dict as dictionary. Example: "{recipe1: [ingredients], recipe2: [ingredients] recipe3: [ingredients], ...}"
    
    The output are: 
    - recomendations as list. Example: [recipe134, recipe43234, recipe544, ...]
    - matchings as list of sets. Example: [{ingedient1, ingedient2}, {ingedient1}, ...]
    - missings as list of sets. Example: [{}, {ingredient43}, {ingredient32, ingredient45}, ...]
    """
    
    matching = dict()
    missing = dict()
    score = dict()
    
    user_ingredients = set(user_input)
    
    for recipe in recipes_dict.keys():
        
        recom_ingredients = set(recipes_dict[recipe])
        
        matching[recipe] = user_ingredients.intersection(recom_ingredients)   # ingredients in user_input & in recommendation  
        missing[recipe] = recom_ingredients.difference(user_ingredients)   # ingredients from recommendation that user_input not have
        score[recipe] = 10*len(matching[recipe])/(len(missing[recipe])+ len(matching[recipe]))
    
    recommendations_sorted = sorted(score.items(), key=operator.itemgetter(1), reverse=True)
    matching_sorted = [matching[recipe] for recipe, score in dict(recommendations_sorted).items()]
    missing_sorted = [missing[recipe] for recipe, score in dict(recommendations_sorted).items()]
    recommendations_sorted = [recipe_score[0] for recipe_score in recommendations_sorted]
    
    return recommendations_sorted, matching_sorted, missing_sorted

('Connected successfully to', 'agile_data_science_group_3')
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [6]:
grocery = ["egg", "chicken"]

recom_list, matching_ingredients, missing_ingredients = search_engine(grocery, recipes_ingredients_dic)

In [7]:
recom_list

[u'basic-royal-icing',
 u'icing-recipe',
 u'oven-cooked-new-potatoes',
 u'miguel-barclays-green-shakshuka',
 u'sausage-mummies',
 u'meringue-bones-and-blood-dip',
 u'clementine-lemon-curd',
 u'paprika-chicken',
 u'meringue-nests',
 u'chicken-noodle-stir-fry',
 u'zesty-lemon-cupcakes',
 u'bara-brith',
 u'tipsy-cherry-delight',
 u'cheese-red-onion-baked-omelette',
 u'make-your-own-fortune-cookie',
 u'masterclass-chicken-gravy',
 u'cheesy-chicken-tomato-bake',
 u'ham-pea-frittata',
 u'plum-pie',
 u'prosciutto-wrapped-asparagus',
 u'coffee-nut-meringues',
 u'lemon-curd',
 u'flipping-fantastic-pancakes',
 u'tombstone-biscuits',
 u'mini-marzipan-cherry-cake',
 u'autumn-apple-cake',
 u'toffee-pistachio-meringue',
 u'breaded-chicken-drumsticks',
 u'Stained-glass-biscuit-recipe',
 u'scrambled-eggs',
 u'coconut-jam-puddings',
 u'chocolate-almond-mixed-berries-torte',
 u'sponge-recipe',
 u'potato-chicken-pad-thai',
 u'grey-and-black-macarons',
 u'victoria-sponge-cake',
 u'kluski-polish-dumplings'

In [8]:
matching_ingredients

[{'chicken'},
 set(),
 {'egg'},
 {'egg'},
 set(),
 set(),
 set(),
 {'egg'},
 {'egg'},
 set(),
 {'chicken'},
 {'egg'},
 set(),
 {'egg'},
 {'chicken'},
 set(),
 set(),
 {'chicken'},
 set(),
 {'egg'},
 {'egg'},
 set(),
 {'egg'},
 set(),
 {'egg'},
 set(),
 set(),
 {'chicken'},
 {'egg'},
 set(),
 {'chicken'},
 set(),
 {'chicken'},
 set(),
 {'egg'},
 {'egg'},
 set(),
 {'chicken'},
 set(),
 {'chicken'},
 set(),
 set(),
 set(),
 set(),
 {'chicken'},
 {'chicken'},
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 set(),
 {'egg'},
 set(),
 set(),
 set(),
 {'egg'},
 set(),
 {'egg'},
 set(),
 set(),
 {'egg'},
 set(),
 {'egg'},
 set(),
 set(),
 set(),
 {'chicken'},
 set(),
 set(),
 {'egg'},
 set(),
 set(),
 {'chicken'},
 set(),
 {'egg'},
 {'egg'},
 set(),
 set(),
 set(),
 set(),
 {'egg'},
 {'egg'},
 set(),
 {'egg'},
 set(),
 {'egg'},
 {'chicken', 'egg'},
 set(),
 set(),
 {'egg'},
 {'egg'},
 set(),
 set(),
 {'egg'},
 set(),
 {'chicken', 'egg'},
 set(),


In [9]:
missing_ingredients

[{u'asda',
  u'can',
  u'chillie',
  u'clove',
  u'coconut',
  u'cold',
  u'coriander',
  u'cornflour',
  u'cumin',
  u'fibre',
  u'fillet',
  u'garam',
  u'garlic',
  u'ghee',
  u'ginger',
  u'make',
  u'masala',
  u'milk',
  u'oil',
  u'onion',
  u'pack',
  u'paste',
  u'pumpkin',
  u'rice',
  u'serve',
  u'skin',
  u'thigh',
  u'turmeric',
  u'water'},
 {u'asda',
  u'bag',
  u'butcher\u2019',
  u'caper',
  u'chosen',
  u'cider',
  u'cr\xe8me',
  u'dijon',
  u'fat',
  u'fra\xeeche',
  u'frozen',
  u'gammon',
  u'gherkin',
  u'joint',
  u'leaf',
  u'minute',
  u'mustard',
  u'oil',
  u'olive',
  u'parsley',
  u'pea',
  u'radishe',
  u'salad',
  u'tender',
  u'vinegar'},
 {u'banana',
  u'beaten',
  u'brown',
  u'butter',
  u'cinnamon',
  u'date',
  u'flour',
  u'light',
  u'pecan',
  u'piece',
  u'plu',
  u'powder',
  u'raisin-size',
  u'ripe',
  u'salt',
  u'soft',
  u'sugar',
  u'weight'},
 {u'asda',
  u'asparagu',
  u'beaten',
  u'berio',
  u'breadcrumb',
  u'butter',
  u'can',
  u'

Testing:

In [11]:
set(recipes_ingredients_dic['icing-recipe'])

{u'egg', u'sugar'}

In [10]:
set(recipes_ingredients_dic['basic-royal-icing'])

{u'egg', u'sugar'}

In [84]:
users_recipes_ratings_df.shape[0]

(59742, 4)

In [152]:
recipe_rating_mean = users_recipes_ratings_df.groupby(['recipe'])['rating'].mean()
recipe_rating_std = users_recipes_ratings_df.groupby(['recipe'])['rating'].std()
recipe_rating = pd.concat([recipe_rating_mean, recipe_rating_std], axis=1)
recipe_rating.columns=["mean", "std"]
recs = recipe_rating.sort(["mean", "std"], ascending=[0,1])



[u'106881',
 u'107166',
 u'107668',
 u'107680',
 u'108930',
 u'110600',
 u'111322',
 u'112629',
 u'113252',
 u'113789',
 u'114993',
 u'115022',
 u'115343',
 u'117247',
 u'118645',
 u'119259',
 u'127960',
 u'128829',
 u'128860',
 u'128864',
 u'130604',
 u'131770',
 u'133943',
 u'134184',
 u'13481',
 u'137058',
 u'138426',
 u'14055',
 u'14063',
 u'141262',
 u'142449',
 u'143032',
 u'144993',
 u'145370',
 u'146192',
 u'14628',
 u'147212',
 u'147226',
 u'147381',
 u'148156',
 u'152451',
 u'152649',
 u'153269',
 u'153287',
 u'155909',
 u'157148',
 u'15719',
 u'15857',
 u'159251',
 u'159896',
 u'160319',
 u'162212',
 u'162355',
 u'16253',
 u'163940',
 u'164646',
 u'165718',
 u'16595',
 u'166364',
 u'166863',
 u'16760',
 u'16769',
 u'168051',
 u'168057',
 u'16812',
 u'168770',
 u'16901',
 u'169173',
 u'170459',
 u'170527',
 u'171466',
 u'173277',
 u'17394',
 u'174937',
 u'174972',
 u'176340',
 u'177251',
 u'177350',
 u'177714',
 u'177846',
 u'178233',
 u'179565',
 u'180476',
 u'18061',
 u'181

### Best Rated

In [12]:
import numpy as np
import pandas as pd
import csv

In [153]:
def best_rated(data=users_recipes_ratings_df, n=10):
    """
    Return top n recipes by maximum mean rating. In case of draw, then by minimum standard deviation rating.
    """
    # top rated
    recipe_rating_mean = users_recipes_ratings_df.groupby(['recipe'])['rating'].mean()
    recipe_rating_std = users_recipes_ratings_df.groupby(['recipe'])['rating'].std()
    recipe_rating = pd.concat([recipe_rating_mean, recipe_rating_std], axis=1)
    recipe_rating.columns=["mean", "std"]
    recs = recipe_rating.sort(["mean", "std"], ascending=[0,1])
    
    return list(recs.index.values)[:n]

In [154]:
best_rated(100)



[u'106881',
 u'107166',
 u'107668',
 u'107680',
 u'108930',
 u'110600',
 u'111322',
 u'112629',
 u'113252',
 u'113789']

### Collaborative filtering

In [41]:
import numpy as np
import pandas as pd
import csv
import graphlab as gl

#### Option 1: Built the recommender + recommend
Execution time: 3.13 s

In [100]:
def collaborative_filtering(data=users_recipes_ratings_df, user=["user_1"], n=10):
    
    t1 = time
    # select only user, recipe, ratings (what we need)
    df = users_recipes_ratings_df[["user","recipe", "rating"]]
    df["rating"] = df["rating"].astype(float)
    
    sf = gl.SFrame(df)
                                       
    collaborative_filtering = gl.item_similarity_recommender.create(sf, user_id='user', item_id='recipe', target='rating',                                                similarity_type='pearson')
    pred_itemitem_pearson = collaborative_filtering.predict(sf)
    recs_itemitem_pearson = collaborative_filtering.recommend(users=user, k=n)     # top 10 collaborative filtering recomendations

    return list(recs_itemitem_pearson["recipe"])

In [109]:
import time
start = time.time()
recs = collaborative_filtering()

print("Execution time:", time.time()-start)
print(recs)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


('Execution time:', 3.1365580558776855)
['96181', '205980', '295802', '300909', 'sticky-toffee-puddings', '296256', '296621', '80843', '268707', '300037']


#### Option 2: Load recommender and recommend in two parts
Recommendation time execution: 0.61s

In [60]:
def load_collaborative_filtering(data=users_recipes_ratings_df):
    
    # select only user, recipe, ratings (what we need)
    df = users_recipes_ratings_df[["user","recipe", "rating"]]
    df["rating"] = df["rating"].astype(float)
    
    global sf
    sf = gl.SFrame(df)
                                       
    collaborative_filtering = gl.item_similarity_recommender.create(sf, user_id='user', item_id='recipe', target='rating',                                                similarity_type='pearson')
    return collaborative_filtering

In [61]:
recom = load_collaborative_filtering(data=users_recipes_ratings_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [104]:
def recommend_collaborative_filtering(recommender_object, user=["user_1"], n=10):

    pred_itemitem_pearson = recommender_object.predict(sf)
    recs_itemitem_pearson = recommender_object.recommend(users=user, k=n)     # top 10 collaborative filtering recomendations

    return list(recs_itemitem_pearson["recipe"])

In [106]:
import time
start = time.time()
recs = recommend_collaborative_filtering(recom, user=["user_1"], n=10)

print("Execution time:", time.time()-start)

('Execution time:', 0.6113440990447998)


### Content-based Recommender

col_recipes_ingredients.find() iterates over each recipes:[products]

In [82]:
for key, value in recipes_ingredients_dic.items():
    print(key, value)

chicken-pumpkin-curry ['ghee', 'oil', 'onion', 'pack', 'asda', 'chicken', 'thigh', 'fillet', 'skin', 'garlic', 'clove', 'ginger', 'turmeric', 'cumin', 'asda', 'garam', 'masala', 'chillie', 'can', 'coconut', 'milk', 'pumpkin', 'skin', 'fibre', 'cornflour', 'cold', 'water', 'make', 'paste', 'coriander', 'rice', 'serve']
masterclass-pulled-ham-creamy-herb-salad ['butcher’', 'gammon', 'joint', 'cider', 'bag', 'asda', 'tender', 'leaf', 'salad', 'caper', 'frozen', 'pea', 'minute', 'radishe', 'chosen', 'gherkin', 'cider', 'vinegar', 'olive', 'oil', 'fat', 'crème', 'fraîche', 'dijon', 'mustard', 'parsley']
banana-pecan-date-muffins ['butter', 'plu', 'light', 'soft', 'brown', 'sugar', 'ripe', 'banana', 'weight', 'egg', 'beaten', 'flour', 'salt', 'powder', 'cinnamon', 'plu', 'pecan', 'date', 'raisin-size', 'piece', 'sugar']
easy-peasy-fish-cakes ['pack', 'smash', 'instant', 'mash', 'potato', 'can', 'asda', 'tuna', 'chunk', 'water', 'chive', 'pinch', 'cayenne', 'egg', 'beaten', 'butter', 'asda', 

lighter-dauphinoise ['butter', 'olive', 'oil', 'onion', 'clove', 'garlic', 'mari', 'piper', 'potatoe', 'edward', 'potatoe', 'weight', 'cornflour', 'dijon', 'mustard', 'milk', 'reduced-fat', 'crème', 'fraîche', 'reduced-fat', 'mature', 'cheddar']
individual-spooky-beef-pies ['pack', 'asda', 'casserole', 'beef', 'flour', 'plu', 'sunflower', 'oil', 'onion', 'beef', 'stock', 'cube', 'worcestershire', 'sauce', 'asda', 'brown', 'sauce', 'asda', 'tomato', 'ketchup', 'butternut', 'squash', 'flesh', 'cube', 'jus-rol', 'shortcrust', 'pastry', 'egg', 'beaten']
bacon-pumpkin-soup ['rasher', 'streaky', 'bacon', 'sunflower', 'oil', 'butter', 'onion', 'pumpkin', 'skin', 'fibre', 'potato', 'split', 'lentil', 'litre', 'hot', 'vegetable', 'stock', 'thyme', 'crème', 'fraîche']
dutch-baby-pancake ['egg', 'almond', 'milk', 'sugar', 'vanilla', 'extract', 'flour', 'sunflower', 'oil', 'raspberrie', 'blueberrie', 'blackberrie', 'sugar', 'serve']
gin-fizz ['gordon’', 'elderflower', 'gin', 'elderflower', 'cordia

loukoumades-honey-doughnuts ['flour', 'sachet', 'asda', 'easy', 'bake', 'yeast', 'cinnamon', 'honey', 'lemon', 'zest', 'juice', 'plu', 'zest', 'rosewater', 'sunflower', 'oil']
turkey-bang-bang-wraps ['pack', 'asda', 'turkey', 'breast', 'strip', 'sunflower', 'oil', 'asda', 'soy', 'sauce', 'sweet', 'chilli', 'sauce', 'peanut', 'butter', 'tahini', 'asda', 'mexican', 'wrap', 'onion', 'piece', 'cucumber', 'thin', 'stick', 'pepper']
salmon-spinach-lasagne ['salmon', 'fillet', 'pack', 'asda', 'spinach', 'pinch', 'nutmeg', 'optional', 'pouche', 'asda', 'parsley', 'sauce', 'sheet', 'asda', 'egg', 'lasagne', 'cheddar', 'salad', 'serve']
salmon-en-croute ['onion', 'root', 'ginger', 'pepper', 'sunflower', 'oil', 'good', 'pinch', 'chillie', 'soy', 'sauce', 'apricot', 'asda', 'special', 'apricot', 'conserve', 'egg', 'pack', 'asda', 'chosen', 'ready', 'puff', 'pastry', 'salmon', 'fillet', 'fish', 'counter', 'asda', 'special', 'tenderstem', 'broccoli', 'potatoe', 'serve']
masterclass-maple-roasted-par

wonkas-gobstopper-cake ['caster', 'sugar', 'soft', 'margarine', 'butter', 'flour', 'egg', 'vanilla', 'extract', 'sweet', 'smartie', 'million', 'raspberry', 'jam', 'sugar', 'fine', 'paint', 'brush', 'blue', 'asda', 'natural', 'food', 'liquid']
quesadillas ['bunch', 'onion', 'asda', 'chillie', 'optional', 'sunflower', 'oil', 'can', 'asda', 'kidney', 'bean', 'can', 'asda', 'cannellini', 'bean', 'asda', 'organic', 'tomatoe', 'feta', 'cheese', 'mature', 'cheddar', 'cheese', 'pack', 'asda', 'mexican', 'flour', 'tortilla', 'tortilla']
moroccan-style-chick-pea-soup ['olive', 'oil', 'onion', 'root', 'ginger', 'coriander', 'cumin', 'cinnamon', 'sweet', 'potato', 'weight', 'cube', 'carrot', 'can', 'tomatoe', 'vegetable', 'stock', 'can', 'chick', 'pea', 'pitta', 'bread']
witches-wands ['pack', 'asda', 'ready', 'bake', 'shortbread', 'dough', 'flour', 'wooden', 'stick', 'royal', 'sugar', 'pot', 'asda', 'pink', 'glimmer']
easter-biscuits ['butter', 'caster', 'sugar', 'lemon', 'zest', 'free-range', 'e