# Connected Recommenders

In [1]:
import pandas as pd
import pymongo
import operator

def _connect_mongo():
    """Open a MongoDB client from ``credentials.txt`` and return the project database.

    ``credentials.txt`` must contain exactly four lines, in this order:
    user name, password, host URL (``<host>:<port>``) and database name.

    Side effects:
        Stores the created client in the module-level global ``conn`` so that
        callers can close it once they are done with the returned database.

    Returns:
        The ``agile_data_science_group_3`` database handle of the new client.

    Raises:
        pymongo.errors.ConnectionFailure: re-raised after being logged, so the
            caller never continues with a missing or stale ``conn``.
        ValueError: if ``credentials.txt`` does not contain exactly four lines.
    """
    # Local import keeps this block self-contained; the file only imports pymongo.
    from urllib.parse import quote_plus

    global conn
    try:
        # URI layout:
        # mongodb://<dbuser>:<dbpassword>@<mlab_url>.mlab.com:57066/<database_name>
        with open("credentials.txt", 'r') as f:
            [name, password, url, dbname] = f.read().splitlines()

        # User name and password must be percent-escaped, otherwise characters
        # such as '@', ':' or '/' in them corrupt the connection URI.
        conn = pymongo.MongoClient(
            "mongodb://{}:{}@{}/{}".format(quote_plus(name), quote_plus(password), url, dbname)
        )

        print ("Connected successfully to", dbname)

    except pymongo.errors.ConnectionFailure as e:
        print ("Could not connect to MongoDB: %s" % e)
        # Do not fall through: `conn` would be undefined (first call) or would
        # still point at a previously closed client.
        raise

    print(conn)
    # NOTE(review): the database name is hard-coded here rather than taken from
    # `dbname` read above - confirm whether that is intentional.
    db = conn["agile_data_science_group_3"]

    return db


def read_collection_as_df(collection_name, no_id=True):
    """Read a whole MongoDB collection into a pandas DataFrame.

    Args:
        collection_name: name of the collection to read.
        no_id: when True (default) the ``_id`` column that MongoDB adds to
            every document is removed from the result.

    Returns:
        A DataFrame with one row per document of the collection.
    """
    # Connect to MongoDB (also sets the module-level `conn` client).
    db = _connect_mongo()

    try:
        # Query every document and materialise the cursor while the
        # connection is still open.
        cursor = db[collection_name].find()
        df = pd.DataFrame(list(cursor))
    finally:
        # Always release the client, even if the query fails.
        conn.close()

    # Honour `no_id`; an empty collection yields a frame without `_id`.
    if no_id and '_id' in df.columns:
        del df['_id']

    return df

def read_collection_as_dict(collection_name):
    """Read a whole MongoDB collection and merge its documents into one dict.

    Every key/value pair of every document is copied into a single dictionary,
    skipping the ``_id`` field. If the same key appears in several documents,
    the value from the document read last wins.

    Args:
        collection_name: name of the collection to read.

    Returns:
        A dict holding the merged fields of all documents.
    """
    # Connect to MongoDB (also sets the module-level `conn` client).
    db = _connect_mongo()

    try:
        # Materialise the cursor while the connection is still open.
        documents = list(db[collection_name].find())
    finally:
        # Always release the client, even if the query fails.
        conn.close()

    dic = dict()
    for document in documents:
        for key, value in document.items():
            if key != '_id':
                dic[key] = value

    return dic

In [2]:
# Load both collections once; each call opens and closes its own MongoDB
# connection (hence the two connection messages printed below).
users_recipes_ratings_df = read_collection_as_df("users_recipes_ratings")
recipes_ingredients_dic = read_collection_as_dict("recipes_ingredients")

Connected successfully to agile_data_science_group_3
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)
Connected successfully to agile_data_science_group_3
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [4]:
recipes_ingredients_dic.keys()

dict_keys(['chicken-pumpkin-curry', 'masterclass-pulled-ham-creamy-herb-salad', 'banana-pecan-date-muffins', 'easy-peasy-fish-cakes', 'fruity-fro-yo-bombe', 'salmon-fillets-with-pink-grapefruit-salsa', 'mac-n-cheese-salad', 'ginger-cake', 'nadiya-hussain-berry-breakfast-muffins', 'three-bean-salad', 'glazed-chicken-vegetable-roast', 'beetroot-potato-chorizo-hash', 'spinach-prawn-mango-salad', 'chocolate-bread-butter-pudding', 'tuna-with-couscous', 'chicken-broccoli-pie', 'pork-chops-with-mushroom-sauce', 'chicken-bean-hotpot', 'baked-parma-ham-and-gorgonzola-figs', 'fondant-fancies', 'crepe-in-orange-liqueur', 'summer-fruit-pudding-with-basil', 'carrot-shaped-chocolate-cake', 'moules-marinieres', 'tomato-quiche', 'pea-ham-soup', 'spiced-persian-turkey', 'chicken-and-chickpea-hotpot', 'tunisian-style-omelette', 'smoky-mexican-pork-fajitas', 'chicken-couscous-salad', 'butternut-squash-quesadillas', 'orange-carrot-soup', 'greek-style-slow-roasted-leg-of-lamb', 'rich-chocolate-ginger-cupca

In [4]:
recipes_ingredients_dic

{'chicken-pumpkin-curry': ['ghee',
  'oil',
  'onion',
  'pack',
  'asda',
  'chicken',
  'thigh',
  'fillet',
  'skin',
  'garlic',
  'clove',
  'ginger',
  'turmeric',
  'cumin',
  'asda',
  'garam',
  'masala',
  'chillie',
  'can',
  'coconut',
  'milk',
  'pumpkin',
  'skin',
  'fibre',
  'cornflour',
  'cold',
  'water',
  'make',
  'paste',
  'coriander',
  'rice',
  'serve'],
 'masterclass-pulled-ham-creamy-herb-salad': ['butcher’',
  'gammon',
  'joint',
  'cider',
  'bag',
  'asda',
  'tender',
  'leaf',
  'salad',
  'caper',
  'frozen',
  'pea',
  'minute',
  'radishe',
  'chosen',
  'gherkin',
  'cider',
  'vinegar',
  'olive',
  'oil',
  'fat',
  'crème',
  'fraîche',
  'dijon',
  'mustard',
  'parsley'],
 'banana-pecan-date-muffins': ['butter',
  'plu',
  'light',
  'soft',
  'brown',
  'sugar',
  'ripe',
  'banana',
  'weight',
  'egg',
  'beaten',
  'flour',
  'salt',
  'powder',
  'cinnamon',
  'plu',
  'pecan',
  'date',
  'raisin-size',
  'piece',
  'sugar'],
 'easy-

### Search Engine

In [5]:
def search_engine(user_input, recipes_dict=None):
    """
    Rank recipes by how well they match the ingredients the user has.

    For every recipe the score is ``10 * matched / total``, where ``matched``
    is the number of distinct recipe ingredients present in ``user_input`` and
    ``total`` is the number of distinct ingredients of the recipe. A recipe
    without ingredients scores 0.

    Input:
    - user_input as list. Example: ["ingredient1", "ingredient2", ...]
    - recipes_dict as dictionary. Example: {recipe1: [ingredients], recipe2: [ingredients], ...}
      When omitted, the "recipes_ingredients" collection is read from MongoDB
      at call time.

    Output (three lists aligned by position, best score first):
    - recommendations as list. Example: [recipe134, recipe43234, recipe544, ...]
    - matchings as list of sets. Example: [{ingredient1, ingredient2}, {ingredient1}, ...]
    - missings as list of sets. Example: [set(), {ingredient43}, {ingredient32, ingredient45}, ...]
    """
    # Resolve the default lazily: a call in the signature would hit the
    # database when the function is defined and then be frozen forever.
    if recipes_dict is None:
        recipes_dict = read_collection_as_dict("recipes_ingredients")

    matching = dict()
    missing = dict()
    score = dict()

    user_ingredients = set(user_input)

    for recipe, ingredients in recipes_dict.items():

        recom_ingredients = set(ingredients)

        matching[recipe] = user_ingredients & recom_ingredients   # ingredients in user_input & in recipe
        missing[recipe] = recom_ingredients - user_ingredients    # recipe ingredients the user does not have

        # matching and missing partition the recipe's ingredient set.
        total = len(recom_ingredients)
        # Guard against recipes with an empty ingredient list.
        score[recipe] = 10 * len(matching[recipe]) / total if total else 0.0

    # Stable sort: recipes with equal scores keep their dictionary order.
    ranked = sorted(score.items(), key=operator.itemgetter(1), reverse=True)

    recommendations_sorted = [recipe for recipe, _ in ranked]
    matching_sorted = [matching[recipe] for recipe in recommendations_sorted]
    missing_sorted = [missing[recipe] for recipe in recommendations_sorted]

    return recommendations_sorted, matching_sorted, missing_sorted

Connected successfully to agile_data_science_group_3
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [6]:
# Example query: the ingredients the user has available.
grocery = ["egg", "chicken"]

# The dictionary loaded earlier is passed explicitly as recipes_dict.
recom_list, matching_ingredients, missing_ingredients = search_engine(grocery, recipes_ingredients_dic)

In [7]:
recom_list

['basic-royal-icing',
 'icing-recipe',
 'oven-cooked-new-potatoes',
 'chicken-with-parmesan-lemon-and-thyme-recipe',
 'meringue-nests',
 'masterclass-chicken-gravy',
 'scrambled-eggs',
 'chocolate-mousse',
 'cauliflower-fritters',
 'prosciutto-wrapped-asparagus',
 'flipping-fantastic-pancakes',
 'sponge-recipe',
 'victoria-sponge-cake',
 'kluski-polish-dumplings',
 'chicken-sweetcorn-quiche',
 'crispy-courgette-sticks',
 'banana-pancakes',
 'zesty-lime-coriander-chicken-noodles',
 'chicken-stir-fry',
 'zesty-lemon-cupcakes',
 'bara-brith',
 'cheesy-chicken-tomato-bake',
 'tombstone-biscuits',
 'meringue-bones-and-blood-dip',
 'kransekake',
 'ghoulish-ghosts',
 'kid-friendly-pancakes',
 'pike',
 'miguel-barclays-green-shakshuka',
 'clementine-lemon-curd',
 'tipsy-cherry-delight',
 'cheese-red-onion-baked-omelette',
 'make-your-own-fortune-cookie',
 'lemon-curd',
 'autumn-apple-cake',
 'breaded-chicken-drumsticks',
 'spanish-tortilla',
 'potato-chicken-pad-thai',
 'homemade-chicken-nugge

In [8]:
matching_ingredients

[{'egg'},
 {'egg'},
 {'chicken'},
 {'chicken', 'egg'},
 {'egg'},
 {'chicken'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'egg'},
 {'chicken', 'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'egg'},
 {'chicken'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'chicken', 'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'chicken', 'egg'},
 {'chicken', 'egg'},
 {'chicken', 'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'chicken'},
 {'egg'},
 {'egg'},
 {'chicken'},
 {'egg'},
 {'chicken', 'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'egg'},
 {'chicken'},
 {'chicken', 'egg'},
 {'chicken'},
 {'egg'},
 {'egg'},
 {'egg'}

In [9]:
missing_ingredients

[{'sugar'},
 {'sugar'},
 {'butter', 'potatoe', 'stock'},
 {'breadcrumb',
  'breast',
  'butter',
  'fillet',
  'flour',
  'lemon',
  'parmesan',
  'sprig',
  'thyme'},
 {'cherrie', 'cream', 'double', 'strawberrie', 'sugar'},
 {'flour', 'frozen', 'juice', 'roast', 'stock'},
 {'bread', 'butter', 'milk', 'serve', 'wholegrain'},
 {'chocolate', 'cream', 'dark', 'free-range', 'yolk'},
 {'cauliflower', 'floret', 'flour', 'milk', 'oil'},
 {'asparagu', 'chosen', 'grower’', 'pack', 'prosciutto', 'serve'},
 {'flour', 'free-range', 'milk', 'oil', 'pinch', 'salt'},
 {'butter', 'caster', 'extract', 'flour', 'sugar', 'vanilla'},
 {'butter', 'caster', 'flour', 'jam', 'raspberry', 'sugar'},
 {'beaten', 'butter', 'flour', 'salt', 'warm', 'water'},
 {'bag',
  'bistro',
  'case',
  'cream',
  'double',
  'leftover',
  'pastry',
  'potato',
  'salad',
  'savoury',
  'sweetcorn',
  'wedge'},
 {'courgette', 'flour', 'lengthwise', 'milk', 'polenta', 'stick'},
 {'banana', 'blueberrie', 'pack', 'raspberrie', 'r

Testing:

In [10]:
set(recipes_ingredients_dic['basic-royal-icing'])

{'egg', 'sugar'}

In [11]:
set(recipes_ingredients_dic['icing-recipe'])

{'egg', 'sugar'}

### Best Rated

In [13]:
import numpy as np
import pandas as pd
import csv
#import graphlab as gl

In [14]:
def best_rated_recommender(data=None):
    """Rank recipes by their average rating, best first.

    Requires the name ``gl`` (GraphLab Create; its import is commented out in
    this notebook) to be available at module level.

    Args:
        data: ratings table with at least ``recipe`` and ``rating`` columns.
            Defaults to the module-level ``users_recipes_ratings_df``,
            looked up at call time.

    Returns:
        An SFrame with one row per recipe and the columns ``mean_rating`` and
        ``std_rating``, sorted by highest mean rating; ties are broken by the
        lowest standard deviation.
    """
    # Late-bind the default so a reloaded ratings frame is picked up.
    if data is None:
        data = users_recipes_ratings_df

    # convert to SFrame (internal dataframe format of graphlab)
    sf = gl.SFrame(data)

    # per-recipe rating statistics
    recipe_rating_stats = sf.groupby(key_columns='recipe',
                                     operations={'mean_rating': gl.aggregate.MEAN('rating'),
                                                 'std_rating': gl.aggregate.STD('rating')})

    # Each pair is (column, ascending): the best-rated recipes must come
    # first, so mean_rating is sorted descending and std_rating ascending.
    return recipe_rating_stats.sort([('mean_rating', False), ('std_rating', True)])

### Collaborative filtering

In [15]:
import numpy as np
import pandas as pd
import csv
#import graphlab as gl

In [16]:
def collaborative_filtering(data=None, user=None, K=10):
    """Recommend recipes with an item-item (Pearson) similarity model.

    Requires the name ``gl`` (GraphLab Create; its import is commented out in
    this notebook) to be available at module level.

    Args:
        data: ratings table with ``user``, ``recipe`` and ``rating`` columns.
            Defaults to the module-level ``users_recipes_ratings_df``,
            looked up at call time.
        user: list of user ids to recommend for. Defaults to ``["user_1"]``.
        K: number of recommendations to return per user.

    Returns:
        The result of the model's ``recommend`` call for the given users.
    """
    # Late-bind the defaults: avoids freezing the ratings frame at definition
    # time and avoids sharing one mutable list between calls.
    if data is None:
        data = users_recipes_ratings_df
    if user is None:
        user = ["user_1"]

    sf = gl.SFrame(data)

    # Named `model` so the local no longer shadows this function's own name.
    model = gl.item_similarity_recommender.create(sf, user_id='user', item_id='recipe', target='rating',
                                                  similarity_type='pearson')

    # The original also ran model.predict(sf) over the whole dataset and
    # discarded the result; that unused computation has been removed.
    return model.recommend(users=user, k=K)     # top K collaborative filtering recommendations

### Content-based Recommender

`col_recipes_ingredients.find()` iterates over every `recipe: [products]` entry of the collection.

In [17]:
# Print every recipe name followed by its ingredient list
# (this dumps the whole dictionary, so the output is long).
for key, value in recipes_ingredients_dic.items():
    print(key, value)

chicken-pumpkin-curry ['ghee', 'oil', 'onion', 'pack', 'asda', 'chicken', 'thigh', 'fillet', 'skin', 'garlic', 'clove', 'ginger', 'turmeric', 'cumin', 'asda', 'garam', 'masala', 'chillie', 'can', 'coconut', 'milk', 'pumpkin', 'skin', 'fibre', 'cornflour', 'cold', 'water', 'make', 'paste', 'coriander', 'rice', 'serve']
masterclass-pulled-ham-creamy-herb-salad ['butcher’', 'gammon', 'joint', 'cider', 'bag', 'asda', 'tender', 'leaf', 'salad', 'caper', 'frozen', 'pea', 'minute', 'radishe', 'chosen', 'gherkin', 'cider', 'vinegar', 'olive', 'oil', 'fat', 'crème', 'fraîche', 'dijon', 'mustard', 'parsley']
banana-pecan-date-muffins ['butter', 'plu', 'light', 'soft', 'brown', 'sugar', 'ripe', 'banana', 'weight', 'egg', 'beaten', 'flour', 'salt', 'powder', 'cinnamon', 'plu', 'pecan', 'date', 'raisin-size', 'piece', 'sugar']
easy-peasy-fish-cakes ['pack', 'smash', 'instant', 'mash', 'potato', 'can', 'asda', 'tuna', 'chunk', 'water', 'chive', 'pinch', 'cayenne', 'egg', 'beaten', 'butter', 'asda', 

smoky-pork-burgers ['asda', 'butcher’', 'pork', 'mince', 'les', 'fat', 'onion', 'jack', 'daniel', 'whiskey', 'egg', 'yolk', 'rasher', 'streaky', 'bacon', 'asda', 'baker’', 'burger', 'bun', 'asda', 'garlic', 'squeezy', 'mayonnaise', 'co', 'lettuce', 'tomato', 'rocket', 'optional']
pulled-ham-kale-lentil-salad-with-fried-egg ['olive', 'oil', 'cider', 'vinegar', 'english', 'mustard', 'bag', 'grower’', 'british', 'kale', 'bag', 'good', 'green', 'lentil', 'parsley', 'stem', 'torn', 'frozen', 'pea', 'onion', 'chosen', 'ham', 'egg']
sweet-potato-chorizo-soup ['onion', 'olive', 'oil', 'butter', 'carrot', 'sweet', 'potatoe', 'paprika', 'hot', 'chicken', 'stock', 'full-fat', 'milk', 'asda', 'special', 'chorizo', 'crusty', 'bread', 'serve']
homemade-burger ['slice', 'bread', 'day', 'old', 'pack', 'asda', 'british', 'lean', 'beef', 'steak', 'mince', 'lean', 'garlic', 'clove', 'shallot', 'egg', 'yolk', 'asda', 'dijon', 'mustard', 'olive', 'oil', 'worcester', 'sauce', 'flat-leaf', 'parsley', 'slice'

In [28]:
recipes_ingredients_dic.items()

dict_items([('chicken-pumpkin-curry', ['ghee', 'oil', 'onion', 'pack', 'asda', 'chicken', 'thigh', 'fillet', 'skin', 'garlic', 'clove', 'ginger', 'turmeric', 'cumin', 'asda', 'garam', 'masala', 'chillie', 'can', 'coconut', 'milk', 'pumpkin', 'skin', 'fibre', 'cornflour', 'cold', 'water', 'make', 'paste', 'coriander', 'rice', 'serve']), ('masterclass-pulled-ham-creamy-herb-salad', ['butcher’', 'gammon', 'joint', 'cider', 'bag', 'asda', 'tender', 'leaf', 'salad', 'caper', 'frozen', 'pea', 'minute', 'radishe', 'chosen', 'gherkin', 'cider', 'vinegar', 'olive', 'oil', 'fat', 'crème', 'fraîche', 'dijon', 'mustard', 'parsley']), ('banana-pecan-date-muffins', ['butter', 'plu', 'light', 'soft', 'brown', 'sugar', 'ripe', 'banana', 'weight', 'egg', 'beaten', 'flour', 'salt', 'powder', 'cinnamon', 'plu', 'pecan', 'date', 'raisin-size', 'piece', 'sugar']), ('easy-peasy-fish-cakes', ['pack', 'smash', 'instant', 'mash', 'potato', 'can', 'asda', 'tuna', 'chunk', 'water', 'chive', 'pinch', 'cayenne', '

In [14]:
def distance_recipes(recipe1, recipe2, recipes_dict=None):
    """Return the share of ``recipe1``'s ingredients that ``recipe2`` also uses.

    Despite the name this is a similarity, not a distance: 1.0 means every
    distinct ingredient of ``recipe1`` appears in ``recipe2``, 0.0 means none
    does. The measure is not symmetric because it is normalised by
    ``recipe1`` only.

    Args:
        recipe1: name of the reference recipe.
        recipe2: name of the recipe compared against it.
        recipes_dict: ``{recipe: [ingredients]}`` mapping to look both names
            up in. Defaults to the module-level ``recipes_ingredients_dic``.

    Returns:
        A float in [0, 1]; 0.0 when ``recipe1`` has no ingredients.

    Raises:
        KeyError: if either recipe name is not in the mapping.
    """
    if recipes_dict is None:
        recipes_dict = recipes_ingredients_dic

    rec1 = set(recipes_dict[recipe1])
    rec2 = set(recipes_dict[recipe2])

    # An empty reference recipe shares nothing; avoids dividing by zero.
    if not rec1:
        return 0.0

    return len(rec1 & rec2) / len(rec1)

In [15]:
# Fraction of the first recipe's distinct ingredients that also appear in the second.
distance_recipes('fruity-fro-yo-bombe','banana-pecan-date-muffins')

0.08695652173913043

In [16]:
def rec_user(recipe1, recipes_dict=None):
    """Return the 10 recipes whose ingredients overlap most with ``recipe1``.

    The score of a candidate is the fraction of ``recipe1``'s distinct
    ingredients that the candidate also contains (the same measure as
    ``distance_recipes``), computed against the ``recipes_dict`` that was
    passed in rather than against a module-level dictionary.

    Args:
        recipe1: name of the reference recipe; must be a key of ``recipes_dict``.
        recipes_dict: ``{recipe: [ingredients]}`` mapping. When omitted, the
            "recipes_ingredients" collection is read from MongoDB at call time.

    Returns:
        A list of at most 10 ``(recipe, score)`` tuples, highest score first.
        ``recipe1`` itself is included in the candidates.

    Raises:
        KeyError: if ``recipe1`` is not in ``recipes_dict``.
    """
    # Resolve the default lazily: a call in the signature would hit the
    # database when the function is defined and then be frozen forever.
    if recipes_dict is None:
        recipes_dict = read_collection_as_dict("recipes_ingredients")

    # Build the reference ingredient set once instead of once per candidate.
    reference = set(recipes_dict[recipe1])

    dis = dict()
    for rec, ingredients in recipes_dict.items():
        if reference:
            dis[rec] = len(reference.intersection(ingredients)) / len(reference)
        else:
            # An empty reference recipe shares nothing with any candidate.
            dis[rec] = 0.0

    # Stable sort: candidates with equal scores keep their dictionary order.
    df_return = sorted(dis.items(), key=operator.itemgetter(1), reverse=True)[0:10]

    return df_return

Connected successfully to agile_data_science_group_3
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


In [17]:
# Re-reads the collection from MongoDB for this call (see the connection messages printed below).
rec_user('banana-pecan-date-muffins',recipes_dict = read_collection_as_dict("recipes_ingredients"))

Connected successfully to agile_data_science_group_3
MongoClient(host=['ds233895.mlab.com:33895'], document_class=dict, tz_aware=False, connect=True)


[('banana-pecan-date-muffins', 1.0),
 ('banana-muffins', 0.6842105263157895),
 ('pumpkin-fruit-tea-bread', 0.6842105263157895),
 ('alice-in-wonderland-teapot-cake', 0.5789473684210527),
 ('banana-pecan-honey-loaf', 0.5789473684210527),
 ('gluten-free-carrot-cake', 0.5789473684210527),
 ('apple-crumble-parmesan-cake', 0.5263157894736842),
 ('deep-dish-apple-crumble-pie', 0.5263157894736842),
 ('ginger-pear-upside-down-cake', 0.5263157894736842),
 ('caramelised-pear-chocolate-upside-down-cake', 0.5263157894736842)]