In [17]:
import operator
import os
from random import shuffle

import bson
import numpy as np
import pandas as pd
import pymongo
from bson.objectid import ObjectId
from tqdm import tqdm

In [30]:
class MongoServer():
    """Small helper around a pymongo client bound to a single database.

    It keeps a per-instance cache (``self.collections``) of the collection
    handles that were already looked up and offers a few query shortcuts that
    return plain lists of documents.
    """
    credentials = None
    conn = None
    db = None

    def __init__(self, credentials, run = False, db_name = "agile_data_science_group_3"):
        """Create the helper and, when ``run`` is true, connect immediately.

        Parameters
        ----------
        credentials : dict
            Must provide the keys 'name', 'password', 'url' and 'dbname'
            (only read when ``run`` is true).
        run : bool
            When true, open the client and select ``db_name`` right away.
        db_name : str
            Name of the database to select.
        """
        # Per-instance cache; a class-level dict would be shared by every
        # MongoServer object.
        self.collections = {}
        if run:
            if not self.connect2Mongo(credentials, db_name):
                print("Connection to server Failed.")

            if not self.connect2DataBase(db_name):
                print("Connection Data Base Failed.")

    def connect2Mongo(self, credentials, db):
        """Create the MongoClient from ``credentials`` and store it in ``self.conn``.

        ``db`` is accepted for backward compatibility but is not used; the
        database named in ``credentials['dbname']`` goes into the URI.
        Returns True on success, False when pymongo reports a ConnectionFailure.
        A missing key in ``credentials`` raises KeyError.
        """
        try:
            name = credentials['name']
            password = credentials['password']
            url = credentials['url']
            dbname = credentials['dbname']
            # NOTE(review): recent pymongo clients connect lazily, so a bad
            # host may only surface on the first real operation - confirm.
            self.conn = pymongo.MongoClient("mongodb://{}:{}@{}/{}".format(name,password,url,dbname))
            return True
        except pymongo.errors.ConnectionFailure:
            return False

    def connect2DataBase(self, db_name = "agile_data_science_group_3"):
        """Select database ``db_name`` on the open client; return True/False."""
        if self.conn is None:
            # connect2Mongo failed or was never called.
            return False
        try:
            self.db = self.conn[db_name]
            return True
        except Exception:
            return False

    def listOfCollections (self):
        """Return the names of the collections available in the database."""
        # Look the method up on the class, not on the instance: a pymongo
        # Database turns unknown attribute names into collection handles.
        if hasattr(type(self.db), "list_collection_names"):
            return self.db.list_collection_names()
        # Older pymongo releases only provide collection_names().
        return self.db.collection_names()

    def getAllCollections (self):
        """Cache a handle for every collection of the database; returns True."""
        for col in self.listOfCollections():
            self.getCollectionFromServer(col)
        return True

    def getCollectionFromServer (self, name_collection):
        """Cache the handle of ``name_collection``.

        Returns True when the collection exists (and was cached), else False.
        """
        if name_collection in self.listOfCollections():
            self.collections[name_collection] = self.db.get_collection(name_collection)
            print ("Collection ",name_collection," Update in Local.")
            return True
        return False

    def getCollection(self, name_collection):
        """Return the cached handle, fetching it first if needed.

        Returns False when the collection does not exist.
        """
        if name_collection not in self.collections:
            if not self.getCollectionFromServer(name_collection):
                return False
        return self.collections[name_collection]

    def getItems(self, name_collection, N = None):
        """Return the documents of a collection as a list.

        ``N`` limits the number of documents; ``None`` means all of them.
        Returns False when the collection does not exist.
        """
        if name_collection not in self.collections:
            if not self.getCollectionFromServer(name_collection):
                return False
        cursor = self.collections[name_collection].find()
        if N is not None:
            cursor = cursor.limit(N)
        return list(cursor)

    def searchInCollection(self, name_collection, field, patro, N = None):
        """Return the documents of one collection where ``field`` equals ``patro``.

        ``N`` limits the number of documents; ``None`` means no limit.
        """
        cursor = self.db.get_collection(name_collection).find({field:patro})
        if N is not None:
            cursor = cursor.limit(N)
        return list(cursor)

    def searchInDB (self, field, patro):
        """Run the ``field == patro`` query on every collection.

        Returns a dict mapping collection name -> list of matching documents.
        """
        query = {}
        for collection in self.listOfCollections():
            query[collection] = list(self.db.get_collection(collection).find({field:patro}))
        return query

    def findOne(self, collection_name):
        """Return one document of the collection (``find_one()`` with no filter)."""
        return self.db.get_collection(collection_name).find_one()

    def searchWithMultiplyConditions(self, collection_name, _query, condition ='$and', N = 6, skip = 0):
        """Combine the sub-queries in ``_query`` with ``condition``.

        Fetches at most ``N + skip`` documents and drops the first ``skip``,
        so up to ``N`` documents are returned.
        """
        cursor = self.db.get_collection(collection_name).find({condition:_query}).limit( N + skip)
        return list(cursor)[skip:]

    def findNElement(self, collection_name, N):
        """Return up to ``N`` documents of the collection as a list."""
        return list(self.db.get_collection(collection_name).find().limit( N ))

    def insertInCollection (self, collection_name,  item):
        """Insert one document and return its ``_id``.

        Raises AssertionError when ``item`` is not a dictionary.
        """
        # The previous check compared the type with an empty dict instance
        # and therefore rejected every item.
        assert isinstance(item, dict), "Item must be a dictionary"
        # insert_one replaces the legacy Collection.insert().
        return self.db.get_collection(collection_name).insert_one(item).inserted_id
     
        
# The password is read from the environment so that no secret is stored in the
# notebook; MONGO_USER may override the default account name.
MS = MongoServer({
    'name': os.environ.get('MONGO_USER', 'huang'),
    'password': os.environ['MONGO_PASSWORD'],
    'url': 'ds233895.mlab.com:33895',
    'dbname': 'agile_data_science_group_3',
})

In [34]:
class Recommender:
    """Recipe recommender backed by the collections served by a MongoServer.

    Collections read by the methods below: 'recipes', 'recipes_ingredients',
    'users', 'ratings' and 'RecIng'.
    """

    def __init__(self, credentials):
        """Open the MongoDB connection through a MongoServer object."""
        self.server = MongoServer(credentials, True)

    def dummieRecommendation(self, N = 10):
        """Dummy recommender: ``N`` random ids among the first 1000 recipes."""
        listObjectIds = [item['_id'] for item in self.server.findNElement('recipes', 1000)]
        shuffle(listObjectIds)
        return listObjectIds[:N]

    def isObjectId(self, _id):
        """Normalise ``_id`` to an ObjectId.

        Returns ``_id`` itself when it already is an ObjectId, a new ObjectId
        built from it when it is a valid string, and None otherwise.
        """
        if isinstance(_id, bson.objectid.ObjectId):
            return _id
        if isinstance(_id, str):
            try:
                # Return the converted value; previously the conversion result
                # was discarded and the raw string was returned.
                return bson.objectid.ObjectId(_id)
            except Exception:
                return None
        return None

    def searchRecepieWithIngredientsByIs(self, idRecepie):
        """Look a recipe up in 'recipes_ingredients' by its id.

        Returns ``(document, second value of the document)``, or ``[]`` when
        the id is invalid or no document matches.
        """
        idRecepie = self.isObjectId(idRecepie)
        if idRecepie is None:
            print ("idRecepie is not a ObjectId")
            return []

        found = self.server.searchInCollection(name_collection = 'recipes_ingredients', field = '_id', patro = idRecepie)
        if not found:
            return []
        query = found[0]
        # dict views are not subscriptable on Python 3, hence the list().
        # NOTE(review): presumably the second field holds the ingredients - confirm.
        return query, list(query.values())[1]

    def searchUsersById(self, idUser):
        """Return the 'users' document with the given id.

        Returns ``[]`` when the id is invalid or no user matches.
        """
        idUser = self.isObjectId(idUser)
        if idUser is None:
            print ("Id User is not a ObjectId")
            return []

        found = self.server.searchInCollection(name_collection = 'users', field = '_id', patro = idUser)
        if not found:
            return []
        return found[0]

    def computeRecommenderMatrixBestRated(self, idRecipe):
        """Placeholder for a reduced matrix for the content-based recommender.

        Not implemented yet; always returns None.
        """
        # look the ingredients used in the recipe
        # look the recipe that use on of the ingridients
        # compute Recommender Matrix
        return None

    def computeRecommenderMatrixCollaborativeFiltering(self, idUser, n=10):
        """Build a reduced recipe x user rating matrix around one user.

        Takes up to ``n`` ratings of the user and, for each rated recipe, all
        ratings of that recipe. Rows are recipe ids, columns are user ids and
        cells without a rating are NaN.

        Returns ``[]`` for an invalid id and None when the user has no ratings.
        """
        idUser = self.isObjectId(idUser)
        if idUser is None:
            print ("Id User is not a ObjectId")
            return []

        # look for the ratings of the user
        ratingsUser = self.server.searchInCollection(name_collection='ratings', field='user_id', patro=idUser, N=n)
        if ratingsUser == []:
            print ("User has no ratings. Cold Start.")
            return None

        # Dicts are used as insertion-ordered sets: constant-time membership
        # and no duplicated labels in the matrix.
        recipe_ids = {}
        user_ids = {idUser: None}
        ratings = []

        for rating in tqdm(ratingsUser):
            recipe_ids[rating['recipe_id']] = None
            # every rating given to the same recipe
            same_recipe = self.server.searchInCollection(name_collection='ratings', field='recipe_id', patro=rating['recipe_id'])
            ratings += same_recipe
            for other in same_recipe:
                recipe_ids[other['recipe_id']] = None
                user_ids[other['user_id']] = None

        rows, cols = list(recipe_ids), list(user_ids)
        matrix = pd.DataFrame(np.full((len(rows), len(cols)), np.nan), index=rows, columns=cols)
        for rates in ratings:
            # .at writes into the frame itself; chained indexing may write to a copy.
            matrix.at[rates['recipe_id'], rates['user_id']] = rates['rating']

        return matrix

    def generateRatingMatrix(self):
        """Download all ratings and build the full recipe x user matrix.

        Rows are recipe ids, columns are user ids (first-seen order) and cells
        without a rating are NaN.
        """
        # getItems returns False when the collection is missing.
        ratings = self.server.getItems('ratings') or []
        recipe_ids = {}
        user_ids = {}
        for item in ratings:
            recipe_ids[item["recipe_id"]] = None
            user_ids[item["user_id"]] = None

        # pandas does not accept sets as index/columns, so lists are passed.
        rows, cols = list(recipe_ids), list(user_ids)
        matrix = pd.DataFrame(np.full((len(rows), len(cols)), np.nan), index=rows, columns=cols)
        for rates in ratings:
            matrix.at[rates['recipe_id'], rates['user_id']] = rates['rating']

        return matrix

    def computeRecomendation(self):
        """Placeholder for the distance-based recommendation step (not implemented)."""
        # Introduce here the distance function for each of the cases
        # maybe it is necesary to separete the funciton in two
        pass

    def collaborativeFiltering(self, idUser, n = 10):
        """Collaborative-filtering recommender.

        Currently a stub that delegates to the dummy recommender and returns
        ``n`` random recipe ids; ``idUser`` is not used yet.
        """
        # take the ratings of the user
        # with the recipes of the user, find which recepes we can generate
        # generate the recommender matrix for user
        # call the distance function
        return self.dummieRecommendation(n)

    def searchRecepieByIngredients(self, listIngredients, N = 6, skip = 0):
        """Return the ids of 'RecIng' recipes that contain every listed ingredient."""
        query = [{'ingredients': ingredient} for ingredient in listIngredients]
        respons = self.server.searchWithMultiplyConditions('RecIng', query, N = N, skip = skip)
        return [recepie['recipe_id'] for recepie in respons]

    def bestRatedWeb(self, n=10):
        """Return the top ``n`` recipe ids by mean rating.

        Ties on the mean are broken by the smaller standard deviation. Only the
        first 2000 ratings are considered. Returns ``[]`` when there are none.
        """
        items = self.server.getItems('ratings', N = 2000)
        if not items:
            return []
        data = pd.DataFrame(items)
        data["rating"] = data["rating"].astype(float)
        recipe_rating = data.groupby('recipe_id')['rating'].agg(['mean', 'std'])
        recs = recipe_rating.sort_values(["mean", "std"], ascending=[False, True])

        return [ObjectId(recipe_id) for recipe_id in list(recs.index.values)[:int(n)]]

    def distance_recipes(self, ing1, ing2):
        """Score how much of ``ing1`` is covered by ``ing2``.

        Returns the fraction of distinct ingredients of ``ing1`` that also
        appear in ``ing2``. Despite the name, higher means more similar.
        Full coverage (1.0) is mapped to 0; an empty ``ing1`` scores 0.
        """
        rec1 = set(ing1)
        rec2 = set(ing2)
        if not rec1:
            # avoid dividing by zero for a recipe without ingredients
            return 0
        d = len(rec1.intersection(rec2))/len(rec1)
        if d == 1.0:
            return 0
        return d

    def bestRated(self, idRecepie, N = 6, skip = 0):
        """Content-based recommender: recipes sharing ingredients with ``idRecepie``.

        Scores the first 2000 'RecIng' recipes with ``distance_recipes`` and
        returns their ids sorted by decreasing score, sliced as ``[skip:N]``
        (``N`` is the end position, so ``N - skip`` ids are returned).
        Returns ``[]`` when the recipe is not found in 'RecIng'.
        """
        recipes_dict = self.server.getItems('RecIng', N = 2000)
        found = self.server.searchInCollection('RecIng', 'recipe_id', idRecepie)
        if not found:
            return []
        ingridents = found[0]['ingredients']

        dis = dict()
        for rec in recipes_dict:
            dis[rec['recipe_id']] = self.distance_recipes(ingridents, rec['ingredients'])

        ranked = sorted(dis.items(), key=operator.itemgetter(1), reverse=True)[skip:N]

        return [obj for obj, rat in ranked]
    
# The password is read from the environment so that no secret is stored in the
# notebook; MONGO_USER may override the default account name.
rec = Recommender({
    'name': os.environ.get('MONGO_USER', 'huang'),
    'password': os.environ['MONGO_PASSWORD'],
    'url': 'ds233895.mlab.com:33895',
    'dbname': 'agile_data_science_group_3',
})

recomendations = rec.bestRated(bson.objectid.ObjectId('59fb0d77eee3642f3c0685cd'))
# objectId must exist before the calls below; defining it here keeps the cell
# runnable on a fresh kernel.
objectId = recomendations[0]
#rec.searchRecepieByIngredients(['oil', 'cold', 'water'], 10)
#rec.computeRecommenderMatrixCollaborativeFiltering(bson.objectid.ObjectId('5a1aa2cb2cfaa80d6ff251f3'))
#rec.generateRatingMatrix()
print(rec.bestRated(objectId, N = 4, skip = 0))
print(rec.bestRated(objectId, N = 2, skip = 0))
print(rec.bestRated(objectId, N = 4, skip = 2))

[ObjectId('59fb0d77eee3642f3c0685d5'), ObjectId('59fb0d77eee3642f3c0685da'), ObjectId('59fb0d77eee3642f3c068ca2'), ObjectId('59fb0d77eee3642f3c0688b6')]
[ObjectId('59fb0d77eee3642f3c0685d5'), ObjectId('59fb0d77eee3642f3c0685da')]
[ObjectId('59fb0d77eee3642f3c068ca2'), ObjectId('59fb0d77eee3642f3c0688b6')]


In [13]:
# The password is read from the environment so that no secret is stored in the
# notebook; MONGO_USER may override the default account name.
A = {
    'name': os.environ.get('MONGO_USER', 'huang'),
    'password': os.environ['MONGO_PASSWORD'],
    'url': 'ds233895.mlab.com:33895',
    'dbname': 'agile_data_science_group_3',
}
A['name']

'huang'

In [33]:
# Use the first id of `recomendations` as the query recipe for the timing run.
objectId = recomendations[0]
# IPython line magic: time one content-based recommendation call.
%timeit rec.bestRated( objectId)

1 loop, best of 3: 540 ms per loop


In [11]:
# IPython line magic: time the top-rated query with its default n=10.
%timeit rec.bestRatedWeb()

Collection  ratings  Update in Local.
1 loop, best of 3: 238 ms per loop


In [21]:
# `recp` was not defined by any earlier cell; it is pinned here to the _id of
# the document shown in this cell's recorded output.
recp = ObjectId('59fb0d77eee3642f3c068c8a')
u1 = rec.server.searchInCollection('recipes', '_id', recp)
u1

[{'_id': ObjectId('59fb0d77eee3642f3c068c8a'),
  'energy_KCal': 125,
  'fat_g': '5.1',
  'id': 'superbowl-biscuits',
  'image': 'https://be35832fa5168a30acd6-5c7e0f2623ae37b4a933167fe83d71b5.ssl.cf3.rackcdn.com/3819/superbowl-football-biscuits__square.jpg',
  'list_ingredients': ['125g unsalted butter',
   '100g caster sugar',
   '25g golden syrup',
   '325g plain flour',
   '1 level tsp bicarbonate of soda',
   '2 level tsp ground ginger',
   '½ level tsp ground cinnamon',
   '2 medium eggs(beaten)',
   '500g Silver Spoon Royal Icing',
   'ASDA Orange Natural Food Colouring',
   'ASDA Blue Natural Food Colouring',
   'ASDA Black Natural Food Colouring',
   'ASDA Red Natural Food Colouring',
   'ASDA Piping Bag & Nozzle Set'],
  'name': 'Superbowl biscuits',
  'ningredients': 14.0,
  'price_p': '57',
  'salt_mg': '0.25',
  'saturated_fat_g': '3.1',
  'serves': '23',
  'sugar_g': '5.8',
  'time_min': '140',
  'url': 'https://www.asdagoodliving.co.uk/food/recipes/superbowl-biscuits'}]

In [None]:
import timeit
# The method expects a user id (ObjectId or hex string). `u1` is the list of
# recipe documents returned by searchInCollection, which isObjectId rejects,
# so passing it only printed an error and returned [].
# NOTE(review): this user id comes from the commented-out example call in the
# Recommender cell - confirm it is the intended user.
rec.computeRecommenderMatrixCollaborativeFiltering(ObjectId('5a1aa2cb2cfaa80d6ff251f3'))

In [None]:
# IPython line magic: time an ingredient search that requires both 'oil' and 'water'.
%timeit rec.searchRecepieByIngredients(['oil', 'water'])

In [20]:
# Display the names of the collections available in the connected database.
rec.server.listOfCollections()

['test',
 'ingredients',
 'RecIng',
 'system.indexes',
 'users',
 'recipes',
 'recipes_ingredients',
 'ratings']

In [25]:
# Display one 'RecIng' document to inspect its fields.
rec.server.findOne('RecIng')

{'_id': ObjectId('5a306ed890ea86097ce0dbe6'),
 'ingredients': ['ghee',
  'oil',
  'onion',
  'pack',
  'asda',
  'chicken',
  'thigh',
  'fillet',
  'skin',
  'garlic',
  'clove',
  'ginger',
  'turmeric',
  'cumin',
  'asda',
  'garam',
  'masala',
  'chillie',
  'can',
  'coconut',
  'milk',
  'pumpkin',
  'skin',
  'fibre',
  'cornflour',
  'cold',
  'water',
  'make',
  'paste',
  'coriander',
  'rice',
  'serve'],
 'recipe_id': ObjectId('59fb0d77eee3642f3c0685cd')}