In [2]:
import numpy as np
import pandas as pd
import pymongo
import bson
from tqdm import tqdm

In [37]:
class MongoServer():
    """Small convenience wrapper around a pymongo client bound to one database.

    The object keeps the client in ``conn``, the selected database in ``db``
    and a per-instance cache of collection handles in ``collections``.
    """
    credentials = None
    conn = None
    db = None
    # Class-level fallback only; __init__ gives every instance its own dict so
    # handles cached by one server object are not visible through another.
    collections = {}

    def __init__(self, run = False, credentials = "credentials.txt", db_name = "agile_data_science_group_3"):
        """Create the wrapper and optionally connect straight away.

        Args:
            run: when true, connect to the server and select ``db_name``.
            credentials: path of the credentials file (see ``connect2Mongo``).
            db_name: name of the database to select.
        """
        self.collections = {}
        if run:
            if not self.connect2Mongo(credentials):
                print("Connection to server Failed.")

            if not self.connect2DataBase(db_name):
                print("Connection Data Base Failed.")

    def connect2Mongo(self, credentials = "credentials.txt"):
        """Routine to connect to MongoDB.

        The credentials file must hold exactly four lines: user name,
        password, host (``url``) and authentication database name.

        Returns:
            True when the server answered, False when the credentials file
            could not be read or the server could not be reached.
        """
        try:
            with open(credentials, 'r', encoding='utf-8') as f:
                name, password, url, dbname = f.read().splitlines()
        except (OSError, ValueError):
            # Missing/unreadable file, or not exactly four lines in it.
            return False

        conn = None
        try:
            conn = pymongo.MongoClient("mongodb://{}:{}@{}/{}".format(name, password, url, dbname))
            # MongoClient connects lazily, so constructing it proves nothing;
            # a cheap round-trip is needed to actually detect a dead server.
            conn.admin.command("ping")
        except pymongo.errors.PyMongoError:
            if conn is not None:
                conn.close()
            return False

        self.conn = conn
        return True

    def connect2DataBase(self, db_name = "agile_data_science_group_3"):
        """Routine to select a database on the current connection.

        Returns:
            True on success, False when there is no connection or the
            database handle could not be obtained.
        """
        if self.conn is None:
            return False
        try:
            self.db = self.conn[db_name]
            return True
        except Exception:
            return False

    def _collectionNames(self):
        """Return the collection names using whichever API pymongo offers."""
        # Database.__getattr__ turns unknown attributes into collections, so
        # the feature test has to look at the class, not the instance.
        if hasattr(type(self.db), "list_collection_names"):
            return self.db.list_collection_names()
        return self.db.collection_names()

    def listOfCollections (self):
        """Return the names of the collections available in the database."""
        return self._collectionNames()

    def getAllCollections (self):
        """Cache a handle for every collection of the database. Returns True."""
        for col in self.listOfCollections():
            self.getCollectionFromServer(col)
        return True

    def getCollectionFromServer (self, name_collection):
        """Fetch one collection handle from the database into the local cache.

        Returns:
            True when the collection exists (and was cached), False otherwise.
        """
        if name_collection in self._collectionNames():
            self.collections[name_collection] = self.db.get_collection(name_collection)
            print ("Collection ",name_collection," Update in Local.")
            return True
        return False

    def getCollection(self, name_collection):
        """Return the cached collection handle, fetching it first if needed.

        Returns False when the collection does not exist in the database.
        """
        if name_collection not in self.collections:
            if not self.getCollectionFromServer(name_collection):
                return False
        return self.collections[name_collection]

    def getItems(self, name_collection):
        """Return every document of the collection as a list.

        Returns False when the collection does not exist in the database.
        """
        if name_collection not in self.collections:
            if not self.getCollectionFromServer(name_collection):
                return False
        return list(self.collections[name_collection].find())

    def searchInCollection(self, name_collection, field, patro, N = None):
        """Query one collection for documents whose ``field`` equals ``patro``.

        Args:
            N: maximum number of documents to return; None means no limit.
        """
        cursor = self.db.get_collection(name_collection).find({field:patro})
        if N is not None:
            cursor = cursor.limit(N)
        return list(cursor)

    def searchInDB (self, field, patro):
        """Run the same equality query in every collection.

        Returns:
            dict mapping each collection name to its list of matching documents.
        """
        return {
            collection: list(self.db.get_collection(collection).find({field:patro}))
            for collection in self._collectionNames()
        }

    def findOne(self, collection_name):
        """Return one document of the collection (``find_one`` with no filter)."""
        return self.db.get_collection(collection_name).find_one()

    def searchWithMultiplyConditions(self, collection_name, _query, N = 10):
        """Search a collection with several conditions joined by ``$and``.

        Args:
            _query: list of filter documents, all of which must match.
            N: maximum number of documents to return.
        """
        return list(self.db.get_collection(collection_name).find({"$and":_query}).limit( N ))

    def findNElement(self, collection_name, N):
        """Return the first ``N`` documents of a collection."""
        return list(self.db.get_collection(collection_name).find().limit( N ))

    def insertInCollection (self, collection_name,  item):
        """Insert one document into a collection and return its ``_id``.

        Raises:
            AssertionError: when ``item`` is not a dictionary.
        """
        # The previous check compared the type with an empty dict instance,
        # which is never equal, so every insert was rejected.
        assert isinstance(item, dict), "Item must be a dictionary"
        collection = self.db.get_collection(collection_name)
        # Collection.insert no longer exists in PyMongo 4; insert_one reports
        # the same identifier through the result object.
        if hasattr(type(collection), "insert_one"):
            return collection.insert_one(item).inserted_id
        return collection.insert(item)
     
        
# Module-level server object; run=True makes the constructor connect immediately.
MS = MongoServer(run=True)

In [34]:
class Recommender:
    """Recipe recommender that reads its data through a MongoServer-like object."""

    def __init__(self, server=None):
        """Create the recommender.

        Args:
            server: object exposing the MongoServer query methods. When
                omitted, a new ``MongoServer(True)`` is created, which
                connects to the database immediately.
        """
        self.server = MongoServer(True) if server is None else server

    def dummieRecommendation(self, N = 10):
        """Dummy recommender: return the ``_id`` of the first ``N`` recipes."""
        return [item['_id'] for item in self.server.findNElement('recipes', N)]

    def isObjectId(self, _id):
        """Normalise ``_id`` to a ``bson`` ObjectId.

        Returns:
            ``_id`` itself when it already is an ObjectId, the converted
            ObjectId when it is a valid id string, otherwise None.
        """
        if isinstance(_id, bson.objectid.ObjectId):
            return _id
        if isinstance(_id, str):
            try:
                # Return the converted value: queries against ObjectId fields
                # do not match the plain string form.
                return bson.objectid.ObjectId(_id)
            except (bson.errors.InvalidId, TypeError):
                return None
        return None

    def searchRecepieWithIngredientsByIs(self, idRecepie):
        """Search a recipe in 'recipes_ingredients' by its id.

        Returns:
            ``(document, second_field_value)`` when the recipe exists, or an
            empty list when the id is invalid or nothing matches.
        """
        idRecepie = self.isObjectId(idRecepie)
        if idRecepie is None:
            print ("idRecepie is not a ObjectId")
            return []

        found = self.server.searchInCollection(name_collection = 'recipes_ingredients', field = '_id', patro = idRecepie)
        if not found:
            return []
        query = found[0]
        # The second stored field is presumably the ingredient list — TODO
        # confirm against the collection schema. dict views cannot be indexed
        # in Python 3, hence the list().
        return query, list(query.values())[1]

    def searchUsersById(self, idUser):
        """Search a user document by id.

        Returns:
            The user document, or an empty list when the id is invalid or
            no user matches.
        """
        idUser = self.isObjectId(idUser)
        if idUser is None:
            print ("Id User is not a ObjectId")
            return []

        found = self.server.searchInCollection(name_collection = 'users', field = '_id', patro = idUser)
        return found[0] if found else []

    def computeRecommenderMatrixBestRated(self, idRecipe):
        """Placeholder for the reduced matrix of the best-rated recommender.

        Not implemented yet; always returns None.
        """
        # look the ingredients used in the recipe
        # look the recipe that use one of the ingredients
        # compute Recommender Matrix
        return None

    def _buildRatingMatrix(self, ratings, recipeIds, userIds):
        """Build a recipes x users frame filled with the given ratings (NaN elsewhere)."""
        rows = list(recipeIds)
        cols = list(userIds)
        matrix = pd.DataFrame(np.full((len(rows), len(cols)), np.nan), index=rows, columns=cols)
        for rate in ratings:
            # Single label-based write; chained `matrix[col][row] = v` may
            # assign into a temporary copy and silently lose the value.
            matrix.at[rate['recipe_id'], rate['user_id']] = rate['rating']
        return matrix

    def computeRecommenderMatrixCollaborativeFiltering(self, idUser, n=20):
        """Compute a reduced rating matrix around one user.

        Takes up to ``n`` ratings of the user and gathers every rating given
        to those same recipes.

        Returns:
            DataFrame indexed by recipe id with one column per user id, an
            empty list when the id is invalid, or None when the user has no
            ratings (cold start).
        """
        idUser = self.isObjectId(idUser)
        if idUser is None:
            print ("Id User is not a ObjectId")
            return []

        # look for the ratings of the user
        ratingsUser = self.server.searchInCollection(name_collection='ratings', field='user_id', patro=idUser, N=n)
        if not ratingsUser:
            print ("User has no ratings. Cold Start.")
            return None

        # dicts act as insertion-ordered sets: constant-time membership and
        # no duplicated row/column labels.
        m_ids = {}
        m_user = {idUser: None}
        ratings = []

        # obtain ratings from the same recipes
        for rating in tqdm(ratingsUser):
            m_ids.setdefault(rating['recipe_id'])
            # search for more ratings in the same recipe
            recipes = self.server.searchInCollection(name_collection='ratings', field='recipe_id', patro=rating['recipe_id'])
            ratings += recipes

            for recipe in recipes:
                m_ids.setdefault(recipe['recipe_id'])
                m_user.setdefault(recipe['user_id'])

        return self._buildRatingMatrix(ratings, m_ids, m_user)

    def generateRatingMatrix(self):
        """Download every rating and build the full recipes x users matrix."""
        ratings = self.server.getItems('ratings')
        # Ordered unique labels; a plain set is not accepted as index/columns
        # by current pandas and has no stable order.
        m_ids = dict.fromkeys(item["recipe_id"] for item in ratings)
        m_user = dict.fromkeys(item["user_id"] for item in ratings)
        return self._buildRatingMatrix(ratings, m_ids, m_user)

    def computeRecomendation(self):
        """Placeholder for the distance computation. Not implemented yet."""
        # Introduce here the distance function for each of the cases
        # maybe it is necessary to separate the function in two
        pass

    def bestRated(self, idRecepie, n):
        """Content-based recommender (currently returns the dummy recommendation).

        Args:
            idRecepie: id of the reference recipe (unused for now).
            n: number of recipe ids to return.
        """
        # take the recipe
        # look for all the recipes containing similar ingredients
        # generate the recommender matrix recipe - ingredient
        return self.dummieRecommendation(n)

    def collaborativeFiltering(self, idUser, n = 10):
        """Collaborative-filtering recommender (currently returns the dummy recommendation).

        Args:
            idUser: id of the user (unused for now).
            n: number of recipe ids to return.
        """
        # take the ratings of the user
        # with the recipes of the user, find which recipes we can generate
        # generate the recommender matrix for user
        # call the distance function
        return self.dummieRecommendation(n)

    def searchRecepieByIngredients(self, listIngredients, N = 10):
        """Search recipes in 'RecIng' that contain all the given ingredients.

        Args:
            listIngredients: ingredient values that must all match.
            N: maximum number of recipe ids to return.

        Returns:
            List with the ``_id`` of each matching recipe; empty when no
            ingredient is given.
        """
        query = [{'ingredients':ingredient} for ingredient in listIngredients]
        if not query:
            # MongoDB rejects `$and` with an empty list of conditions.
            return []

        respons = self.server.searchWithMultiplyConditions('RecIng', query, N)
        return [recepie['_id'] for recepie in respons]
        
    
    
# Instantiate the recommender; with no arguments the constructor connects to
# MongoDB through MongoServer(True).
rec = Recommender()
# Example calls, left disabled:
#rec.searchRecepieByIngredients(['oil', 'cold', 'water'], 10)
#rec.computeRecommenderMatrixCollaborativeFiltering(bson.objectid.ObjectId('5a1aa2cb2cfaa80d6ff251f3'))
#rec.generateRatingMatrix()

In [35]:
# Use the `_id` of the document returned by findOne on 'users' as a sample user id.
u1 = rec.server.findOne('users')['_id']
u1

ObjectId('5a1aa2cb2cfaa80d6ff251f3')

In [36]:
# Build the reduced rating matrix around u1 (rows: recipe ids, columns: user ids).
rec.computeRecommenderMatrixCollaborativeFiltering(u1)

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:02<00:00,  8.74it/s]


Unnamed: 0,5a1aa2cb2cfaa80d6ff251f3,5a1aa2cb2cfaa80d6ff25204,5a1aa2cb2cfaa80d6ff25208,5a1aa2cb2cfaa80d6ff2520d,5a1aa2cb2cfaa80d6ff25210,5a1aa2cb2cfaa80d6ff25213,5a1aa2cb2cfaa80d6ff25218,5a2c589294f1473f579ae190,5a2c663094f1474116393736,5a1aa2cb2cfaa80d6ff251fa,...,5a1aa2cb2cfaa80d6ff2520a,5a1aa2cb2cfaa80d6ff25223,5a2c58f794f1473f67719b43,5a1aa2cb2cfaa80d6ff25220,5a1aa2cb2cfaa80d6ff251ff,5a1aa2cb2cfaa80d6ff25209,5a1aa2cb2cfaa80d6ff251f5,5a1aa2cb2cfaa80d6ff251f6,5a1aa2cb2cfaa80d6ff251f8,5a1aa2cb2cfaa80d6ff251fb
59fb0d77eee3642f3c0684a4,2.0,4.0,1.0,2.0,4.0,1.0,4.0,1.0,5.0,,...,,,,,,,,,,
59fb0d77eee3642f3c068409,3.0,,,3.0,,,,,,1.0,...,,,,,,,,,,
59fb0d77eee3642f3c068428,2.0,,,,,,,,,,...,,,,,,,,,,
59fb0d77eee3642f3c06843b,3.0,,,,,,,,,,...,,,,,,,,,,
59fb0d77eee3642f3c06843e,3.0,,,,,,,,,,...,,,,,,,,,,
59fb0d77eee3642f3c06845c,4.0,4.0,,,,,2.0,,,,...,,,,,,,,,,
59fb0d77eee3642f3c06847a,2.0,,,,,,,,3.0,4.0,...,,,,,,,,,,
59fb0d77eee3642f3c068489,4.0,,,,,5.0,,,,,...,,,,,,,,,,
59fb0d77eee3642f3c0684a2,3.0,,,,,3.0,,,,,...,,,,,,,,,,
59fb0d77eee3642f3c0684a3,5.0,,,,,2.0,,,,,...,,,,,,,,,,


In [14]:
# List the collection names of the connected database.
rec.server.listOfCollections()

['test',
 'ingredients',
 'RecIng',
 'system.indexes',
 'users',
 'recipes',
 'recipes_ingredients',
 'ratings']