In [1]:
pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install tabulate

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



In [3]:
# %%
import pandas as pd
import numpy as np
import sklearn
from sklearn.neighbors import NearestNeighbors
from collections import Counter
from tabulate import tabulate
import warnings
warnings.filterwarnings('ignore')

In [4]:
# %%
class Recommender:
    """Nearest-neighbour meal recommender working on one-hot encoded features.

    :meth:`recommend` merges two candidate lists:

    * meals logged by users whose profile is close to the target profile
      (:meth:`user_based`), and
    * meals whose features are close to what the user recently interacted
      with (:meth:`recent_activity_based`).

    ``profiles`` and ``dataset`` are both read through their ``Nutrient``,
    ``Disease`` and ``Diet`` columns; ``Disease`` and ``Diet`` are handled as
    space-separated token lists.
    """

    # Columns handed back to the caller. ``DataFrame.filter`` is used, so any
    # of these that a frame does not have is skipped instead of raising.
    RESULT_COLUMNS = ['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price', 'Review']

    def __init__(self, profiles, recent_activity, dataset):
        """Store the three input frames; nothing is copied or validated here.

        Args:
            profiles: one row per user (``User_Id``, ``Nutrient``, ``Disease``, ``Diet``).
            recent_activity: interaction log with ``User_Id`` and ``Meal_Id`` columns.
            dataset: meal catalogue with ``Meal_Id`` plus the feature columns.
        """
        self.df = dataset
        self.profiles = profiles
        self.recent_activity = recent_activity

    @staticmethod
    def _split_tokens(value):
        """Return the whitespace-separated tokens of ``value``; ``[]`` for NaN/non-strings."""
        return value.split() if isinstance(value, str) else []

    @staticmethod
    def _above_average(tokens):
        """Return the distinct tokens that occur strictly more often than the mean count.

        NOTE(review): because the comparison is strict, tokens that all occur
        equally often (e.g. a single recent meal) yield an empty list. This
        mirrors the original selection rule.
        """
        counts = Counter(tokens)
        if not counts:
            return []
        threshold = np.mean(list(counts.values()))
        return [token for token, count in counts.items() if count > threshold]

    def get_features(self, dataframe):
        """One-hot encode ``Nutrient``, ``Disease`` and ``Diet`` of ``dataframe``.

        ``Nutrient`` is encoded as a whole value (``str.get_dummies`` default
        separator), while ``Disease`` and ``Diet`` are split on single spaces.

        Returns:
            DataFrame with one 0/1 column per distinct token, row-aligned with
            ``dataframe``. A token present in more than one source column
            produces duplicate column names.
        """
        nutrient_dummies = dataframe.Nutrient.str.get_dummies()
        disease_dummies = dataframe.Disease.str.get_dummies(sep=' ')
        diet_dummies = dataframe.Diet.str.get_dummies(sep=' ')
        return pd.concat([nutrient_dummies, disease_dummies, diet_dummies], axis=1)

    def find_neighbors(self, dataframe, features, k):
        """Return the ``k`` rows of ``dataframe`` closest to the given feature set.

        Args:
            dataframe: frame to search (profiles or the meal dataset).
            features: iterable of feature tokens that should be "on" in the query.
            k: number of neighbours requested.
        """
        features_df = self.get_features(dataframe)
        wanted = set(features)
        # Build the query column by column so its length always equals the
        # width of the feature matrix: unknown features are ignored and
        # duplicated column names each get their own slot.
        final_input = [1 if column in wanted else 0 for column in features_df.columns]
        return self.k_neighbor([final_input], features_df, dataframe, k)

    def k_neighbor(self, inputs, feature_df, dataframe, k):
        """Look up the nearest rows for each query vector in ``inputs``.

        Args:
            inputs: list of query vectors, each as wide as ``feature_df``.
            feature_df: feature matrix, row-aligned with ``dataframe``.
            dataframe: frame whose rows are returned.
            k: number of neighbours requested; capped at the number of rows.

        Returns:
            DataFrame with the matching rows of ``dataframe`` (index reset).
            Empty (same columns) when there is nothing to search.
        """
        n_rows = len(feature_df)
        if n_rows == 0:
            return pd.DataFrame(columns=list(dataframe.columns))

        # sklearn rejects n_neighbors larger than the fitted sample count.
        model = NearestNeighbors(n_neighbors=min(k, n_rows), algorithm='ball_tree')
        model.fit(feature_df.to_numpy())

        _, indices = model.kneighbors(inputs)

        # kneighbors returns row positions, so select with iloc (not loc).
        neighbor_frames = [dataframe.iloc[row_positions] for row_positions in indices]
        return pd.concat(neighbor_frames).reset_index(drop=True)

    def user_based(self, features, user_id, k=10):
        """Recommend meals logged by the ``k`` users with the most similar profile.

        Activity rows logged by ``user_id`` are dropped before the meal ids
        are collected; a meal the user interacted with is still returned when
        one of the similar users interacted with it too.

        Returns:
            DataFrame restricted to :attr:`RESULT_COLUMNS`, one row per ``Name``.
        """
        similar_users = self.find_neighbors(self.profiles, features, k)
        users = list(similar_users.User_Id)

        activity = self.recent_activity
        by_others = activity[activity.User_Id.isin(users) & (activity['User_Id'] != user_id)]
        meals = list(by_others.Meal_Id.unique())

        results = self.df[self.df.Meal_Id.isin(meals)].filter(self.RESULT_COLUMNS)
        return results.drop_duplicates(subset=['Name']).reset_index(drop=True)

    def recent_activity_based(self, user_id, k=10):
        """Recommend the ``k`` meals closest to the user's recent interactions.

        The query is made of every nutrient, disease token and diet token that
        occurs more often than average among the meals the user interacted
        with. Already-seen meals are not filtered out of the result.

        Returns:
            DataFrame restricted to :attr:`RESULT_COLUMNS`.
        """
        activity = self.recent_activity[self.recent_activity['User_Id'] == user_id]
        recent_meals = self.df[self.df.Meal_Id.isin(activity.Meal_Id.unique())]

        features = self._above_average(recent_meals['Nutrient'].dropna())
        for column in ('Disease', 'Diet'):
            tokens = [
                token
                for value in recent_meals[column]
                for token in self._split_tokens(value)
            ]
            features.extend(self._above_average(tokens))

        similar_meals = self.find_neighbors(self.df, features, k)
        return similar_meals.filter(self.RESULT_COLUMNS)

    def recommend(self, user_id, k=10):
        """Combine profile-based and activity-based recommendations for ``user_id``.

        Args:
            user_id: value to look up in ``profiles['User_Id']``.
            k: neighbour count forwarded to both strategies.

        Returns:
            DataFrame of recommendations, de-duplicated on ``description``.

        Raises:
            IndexError: if ``user_id`` has no row in ``profiles``.
        """
        profile = self.profiles[self.profiles['User_Id'] == user_id]
        if profile.empty:
            raise IndexError(f"no profile found for user {user_id!r}")
        row = profile.iloc[0]

        features = []
        if pd.notna(row['Nutrient']):
            features.append(row['Nutrient'])
        features.extend(self._split_tokens(row['Disease']))
        features.extend(self._split_tokens(row['Diet']))

        from_similar_users = self.user_based(features, user_id, k)
        from_recent_activity = self.recent_activity_based(user_id, k)

        combined = pd.concat([from_similar_users, from_recent_activity])
        return combined.drop_duplicates('description').reset_index(drop=True)

# %%
print("Nearest Neighbour Recommender:")
user_id = 'User_44'  # user whose recommendations are computed

# Load, in order: the user profiles, the activity log (filtered by User_Id
# inside the recommender) and the main meal dataset.
profiles, recent_activity, dataset = map(
    pd.read_csv,
    (
        '../Data processing/user_Profiles.csv',
        '../Data processing/recent_activity.csv',
        '../Data processing/dataset.csv',
    ),
)

ob = Recommender(profiles=profiles, recent_activity=recent_activity, dataset=dataset)
result = ob.recommend(user_id)
print(result)



# Disabled experiment: the user-user collaborative-filtering class below is
# wrapped in a bare string literal, so it is never defined or executed.
# NOTE(review), before re-enabling:
#   - `cosine_similarity` is not imported in the visible part of this file;
#   - `user_purchased` is a slice of the meal dataset (`self.df`) but is pivoted
#     on 'User_Id' / 'Purchased' — confirm the dataset really has those columns;
#   - `similarity_matrix[user_id]` indexes the similarity result with the user id
#     itself rather than with a row position.
'''class UserUserCFRecommender(Recommender):
    def user_user_collab_filtering(self, user_id):
        # Filter recent activity for the current user
        user_activity = self.recent_activity[self.recent_activity['User_Id'] == user_id]
        user_meal_ids = user_activity['Meal_Id'].tolist()
        user_purchased_meals = user_activity[user_activity['Purchased'] == 1]['Meal_Id'].tolist()
        print(user_purchased_meals)
        user_purchased = self.df[self.df['Meal_Id'].isin(user_purchased_meals)]
        print(user_purchased)

        # Filter dataset for meals interacted by the current user
        user_meals = self.df[self.df['Meal_Id'].isin(user_meal_ids)]
        print("Here")
        # Create a pivot table of user interactions (ratings/likes) with meals
        #interactions = self.recent_activity.pivot_table(index='User_Id')#user_meals.pivot_table(index='User_Id', columns='Meal_Id', values='Purchased', fill_value=0)
        interactions = user_purchased.pivot_table(index='User_Id', columns='Meal_Id', values='Purchased', fill_value=0)

        print(len(interactions))
        # Calculate cosine similarity between users based on their meal interactions
        similarity_matrix = cosine_similarity(interactions)

        # Find similar users to the current user
        similar_users_indices = np.argsort(similarity_matrix[user_id])[::-1][1:]  # Exclude the user itself

        # Gather meals liked by similar users
        similar_users_meals = interactions.iloc[similar_users_indices]
        recommended_meals = similar_users_meals[similar_users_meals > 0].fillna(0)
        recommended_meals = recommended_meals.sum().sort_values(ascending=False)

        # Filter recommended meals that the user hasn't interacted with yet
        final_recommendations = recommended_meals[~recommended_meals.index.isin(user_meal_ids)].index.tolist()

        return self.df[self.df['Meal_Id'].isin(final_recommendations)][['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price', 'Review']]
'''
# Usage (also disabled — kept as a string literal like the class above):
'''user_id = 'User_71'  # user id of current user
ob = UserUserCFRecommender(profiles, recent_activity, dataset)
result = ob.user_user_collab_filtering(user_id)
print(result)'''

# Disabled experiment: the item-item recommender below is wrapped in a bare
# string literal, so it is never defined or executed.
# NOTE(review), before re-enabling:
#   - `create_item_item_matrix` is defined twice; the second definition
#     (pivot on 'Review') would replace the first (multi-hot features);
#   - `recommend_items` calls `self.find_similar_items(features)` with a single
#     argument, while the method requires `meal_id` and `item_item_matrix`;
#   - `recommend_items` reads a 'Meal_Id' column from `self.profiles` — confirm
#     the profiles frame has one.
'''class ItemItemRecommender:
    
    def __init__(self, dataset, profiles, recent_activity):
        self.df = dataset
        self.profiles = profiles
        self.recent_activity = recent_activity
    
    def create_item_item_matrix(self):
        # Create a multi-hot encoded matrix for the features
        features_matrix = self.df.pivot_table(index='Meal_Id', columns='Nutrient', aggfunc='size', fill_value=0)
        
        # Adding other categorical features using get_dummies
        other_features = ['Disease', 'Diet']
        for feature in other_features:
            dummies = self.df[feature].str.get_dummies(sep=' ')
            features_matrix = pd.concat([features_matrix, dummies], axis=1)
        
        return features_matrix
    def create_item_item_matrix(self):
        # Create a pivot table with Meal_Id as rows, features as columns, and Review as values
        item_item_matrix = self.df.pivot_table(index='Meal_Id', columns='Nutrient', values='Review').fillna(0)
        return item_item_matrix
    
    def find_similar_items(self, meal_id, item_item_matrix, k=5):
        # Finding similarity between items based on their reviews
        similarities = item_item_matrix.corrwith(item_item_matrix.loc[meal_id], axis=0)
        similarities = similarities.sort_values(ascending=False)
        
        # Exclude the queried meal itself
        similarities = similarities.drop(meal_id)
        
        # Get the top k similar items
        similar_items = similarities.head(k)
        return similar_items
    
    def recommend_items(self, user_id, k=10):
        # Get meals that the user hasn't interacted with
        print(self.profiles.columns)
        profile = self.profiles[self.profiles['User_Id'] == user_id]
        print(profile)
        features = profile.filter(['Nutrient', 'Disease', 'Diet'])
        print(features)
        similar_items = self.find_similar_items(features)
        user_meals = set(self.profiles[self.profiles['User_Id'] == user_id]['Meal_Id'])
        print(user_meals)
        all_meals = set(self.df['Meal_Id'])
        not_interacted = list(all_meals - user_meals)
        print(len(not_interacted))
        # Create an item-item matrix
        item_item_matrix = self.create_item_item_matrix()
        
        recommended_items = {}
        for meal_id in not_interacted:
            # Find similar items for each not-interacted meal
            similar_items = self.find_similar_items(meal_id, item_item_matrix, k)
            
            # Store the top k similar items for each meal
            recommended_items[meal_id] = list(similar_items.index)
            print(recommended_items)
        
        return recommended_items'''



# Disabled earlier draft of ContentBasedRecommender, wrapped in a bare string
# literal so it is never defined or executed. The live class defined after the
# sklearn imports below is the one in use; this draft takes the top_n rows by
# similarity without de-duplicating on Meal_Id and returns every dataset column
# except 'combined_features'.
'''class ContentBasedRecommender:
    def __init__(self, dataset, profiles):
        self.df = dataset
        self.profile = profiles
        self.tfidf_matrix, self.tfidf = self.generate_tfidf_matrix()
    
    def generate_tfidf_matrix(self):
        # Create a TF-IDF Vectorizer
        tfidf = TfidfVectorizer(stop_words='english')
        
        # Fill NaN values in 'Nutrient', 'Disease', 'Diet' columns in the dataset
        self.df['Nutrient'] = self.df['Nutrient'].fillna('')
        self.df['Disease'] = self.df['Disease'].fillna('')
        self.df['Diet'] = self.df['Diet'].fillna('')
        
        # Combine text features into a single column for the dataset
        self.df['combined_features'] = self.df['Nutrient'] + ' ' + self.df['Disease'] + ' ' + self.df['Diet']
        
        # Fit and transform the TF-IDF Vectorizer on the dataset
        tfidf_matrix = tfidf.fit_transform(self.df['combined_features'])
        
        return tfidf_matrix, tfidf  # Return both TF-IDF matrix and vectorizer
    
    def content_based_recommendation(self, user_id, top_n=10):
        user_profile = self.profile[self.profile['User_Id'] == user_id]
        user_features = user_profile['Nutrient'].values[0] + ' ' + user_profile['Disease'].values[0] + ' ' + user_profile['Diet'].values[0]
        
        # Calculate TF-IDF for user features using the same Vectorizer
        user_tfidf_matrix = self.tfidf.transform([user_features])
        
        # Calculate cosine similarity between user and meal features
        cosine_sim = linear_kernel(user_tfidf_matrix, self.tfidf_matrix).flatten()
        
        # Get indices of top similar meals
        similar_meals_indices = cosine_sim.argsort()[:-top_n-1:-1]
        
        # Return top recommended meals
        recommended_meals = self.df.iloc[similar_meals_indices].reset_index(drop=True)
        del recommended_meals['combined_features']
        return recommended_meals#[['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price', 'Review']]'''
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

class ContentBasedRecommender:
    """Content-based meal recommender using TF-IDF over nutrient/disease/diet text.

    Each meal is described by the concatenation of its ``Nutrient``,
    ``Disease`` and ``Diet`` values; a user is described the same way from
    the matching row in ``profiles``. Meals are ranked by the linear kernel
    between the user's TF-IDF vector and every meal's TF-IDF vector.
    """

    # Text columns that make up the feature document of a meal / a user.
    TEXT_COLUMNS = ('Nutrient', 'Disease', 'Diet')
    # Columns returned to the caller.
    RESULT_COLUMNS = ['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price']

    def __init__(self, dataset, profiles):
        """Fit the TF-IDF model on ``dataset``.

        Args:
            dataset: meal catalogue with ``Meal_Id`` and the text columns.
            profiles: one row per user with ``User_Id`` and the text columns.
        """
        # Work on a private copy: generate_tfidf_matrix() fills NaNs and adds
        # a 'combined_features' column, which must not leak into the frame the
        # caller keeps using elsewhere.
        self.df = dataset.copy()
        self.profile = profiles
        self.tfidf_matrix, self.tfidf = self.generate_tfidf_matrix()

    def generate_tfidf_matrix(self):
        """Build the per-meal feature text and fit a TF-IDF vectorizer on it.

        Side effect: ``self.df`` gets NaNs in the text columns replaced by
        ``''`` and a new ``combined_features`` column.

        Returns:
            ``(tfidf_matrix, tfidf)`` — the fitted document-term matrix (one
            row per row of ``self.df``) and the fitted vectorizer.
        """
        tfidf = TfidfVectorizer(stop_words='english')

        for column in self.TEXT_COLUMNS:
            self.df[column] = self.df[column].fillna('')

        self.df['combined_features'] = self.df['Nutrient'] + ' ' + self.df['Disease'] + ' ' + self.df['Diet']

        tfidf_matrix = tfidf.fit_transform(self.df['combined_features'])
        return tfidf_matrix, tfidf

    def content_based_recommendation(self, user_id, top_n=10):
        """Return the ``top_n`` distinct meals most similar to the user's profile.

        Args:
            user_id: value to look up in ``profiles['User_Id']``.
            top_n: maximum number of distinct ``Meal_Id`` values to return;
                values below 1 give an empty result.

        Returns:
            DataFrame with :attr:`RESULT_COLUMNS`, most similar meal first,
            keeping the dataset's original index labels.

        Raises:
            IndexError: if ``user_id`` has no row in ``profiles``.
        """
        user_profile = self.profile[self.profile['User_Id'] == user_id]
        if user_profile.empty:
            raise IndexError(f"no profile found for user {user_id!r}")
        row = user_profile.iloc[0]

        # Missing profile cells are skipped instead of breaking the join.
        user_features = ' '.join(
            str(row[column]) for column in self.TEXT_COLUMNS if pd.notna(row[column])
        )
        user_tfidf_matrix = self.tfidf.transform([user_features])
        cosine_sim = linear_kernel(user_tfidf_matrix, self.tfidf_matrix).flatten()

        # argsort yields row *positions*, so rows are selected with iloc only;
        # mixing in label-based access breaks on a non-default index.
        ranked_positions = cosine_sim.argsort()[::-1]
        ranked = self.df.iloc[ranked_positions]

        # Keep the best-ranked row per meal, then cut to the requested size.
        distinct = ranked.drop_duplicates(subset=['Meal_Id'])
        return distinct.head(max(top_n, 0))[self.RESULT_COLUMNS]

# Usage: content-based recommendations for a single user, built from the
# dataset and profiles frames loaded earlier in the notebook.
print()
print("Content Based Recommendation:")
user_id = 'User_44'
ob = ContentBasedRecommender(dataset=dataset, profiles=profiles)
result = ob.content_based_recommendation(user_id=user_id)
print(result)


from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split

class SVDRecommender:
    """Collaborative-filtering recommender based on Surprise's SVD model.

    Ratings are taken from the ``Rated`` column of ``recent_activity``; the
    model predicts a rating for every meal the user has no activity row for
    and the best-scored meals are returned.
    """

    # Columns returned to the caller.
    RESULT_COLUMNS = ['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price']

    def __init__(self, profiles, recent_activity, dataset, rating_scale=(1, 5), random_state=42):
        """Store the inputs and build the Surprise dataset.

        Args:
            profiles: user profiles (stored, not used by this class).
            recent_activity: interaction log with ``User_Id``, ``Meal_Id``, ``Rated``.
            dataset: meal catalogue with ``Meal_Id`` and the result columns.
            rating_scale: ``(min, max)`` passed to ``surprise.Reader``.
                NOTE(review): ``(1, 5)`` is the original assumption — confirm
                it matches the values actually stored in ``Rated``.
            random_state: seed for SVD so repeated runs give the same
                recommendations; pass ``None`` for unseeded training.
        """
        self.df = dataset
        self.profiles = profiles
        self.recent_activity = recent_activity
        self.rating_scale = rating_scale
        self.random_state = random_state
        self._model = None  # fitted lazily by _fitted_model()
        self.data = self.prepare_data()

    def prepare_data(self):
        """Wrap the (user, meal, rating) triples in a Surprise ``Dataset``."""
        reader = Reader(rating_scale=self.rating_scale)
        # Rows with a missing user, meal or rating cannot be used for training.
        ratings = self.recent_activity[['User_Id', 'Meal_Id', 'Rated']].dropna()
        return Dataset.load_from_df(ratings, reader)

    def _fitted_model(self):
        """Train SVD on the full rating set once and reuse it afterwards."""
        if self._model is None:
            model = SVD(random_state=self.random_state)
            model.fit(self.data.build_full_trainset())
            self._model = model
        return self._model

    def svd_recommendation(self, user_id, top_n=10):
        """Return the ``top_n`` unseen meals with the highest predicted rating.

        Args:
            user_id: user to recommend for.
            top_n: maximum number of meals to return.

        Returns:
            DataFrame with :attr:`RESULT_COLUMNS`, one row per meal, ordered
            from highest to lowest predicted rating.
        """
        model = self._fitted_model()

        all_meals = self.df['Meal_Id'].unique()
        is_user_row = self.recent_activity['User_Id'] == user_id
        user_meals = self.recent_activity.loc[is_user_row, 'Meal_Id']
        unseen_meals = np.setdiff1d(all_meals, user_meals)

        predictions = [model.predict(user_id, meal_id) for meal_id in unseen_meals]
        predictions.sort(key=lambda prediction: prediction.est, reverse=True)
        top_meal_ids = [prediction.iid for prediction in predictions[:top_n]]

        recommended_meals = self.df[self.df['Meal_Id'].isin(top_meal_ids)]
        recommended_meals = recommended_meals.drop_duplicates(subset=['Meal_Id'])

        # The isin() filter returns rows in dataset order; put them back into
        # predicted-rating order so the best recommendation comes first.
        rank = {meal_id: position for position, meal_id in enumerate(top_meal_ids)}
        order = recommended_meals['Meal_Id'].map(rank).to_numpy().argsort(kind='stable')
        return recommended_meals.iloc[order][self.RESULT_COLUMNS]

# Usage: SVD-based recommendations for a single user.
print()
print("SVD Recommender:")
user_id = 'User_44'  # user whose recommendations are computed

# Re-read the inputs, in order: user profiles, the activity log (ratings are
# taken from it) and the main meal dataset.
profiles, recent_activity, dataset = map(
    pd.read_csv,
    (
        '../Data processing/user_Profiles.csv',
        '../Data processing/recent_activity.csv',
        '../Data processing/dataset.csv',
    ),
)

svd_ob = SVDRecommender(profiles=profiles, recent_activity=recent_activity, dataset=dataset)
svd_result = svd_ob.svd_recommendation(user_id)
print(svd_result)


Nearest Neighbour Recommender:
      Meal_Id                                               Name  \
0    meal_id5                         gluten-free christmas cake   
1   meal_id47                                       peanut gajak   
2  meal_id199                              walnut brownie gujiya   
3  meal_id200                        raspberry and balsamic dome   
4  meal_id171  mavinakayi nellikai chitranna (raw mango &amp;...   
5  meal_id141                                      morning greed   
6   meal_id78                               strawberry margarita   

        Nutrient Veg_Non                                        description  \
0      vitamin_a     veg  christmas dry fruits (pre-soaked), orange zest...   
1  carbohydrates     veg                       jaggery (gur), peanuts, ghee   
2      magnesium     veg  walnut brownie, chocolate sauce, chocolate chu...   
3      vitamin_a     veg  dark chocolate, butter, sugar, water, egg, egg...   
4       chloride     veg  son