# **Food Recommendation System**
Recommendation of the 5 most similar dishes based on Cosine Similarity with a dish taken as input.

In [53]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import re
import string

In [None]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [4]:
df = pd.read_csv('/content/gdrive/My Drive/FoodRecommendationSystem/1662574418893344.csv')
df.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe
0,1,summer squash salad,Healthy Food,veg,"white balsamic vinegar, lemon juice, lemon rin..."
1,2,chicken minced salad,Healthy Food,non-veg,"olive oil, chicken mince, garlic (minced), oni..."
2,3,sweet chilli almonds,Snack,veg,"almonds whole, egg white, curry leaves, salt, ..."
3,4,tricolour salad,Healthy Food,veg,"vinegar, honey/sugar, soy sauce, salt, garlic ..."
4,5,christmas cake,Dessert,veg,"christmas dry fruits (pre-soaked), orange zest..."


In [5]:
# To find number of dishes in the dataset
num_of_dishes = len(list(df['Name'].unique()))
num_of_dishes

400

In [6]:
# Categories
cat = df['C_Type'].unique()
cat

array(['Healthy Food', 'Snack', 'Dessert', 'Japanese', 'Indian', 'French',
       'Mexican', 'Italian', 'Chinese', 'Beverage', 'Thai', 'Korean',
       ' Korean', 'Vietnames', 'Nepalese', 'Spanish'], dtype=object)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Food_ID   400 non-null    int64 
 1   Name      400 non-null    object
 2   C_Type    400 non-null    object
 3   Veg_Non   400 non-null    object
 4   Describe  400 non-null    object
dtypes: int64(1), object(4)
memory usage: 15.8+ KB


In [32]:
# Cleaning of description
def cleaning(text):
  text = text.lower()
  text  = "".join([char for char in text if char not in string.punctuation])
  return text

In [33]:
df['Describe'] = df['Describe'].apply(cleaning)

In [34]:
# Duplicate Data
df.duplicated().sum()

0

In [35]:
# Null Data
df.isnull().sum()

Food_ID     0
Name        0
C_Type      0
Veg_Non     0
Describe    0
dtype: int64

# **Content Based Filtering**
Recommendation based on the Description of the dishes

In [36]:
vect = TfidfVectorizer(stop_words='english')
X = vect.fit_transform(df['Describe'])

In [37]:
cosine_similarity = linear_kernel(X, X)
cosine_similarity

array([[1.        , 0.16228366, 0.13001124, ..., 0.1286286 , 0.04277223,
        0.09993639],
       [0.16228366, 1.        , 0.06799336, ..., 0.14878001, 0.05688681,
        0.16917639],
       [0.13001124, 0.06799336, 1.        , ..., 0.03291577, 0.11795401,
        0.01834168],
       ...,
       [0.1286286 , 0.14878001, 0.03291577, ..., 1.        , 0.        ,
        0.10087579],
       [0.04277223, 0.05688681, 0.11795401, ..., 0.        , 1.        ,
        0.        ],
       [0.09993639, 0.16917639, 0.01834168, ..., 0.10087579, 0.        ,
        1.        ]])

In [38]:
food_items = pd.Series(df.index, index=df['Name']).drop_duplicates()
food_items

Name
summer squash salad                                          0
chicken minced salad                                         1
sweet chilli almonds                                         2
tricolour salad                                              3
christmas cake                                               4
                                                          ... 
Kimchi Toast                                               395
Tacos de Gobernador (Shrimp, Poblano, and Cheese Tacos)    396
Melted Broccoli Pasta With Capers and Anchovies            397
Lemon-Ginger Cake with Pistachios                          398
Rosemary Roasted Vegetables                                399
Length: 400, dtype: int64

In [39]:
def food_recommendations(title, cosine_sim=cosine_similarity):
    food_index = food_items[title]
    similarity_scores = list(enumerate(cosine_sim[food_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # Scores for the five most similar dishes
    # Index 0 is the dish itself
    most_recommended_scores = similarity_scores[1:6]

    most_recommended_dishes = [i[0] for i in most_recommended_scores]
    return df['Name'].iloc[most_recommended_dishes]

In [41]:
food_recommendations('tricolour salad')

103             chilli chicken
1         chicken minced salad
27     vegetable som tam salad
282          veg hakka noodles
166             veg fried rice
Name: Name, dtype: object

# **Content Based Filtering: Advanced**
Based on Category, Vegetarian/ Non-Vegetarian, Description

In [42]:
food_features = ['C_Type','Veg_Non', 'Describe']

In [43]:
def features_column(x):
    return x['C_Type'] + " " + x['Veg_Non'] + " " + x['Describe']

In [45]:
# Column with all the features to dataframe df
df['features'] = df.apply(features_column, axis=1)

In [46]:
df.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe,features
0,1,summer squash salad,Healthy Food,veg,white balsamic vinegar lemon juice lemon rind ...,Healthy Food veg white balsamic vinegar lemon ...
1,2,chicken minced salad,Healthy Food,non-veg,olive oil chicken mince garlic minced onion sa...,Healthy Food non-veg olive oil chicken mince g...
2,3,sweet chilli almonds,Snack,veg,almonds whole egg white curry leaves salt suga...,Snack veg almonds whole egg white curry leaves...
3,4,tricolour salad,Healthy Food,veg,vinegar honeysugar soy sauce salt garlic clove...,Healthy Food veg vinegar honeysugar soy sauce ...
4,5,christmas cake,Dessert,veg,christmas dry fruits presoaked orange zest lem...,Dessert veg christmas dry fruits presoaked ora...


In [51]:
count = CountVectorizer(stop_words='english')
count_X = count.fit_transform(df['features'])

In [54]:
cosine_similarity2 = cosine_similarity(count_X, count_X)

In [56]:
df = df.reset_index()
food_items = pd.Series(df.index, index=df['Name'])

In [57]:
display(food_items)

Name
summer squash salad                                          0
chicken minced salad                                         1
sweet chilli almonds                                         2
tricolour salad                                              3
christmas cake                                               4
                                                          ... 
Kimchi Toast                                               395
Tacos de Gobernador (Shrimp, Poblano, and Cheese Tacos)    396
Melted Broccoli Pasta With Capers and Anchovies            397
Lemon-Ginger Cake with Pistachios                          398
Rosemary Roasted Vegetables                                399
Length: 400, dtype: int64

In [58]:
food_recommendations('tricolour salad', cosine_similarity2)

1                         chicken minced salad
103                             chilli chicken
27                     vegetable som tam salad
177                        oats shallots pulao
69     shepherds salad (tamatar-kheera salaad)
Name: Name, dtype: object