In [92]:
from surprise import Dataset
from surprise.model_selection import cross_validate
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

food_data = pd.read_csv('data/food.csv')
ratings_data = pd.read_csv('data/ratings.csv')

def text_cleaning(text):
    text = "".join([char for char in text if char not in string.punctuation])
    return text
food_data['Describe'] = food_data['Describe'].apply(text_cleaning)

def create_soup(x):
  return " ".join([x['Describe'], x['C_Type'], x['Veg_Non']])

food_data['soup'] = food_data.apply(create_soup, axis=1)

# reader = Reader(rating_scale=(1, 10))

df = pd.merge(ratings_data, food_data, on='Food_ID').drop_duplicates(subset='Name', keep='first').reset_index(drop=True)
df.shape

(309, 8)

## Recommender

In [93]:
df['soup'].head()

0    boneless skinless chicken thigh trimmed salt a...
1    buns all purpose white flour dry yeast sugar s...
2    whole moong beans cow ghee raisins whole milk ...
3    cashew paste ghee khaand a sweetening agent an...
4    pizza dough 2 boules red pepper red onion basi...
Name: soup, dtype: object

In [94]:
df.shape

(309, 8)

In [95]:
#Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

#Replace NaN with an empty string
# metadata['soup'] = metadata['overview'].fillna('')

#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(df['soup'])

#Output the shape of tfidf_matrix
tfidf_matrix.shape

(309, 1115)

In [96]:
tfidf.get_feature_names_out()

array(['10', '12', '12inchthin', ..., 'zested', 'zinfandel', 'zucchini'],
      dtype=object)

In [97]:
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [98]:
cosine_sim.shape

(309, 309)

In [99]:
#Construct a reverse map of indices and movie titles
indices = pd.Series(df.index, index=df['Name']).drop_duplicates()

In [100]:
indices[:10]

Name
peri peri chicken satay         0
steam bunny chicken bao         1
green lentil dessert fudge      2
cashew nut cookies              3
christmas tree pizza            4
moong dal kiwi coconut soup     5
chicken nimbu dhaniya shorba    6
carrot ginger soup              7
hot chocolate                   8
chicken and mushroom lasagna    9
dtype: int64

In [104]:
def get_recommendations(title, cosine_sim=cosine_sim, k=5):
    idx = indices[title]
    print(idx)

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:k+1]
    food_indices = [i[0] for i in sim_scores]
    return df['Name'].iloc[food_indices]

In [105]:
get_recommendations('chocolate lava cake')

279


256         chocolate doughnut
88      chocolate nero cookies
258    chocolate fudge cookies
59        chocolate kaju katli
71          chocolate marquise
Name: Name, dtype: object

## Evaluation

Need to find evaluation metrics.