In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Embedding,Flatten,Concatenate,Dense,Dropout
from tensorflow.keras.optimizers import Adam

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so the CSV files
# stored on Drive can be read by the cells below.
# NOTE(review): later cells read 'drive/MyDrive/...' as a relative path, which only
# resolves when the working directory is /content — confirm for non-default setups.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Load the cleaned interactions dataset into a DataFrame.
# Judging by the preview in the next cell, each row is one user review of a recipe
# (user_id, recipe_id, rating, review) together with that recipe's metadata.
interactions = pd.read_csv('drive/MyDrive/CapstoneML/clean_data.csv')

In [4]:
# Preview the first rows to check which columns were loaded.
interactions.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,user_id,recipe_id,date,rating,review
0,emotional balance spice mixture,48156,10,6164,2002-12-09,"['15-minutes-or-less', 'time-to-make', 'course...","[182.7, 11.0, 4.0, 1.0, 13.0, 4.0, 10.0]",2,['mix the spices together and store in an airt...,really an effective spice blend...i got it in ...,"['ground black pepper', 'ground ginger', 'grou...",6,65056,48156,2004-10-12,5,I am just begining to learn a bit about Ayurve...
1,emotional balance spice mixture,48156,10,6164,2002-12-09,"['15-minutes-or-less', 'time-to-make', 'course...","[182.7, 11.0, 4.0, 1.0, 13.0, 4.0, 10.0]",2,['mix the spices together and store in an airt...,really an effective spice blend...i got it in ...,"['ground black pepper', 'ground ginger', 'grou...",6,191739,48156,2008-03-02,5,This is the fourth or fifth time that I have m...
2,emotional balance spice mixture,48156,10,6164,2002-12-09,"['15-minutes-or-less', 'time-to-make', 'course...","[182.7, 11.0, 4.0, 1.0, 13.0, 4.0, 10.0]",2,['mix the spices together and store in an airt...,really an effective spice blend...i got it in ...,"['ground black pepper', 'ground ginger', 'grou...",6,844554,48156,2009-03-26,5,This looked very interesting and a good way to...
3,jiffy roasted corn and jalapeno cornbread,108414,35,29506,2005-01-15,"['60-minutes-or-less', 'time-to-make', 'course...","[400.2, 32.0, 43.0, 31.0, 21.0, 54.0, 14.0]",10,"['melt butter in a saut pan', 'add the corn , ...","this is a moist, easy, colorful and delicious ...","['whole kernel corn', 'onion', 'red bell peppe...",10,1189565,108414,2009-03-22,5,This was excellent! I used a 12 oz. bag of fro...
4,jiffy roasted corn and jalapeno cornbread,108414,35,29506,2005-01-15,"['60-minutes-or-less', 'time-to-make', 'course...","[400.2, 32.0, 43.0, 31.0, 21.0, 54.0, 14.0]",10,"['melt butter in a saut pan', 'add the corn , ...","this is a moist, easy, colorful and delicious ...","['whole kernel corn', 'onion', 'red bell peppe...",10,588852,108414,2009-08-30,5,I have made this a few times and it disappears...


In [5]:
#Encode user_id and recipe_id as contiguous integer indices using LabelEncoder
# (the Embedding layers defined later need ids in the range 0..n-1).
user_encoder   = LabelEncoder()
recipe_encoder = LabelEncoder()

# Keep an untouched copy of the raw ids and always fit the encoders on it.
# Without this, re-running the cell would fit the encoders on the already-encoded
# values, and encoder.classes_ would no longer map back to the original ids.
if 'user_id_raw' not in interactions.columns:
    interactions['user_id_raw'] = interactions['user_id']
if 'recipe_id_raw' not in interactions.columns:
    interactions['recipe_id_raw'] = interactions['recipe_id']

# The encoded indices replace the original columns, as the training cells expect.
interactions['user_id'] = user_encoder.fit_transform(interactions['user_id_raw'])
interactions['recipe_id'] = recipe_encoder.fit_transform(interactions['recipe_id_raw'])

In [6]:
# Train-test split: hold out 20% of the interaction rows as a test set.
# The fixed random_state makes the shuffle, and therefore the split, reproducible.
train,test = train_test_split(interactions, test_size =0.2, random_state=42)

In [7]:
# Model hyper-parameters.
# The embedding tables are sized by the number of distinct user / recipe ids
# present in the interactions frame (dropna=False counts exactly what unique() returns).
n_users = interactions['user_id'].nunique(dropna=False)
n_recipes = interactions['recipe_id'].nunique(dropna=False)

# Width of each user / recipe embedding vector.
embedding_size = 64

In [8]:
#Define the model: user and recipe embeddings concatenated and fed through an MLP
# that regresses the rating.

# User branch: one integer id per sample -> embedding vector -> flat feature vector.
user_input = Input(shape=(1,))
user_embedding = Embedding(n_users, embedding_size)(user_input)
user_flatten = Flatten()(user_embedding)

# Recipe branch: same structure with its own embedding table.
recipe_input = Input(shape=(1,))
recipe_embedding = Embedding(n_recipes, embedding_size)(recipe_input)
recipe_flatten = Flatten()(recipe_embedding)

# Joint MLP over the concatenated user and recipe features.
concat = Concatenate()([user_flatten, recipe_flatten])
dense1 = Dense(128, activation='relu')(concat)
dropout1 = Dropout(0.5)(dense1)
dense2 = Dense(64, activation='relu')(dropout1)
# Chain from dense2 (not dense1): otherwise the second hidden layer is never
# connected to the output and is silently left out of the model.
dropout2 = Dropout(0.5)(dense2)
# Single linear unit: the predicted rating.
output = Dense(1)(dropout2)

model = Model(inputs=[user_input, recipe_input], outputs=output)
# Mean squared error between predicted and observed ratings.
model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

In [9]:
# Fit the model on the training interactions.
# Inputs are the user and recipe id columns, the target is the rating column;
# validation_split=0.1 asks Keras to hold out 10% of these rows for validation.
history = model.fit(
    x=[train['user_id'], train['recipe_id']],
    y=train['rating'],
    batch_size=64,
    epochs=10,
    validation_split=0.1,
    verbose=1,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
import ast

#interactions = pd.read_csv('drive/MyDrive/CapstoneML/RAW_interactions.csv')
# Load the raw recipe metadata.
raw_recipes = pd.read_csv('drive/MyDrive/CapstoneML/RAW_recipes.csv')

#Preprocess text features
# Each 'tags' cell is parsed as a Python list literal and its items are joined
# into one space-separated string for the text vectorizer.
# ast.literal_eval only accepts literals, whereas eval() would execute any
# expression found in the CSV file.
raw_recipes['tags_str'] = raw_recipes['tags'].apply(lambda tags: ' '.join(ast.literal_eval(tags)))

In [11]:
#Create a TF-IDF matrix for recipe tags
from sklearn.feature_extraction.text import TfidfVectorizer
# The vectorizer is used with its default settings and is fitted on the
# space-joined tag strings; fit_transform returns one row per row of raw_recipes.
# NOTE(review): the default token pattern splits on punctuation, so hyphenated tags
# such as '15-minutes-or-less' become several separate tokens — confirm this is intended.
vectorizer = TfidfVectorizer()
tags_matrix = vectorizer.fit_transform(raw_recipes['tags_str'])

In [12]:
#Reduce dimensionality using TruncatedSVD
#from sklearn.decomposition import TruncatedSVD
#from scipy.sparse import csr_matrix
#import numpy as np
#n_components = 10
#svd = TruncatedSVD(n_components)
#tags_matrix_reduced = svd.fit_transform(tags_matrix)

In [None]:
#Calculate the pairwise cosine similarity between the TF-IDF tag vectors of all recipes
from sklearn.metrics.pairwise import cosine_similarity
# NOTE(review): with a single argument this returns a dense
# (n_recipes x n_recipes) array. If raw_recipes has hundreds of thousands of rows
# this will likely not fit in memory — confirm the dataset size before running.
tags_similarity = cosine_similarity(tags_matrix)

In [None]:
# Raw user/recipe interactions (original, un-encoded ids), used to look up which
# recipes a user has already rated.
raw_interactions = pd.read_csv('drive/MyDrive/CapstoneML/RAW_interactions.csv')

#Function to get recommendations for a specific user
def get_recommendations(user_id, n_recommendations=5):
  """Return the names of the best hybrid-scored recipes the user has not rated yet.

  The score of a candidate recipe is
  ``0.3 * predicted_rating + 0.7 * mean_tag_similarity`` where
  ``predicted_rating`` comes from the trained ``model`` and
  ``mean_tag_similarity`` is the recipe's average cosine similarity, over its
  TF-IDF tag vector, to every recipe in ``raw_recipes``.

  Args:
    user_id: Original (un-encoded) id of the user.
    n_recommendations: Maximum number of recipe names to return.

  Returns:
    A list of recipe names, best score first. The list is empty when
    ``n_recommendations`` is not positive or no candidate recipe is left.

  Raises:
    ValueError: If ``user_id`` is unknown to ``user_encoder``; the model has no
      embedding for such a user.
  """
  rating_weight = 0.3
  tags_weight = 0.7

  if n_recommendations <= 0:
    return []

  # The model was trained on label-encoded ids, so a user the encoder has never
  # seen cannot be scored.
  if user_id not in user_encoder.classes_:
    raise ValueError(f"user_id {user_id!r} is unknown to the trained model")

  #Get the list of all recipe ids
  all_recipe_ids = raw_recipes['id'].unique()

  #Get the recipes already rated by the user
  rated_recipes = raw_interactions.loc[raw_interactions['user_id'] == user_id, 'recipe_id']

  #Remove rated recipes, and recipes the model has no embedding for
  is_known = np.isin(all_recipe_ids, recipe_encoder.classes_)
  is_rated = np.isin(all_recipe_ids, rated_recipes.to_numpy())
  unrated_recipes = all_recipe_ids[is_known & ~is_rated]
  if len(unrated_recipes) == 0:
    return []

  # Row position of each candidate in raw_recipes (first occurrence of the id);
  # tags_matrix was built from raw_recipes, so it shares that row order.
  row_of_recipe = pd.Series(np.arange(len(raw_recipes)), index=raw_recipes['id'].to_numpy())
  row_of_recipe = row_of_recipe[~row_of_recipe.index.duplicated(keep='first')]
  recipe_rows = row_of_recipe.loc[unrated_recipes].to_numpy()

  # Predict all candidate ratings in one batched call, using the encoded ids
  # shaped (n, 1) to match the model's Input(shape=(1,)) layers.
  encoded_user = user_encoder.transform([user_id])[0]
  user_column = np.full((len(unrated_recipes), 1), encoded_user)
  recipe_column = recipe_encoder.transform(unrated_recipes).reshape(-1, 1)
  ratings = np.asarray(
      model.predict([user_column, recipe_column], batch_size=4096, verbose=0)
  ).ravel()

  # Mean cosine similarity of each candidate to all recipes. TfidfVectorizer
  # L2-normalises its rows by default, so cosine similarity is a plain dot product
  # and the row mean equals the dot product with the mean tag vector. This avoids
  # materialising the full recipe-by-recipe similarity matrix.
  tag_centroid = np.asarray(tags_matrix.mean(axis=0)).ravel()
  tags_sim = np.asarray(tags_matrix[recipe_rows] @ tag_centroid).ravel()

  hybrid_scores = rating_weight * ratings + tags_weight * tags_sim

  #Sort recipes by hybrid score in descending order and keep the top n
  # (a stable sort keeps ties in their original order).
  top = np.argsort(-hybrid_scores, kind='stable')[:n_recommendations]

  #Get the recommended recipe names
  recommended_recipe_names = raw_recipes['name'].iloc[recipe_rows[top]].tolist()

  return recommended_recipe_names