In [None]:
# !pip install -q tensorflow-recommenders
# !pip install -q --upgrade tensorflow-datasets
# !pip install -q scann

In [1]:
import os
import pprint
import tempfile
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import pandas as pd
import sampling

In [2]:
import tensorflow_recommenders as tfrs

In [3]:
import pickle

# Constants 

In [4]:
RATINGS_SMALL = "../EDA_files/ratings_small.parquet"
RECIPES_SMALL = "../EDA_files/recipes_small.parquet"
INDEX_TO_RECIPE_OBJ = "../EDA_files/index_to_recipe.obj"
RECIPE_TO_INDEX_OBJ = "../EDA_files/recipe_to_index.obj"

ING_CLEAN_NO_COMMON = '../cleaned_files/ingredients_clean_without_common_words.obj'
KEYWORDS_CLEAN = '../cleaned_files/keywords_cleaned.obj'
CATEGORIES_CLEAN = '../cleaned_files/categories_cleaned.obj'
NAMES_CLEAN = '../cleaned_files/names_cleaned.obj'

RECIPES_DATA = "../dataset/recipes.parquet"

# Load data 

In [5]:
recipes_small = pd.read_parquet(RECIPES_SMALL)
ratings_small = pd.read_parquet(RATINGS_SMALL)

In [6]:
with open(RECIPE_TO_INDEX_OBJ, "rb") as input_file:
    recipe_to_index = pickle.load(input_file)

In [7]:
# with open(ING_CLEAN_NO_COMMON, "rb") as input_file:
#     ingredients_no_common_words = pickle.load(input_file)

# with open(KEYWORDS_CLEAN, "rb") as input_file:
#     keywords_clean = pickle.load(input_file)

# with open(CATEGORIES_CLEAN, "rb") as input_file:
#     categories_clean = pickle.load(input_file)
    
# with open(NAMES_CLEAN, "rb") as input_file:
#     names_clean = pickle.load(input_file)

## Ratings

In [8]:
author_min_20 = sampling.get_rating_with_min_number(ratings_small, 20, col_name='AuthorId')
recipe_min_20 = sampling.get_rating_with_min_number(ratings_small, 20, col_name='RecipeId')

ratings_min_20 = author_min_20.merge(recipe_min_20, how='inner')

In [9]:
ratings_sample = ratings_min_20.copy()

## Recipes

In [10]:
# ingredients_no_common_words.Ingredients = ingredients_no_common_words.Ingredients.map(lambda x: ' '.join(x))

In [11]:
# keywords_clean.Keywords = keywords_clean.Keywords.map(lambda x: ' '.join(x)) 

In [12]:
# recipes_clean = recipes_small.copy()
# recipes_clean.drop(columns=['Ingredients', 'Keywords', 'RecipeCategory', 'Nutritions'], axis=1, inplace=True)
# recipes_clean = recipes_clean.merge(ingredients_no_common_words, on='RecipeId')
# recipes_clean = recipes_clean.merge(keywords_clean, on='RecipeId')
# recipes_clean = recipes_clean.merge(categories_clean, on='RecipeId')

# sample = recipes[['RecipeId', 'Calories', 'FatContent', 'SaturatedFatContent',
#                                             'CholesterolContent', 'SodiumContent', 'CarbohydrateContent',
#                                             'FiberContent', 'SugarContent', 'ProteinContent']]

# recipes_clean = recipes_clean.merge(sample, on='RecipeId')

In [57]:
recipes_sample = recipes_small[recipes_small.RecipeId.isin(list(ratings_min_20.RecipeId))].copy()

# Prepare dataset

## Ratings

In [14]:
ratings_sample.AuthorId = ratings_sample.AuthorId.map(lambda x: bytes(str(x), 'utf-8'))
ratings_sample.RecipeId = ratings_sample.RecipeId.map(lambda x: bytes(str(x), 'utf-8'))

In [15]:
ratings_dict = ratings_sample.groupby(['AuthorId', 'RecipeId'])['Rating'].sum().reset_index()

In [16]:
ratings_dict = {name: np.array(value) for name, value in ratings_dict.items()}
ratings = tf.data.Dataset.from_tensor_slices(ratings_dict)


In [17]:
ratings = ratings.map(lambda x: {'AuthorId' : x['AuthorId'], 
                                 'RecipeId' : x['RecipeId'], 
                                 'Rating' : float(x['Rating']),})

In [18]:
for x in ratings.take(10).as_numpy_iterator():
    pprint.pprint(x)

{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'120914'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'143736'}
{'AuthorId': b'100026', 'Rating': 4.0, 'RecipeId': b'161324'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'161335'}
{'AuthorId': b'100026', 'Rating': 4.0, 'RecipeId': b'161381'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'172588'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'195437'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'33201'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'39165'}
{'AuthorId': b'100026', 'Rating': 5.0, 'RecipeId': b'8739'}


## Recipes

In [None]:
# recipes_clean.info()

In [19]:
features = ["RecipeId", "Name", "Keywords", "Ingredients", "FatContent", "SaturatedFatContent", "CholesterolContent", "SodiumContent",
           "CarbohydrateContent", "FiberContent","SugarContent", "ProteinContent"]

In [58]:
recipes_sample.RecipeId = recipes_sample.RecipeId.map(lambda x: bytes(str(x), 'utf-8'))
recipes_dict = recipes_sample[['RecipeId']]
recipes_dict = {name: np.array(value) for name, value in recipes_dict.items()}
recipes = tf.data.Dataset.from_tensor_slices(recipes_dict)

In [63]:
recipes = recipes.map(lambda x: x['RecipeId'])

## Basic version - just ids

In [66]:
size = ratings_min_20.shape[0]
train_size = int(0.8 * size)
test_size = size - train_size

tf.random.set_seed(42)
shuffled = ratings.shuffle(size, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(train_size)
test = shuffled.take(train_size).take(test_size)

In [67]:
recipe_ids = recipes.batch(1_000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["AuthorId"])

unique_recipe_ids = np.unique(np.concatenate(list(recipe_ids)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [None]:
# unique_user_ids = [bytes(str(x), 'utf-8') for x in unique_user_ids]

In [None]:
# unique_recipe_ids = [bytes(str(x), 'utf-8') for x in unique_recipe_ids]

# Implementing model

## Query tower

In [71]:
embedding_dimension = 32

In [72]:
user_model = tf.keras.Sequential([
    tf.keras.layers.StringLookup(vocabulary=unique_user_ids, mask_token=None),
    tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
])

## Candidate tower

In [73]:
recipe_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_recipe_ids, mask_token=None),
  tf.keras.layers.Embedding(len(unique_recipe_ids) + 1, embedding_dimension)
])

## Model

In [74]:
class RecipeModel(tfrs.Model):
    def __init__(self, user_model, recipe_model):
        super().__init__()
        self.recipe_model: tf.keras.Model = recipe_model
        self.user_model: tf.keras.Model = user_model
            
        metrics = tfrs.metrics.FactorizedTopK(candidates=recipes.batch(128).map(recipe_model))
        task = tfrs.tasks.Retrieval(metrics=metrics)
        self.task: tf.keras.layers.Layer = task
            
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        print(features)
        user_embeddings = self.user_model(features["AuthorId"])
        positive_recipe_embeddings = self.recipe_model(features["RecipeId"])
        
        return self.task(user_embeddings, positive_recipe_embeddings)

In [75]:
model = RecipeModel(user_model, recipe_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [76]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [77]:
model.fit(cached_train, epochs=3)

Epoch 1/3
{'AuthorId': <tf.Tensor 'IteratorGetNext:0' shape=(None,) dtype=string>, 'RecipeId': <tf.Tensor 'IteratorGetNext:2' shape=(None,) dtype=string>, 'Rating': <tf.Tensor 'IteratorGetNext:1' shape=(None,) dtype=float32>}
{'AuthorId': <tf.Tensor 'IteratorGetNext:0' shape=(None,) dtype=string>, 'RecipeId': <tf.Tensor 'IteratorGetNext:2' shape=(None,) dtype=string>, 'Rating': <tf.Tensor 'IteratorGetNext:1' shape=(None,) dtype=float32>}
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2a8269e4d60>

In [78]:
model.evaluate(cached_test, return_dict=True)

{'AuthorId': <tf.Tensor 'IteratorGetNext:0' shape=(None,) dtype=string>, 'RecipeId': <tf.Tensor 'IteratorGetNext:2' shape=(None,) dtype=string>, 'Rating': <tf.Tensor 'IteratorGetNext:1' shape=(None,) dtype=float32>}


{'factorized_top_k/top_1_categorical_accuracy': 0.008418675512075424,
 'factorized_top_k/top_5_categorical_accuracy': 0.03870655596256256,
 'factorized_top_k/top_10_categorical_accuracy': 0.06296266615390778,
 'factorized_top_k/top_50_categorical_accuracy': 0.1725667268037796,
 'factorized_top_k/top_100_categorical_accuracy': 0.24878638982772827,
 'loss': 3112.62451171875,
 'regularization_loss': 0,
 'total_loss': 3112.62451171875}

In [80]:
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(tf.data.Dataset.zip((recipes.batch(100), recipes.batch(100).map(model.recipe_model))))

x, titles = index(tf.constant([b'2695']))

In [81]:
print(f"Recommendatons for user 42: {titles[0, :3]}")

Recommendatons for user 42: [b'38846' b'11969' b'86623']


In [84]:
ratings_sample[ratings_sample.AuthorId == b'2695']

Unnamed: 0,RecipeId,AuthorId,Rating,Review,DateSubmitted
0,b'4807',b'2695',2,"I'm sorry, but I tried this method for my Chri...",2000-12-27 13:47:50+00:00
2,b'12134',b'2695',5,This dish was excellent. The sauce turned out...,2001-01-19 16:40:14+00:00
48018,b'86623',b'2695',2,A bit too dry for our tastes.,2007-01-31 11:30:50+00:00
49166,b'53767',b'2695',5,This soup was very easy to make and tasted fan...,2005-08-13 09:49:33+00:00
58607,b'41685',b'2695',4,"This was very good, and easy. I added some ch...",2005-12-24 07:19:11+00:00
58899,b'83287',b'2695',1,Tried this with our leftover ham bone from the...,2005-12-27 16:01:03+00:00
67440,b'109295',b'2695',5,"This sounds odd, but I'm telling you it is fan...",2006-04-16 17:35:16+00:00
79717,b'37638',b'2695',4,loved it . . . i did add several fresh chopped...,2006-09-20 09:23:01+00:00
92275,b'30018',b'2695',5,Wonderful. I did add 8oz of tomato juice so i...,2007-01-18 08:38:56+00:00
164510,b'70224',b'2695',3,"Not bad. A little too thin for our taste, so ...",2008-07-11 14:50:56+00:00


In [89]:
recipes_sample[recipes_sample.RecipeId == b'86623']

Unnamed: 0,RecipeId,Name,AuthorId,CookTimeInMinutes,PrepTimeInMinutes,TotalTimeInMinutes,DatePublished,Description,RecipeCategory,Keywords,Ingredients,RecipeServings,RecipeInstructions,Nutritions
81549,b'86623',Lasagna Supremo (The Best Lasagna Ever!),112818,60.0,45.0,105.0,2004-03-15 20:00:00+00:00,This is a recipe that I have been making for m...,Pork,"[Cheese, Vegetable, Meat, European, Oven, < 4 ...","[ground beef, ground pork, Italian sausage, bu...",8.0,[FOR THE LASAGNA: Prepare lasagna according to...,"[67.0, 29.8, 230.3, 2927.0, 78.0, 8.0, 14.1, 6..."
