In [1]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
import tensorflow_recommenders as tfrs




Let's pretend we have 3 users:
1. Clarissa is a vegetarian who is lactose intolerant. ü•ùüçÖ
2. Martin is an athlete who prefers meat dishes to vegetable dishes. üçñü•©
3. Stella - has no special preferences, eats mostly everything. üç≤üç†

In [2]:
clarissa = {'id':'clr', 'prefers': ['vegetables', 'vegan', 'nomeat', 'vegetarian'], 'hates': ['meat', 'lactose']}
martin = {'id':'mrt', 'prefers': 'meat', 'hates': 'vegetables'}
stella = {'id':'stl', 'prefers': '', 'hates': ''}

We will additionally enter the factors Calories, Protein, Fat, Carbohydrates for our requirements. **(The numbers were taken at random and do not reflect proportions or recommendations - the numbers are just an example)**
* Clarissa - 2000/100/60/120
* Martin, 2600/150/100/328.
* Stella - 2200/80/50/100

In [3]:
def set_requirements(user, calories, proteins, fats, carbs):
  user['calories'] = calories
  user['proteins'] = proteins
  user['fats'] = fats
  user['carbs'] = carbs
set_requirements(clarissa, 2000, 100, 60, 120)
set_requirements(martin, 2600, 150, 100, 328)
set_requirements(martin, 2200, 80, 50, 100)


# Data processing üìä

Loading recipes and user_interactions. Nutritions showed like:calories (#), total fat (PDV), sugar (PDV) , sodium (PDV) , protein (PDV) , saturated fat (PDV) , and carbohydrates (PDV)

In [4]:
raw_recipes = pd.read_csv('./sample_data/RAW_recipes.csv')
raw_interactions = pd.read_csv('./sample_data/RAW_interactions.csv')
raw_recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


Merge data interactions with recipe infos

In [5]:
interactions_with_recipe_info = pd.merge(raw_interactions, raw_recipes[['name', 'tags', 'ingredients', 'id']], left_on='recipe_id', right_on='id', how='left')
interactions_with_recipe_info = interactions_with_recipe_info[['user_id', 'recipe_id', 'rating', 'name', 'tags', 'ingredients']]
interactions_with_recipe_info.rename(columns={'name': 'recipe_name'}, inplace=True)
interactions_with_recipe_info.head()

Unnamed: 0,user_id,recipe_id,rating,recipe_name,tags,ingredients
0,38094,40893,4,white bean green chile pepper soup,"['weeknight', 'time-to-make', 'course', 'main-...","['great northern beans', 'yellow onion', 'dice..."
1,1293707,40893,5,white bean green chile pepper soup,"['weeknight', 'time-to-make', 'course', 'main-...","['great northern beans', 'yellow onion', 'dice..."
2,8937,44394,4,devilicious cookie cake delights,"['30-minutes-or-less', 'time-to-make', 'course...","[""devil's food cake mix"", 'vegetable oil', 'eg..."
3,126440,85009,5,baked potato toppings,"['15-minutes-or-less', 'time-to-make', 'course...","['mayonnaise', 'salsa', 'cheddar cheese', 'ref..."
4,57222,85009,5,baked potato toppings,"['15-minutes-or-less', 'time-to-make', 'course...","['mayonnaise', 'salsa', 'cheddar cheese', 'ref..."


In [6]:
import ast

def convert_to_list(data_str):
  try:
    return ast.literal_eval(data_str)
  except (SyntaxError, ValueError):
    return []

In [7]:
interactions_with_recipe_info['ingredients_str'] = interactions_with_recipe_info['ingredients'].apply(convert_to_list)
interactions_with_recipe_info['tags_str'] = interactions_with_recipe_info['tags'].apply(convert_to_list)
raw_recipes['ingredients_str'] = raw_recipes['ingredients'].apply(convert_to_list)
raw_recipes['tags_str'] = raw_recipes['tags'].apply(convert_to_list)

Formatting data for Datasets

In [8]:
interactions_with_recipe_info['user_id'] = interactions_with_recipe_info.user_id.astype("str")
interactions_with_recipe_info['recipe_id'] = interactions_with_recipe_info.recipe_id.astype("str")
interactions_with_recipe_info['rating'] = interactions_with_recipe_info.rating.astype(np.float32)
interactions_with_recipe_info['recipe_name'] = interactions_with_recipe_info.recipe_name.astype("str")
interactions_with_recipe_info['tags'] = interactions_with_recipe_info.tags.astype("str")
interactions_with_recipe_info['ingredients'] = interactions_with_recipe_info.ingredients.astype("str")


In [9]:
len(interactions_with_recipe_info['user_id'].unique())

226570

Creating train preferences for users

In [10]:
user_preferences = interactions_with_recipe_info[['user_id', 'rating', 'tags_str']]
user_preferences.head()

Unnamed: 0,user_id,rating,tags_str
0,38094,4.0,"[weeknight, time-to-make, course, main-ingredi..."
1,1293707,5.0,"[weeknight, time-to-make, course, main-ingredi..."
2,8937,4.0,"[30-minutes-or-less, time-to-make, course, mai..."
3,126440,5.0,"[15-minutes-or-less, time-to-make, course, mai..."
4,57222,5.0,"[15-minutes-or-less, time-to-make, course, mai..."


In [12]:
user_liked_tags = user_preferences[user_preferences['rating'] >= 3].groupby('user_id')['tags_str'].apply(list).reset_index()
user_unliked_tags = user_preferences[user_preferences['rating'] < 3].groupby('user_id')['tags_str'].apply(list).reset_index()
users_ds = pd.DataFrame(interactions_with_recipe_info['user_id'].unique(), columns=['user_id'])


In [13]:
users_ds = users_ds.merge(user_liked_tags, on='user_id', how='left')
users_ds = users_ds.rename(columns={'tags_str': 'liked_tags'})
users_ds = users_ds.merge(user_unliked_tags, on='user_id', how='left')
users_ds = users_ds.rename(columns={'tags_str': 'unliked_tags'})

In [14]:
users_ds.head()

Unnamed: 0,user_id,liked_tags,unliked_tags
0,38094,"[[weeknight, time-to-make, course, main-ingred...",
1,1293707,"[[weeknight, time-to-make, course, main-ingred...","[[60-minutes-or-less, time-to-make, course, ma..."
2,8937,"[[30-minutes-or-less, time-to-make, course, ma...",
3,126440,"[[15-minutes-or-less, time-to-make, course, ma...","[[60-minutes-or-less, time-to-make, course, pr..."
4,57222,"[[15-minutes-or-less, time-to-make, course, ma...","[[60-minutes-or-less, time-to-make, course, ma..."


Looking that some of users don't have unliked_tags, so replace all Nan with empty arrays

In [15]:
def unique_tags_in_list(tags_list):
  if isinstance(tags_list, list):
    unique_tags = []
    for sublist in tags_list:
      if isinstance(sublist, list):
        for tag in sublist:
          if tag not in unique_tags:
            unique_tags.append(tag)
    return unique_tags
  else:
    return []

users_ds['liked_tags'] = users_ds['liked_tags'].apply(unique_tags_in_list)
users_ds['unliked_tags'] = users_ds['unliked_tags'].apply(unique_tags_in_list)

In [16]:
def remove_duplicate_tags(row):
  liked_tags = set(row['liked_tags']) if isinstance(row['liked_tags'], list) else set()
  unliked_tags = set(row['unliked_tags']) if isinstance(row['unliked_tags'], list) else set()
  duplicate_tags = liked_tags.intersection(unliked_tags)
  row['liked_tags'] = [tag for tag in row['liked_tags'] if tag not in duplicate_tags] if isinstance(row['liked_tags'], list) else []
  row['unliked_tags'] = [tag for tag in row['unliked_tags'] if tag not in duplicate_tags] if isinstance(row['unliked_tags'], list) else []
  return row

users_ds = users_ds.apply(remove_duplicate_tags, axis=1)


In [17]:
users_ds.head()

Unnamed: 0,user_id,liked_tags,unliked_tags
0,38094,"[weeknight, time-to-make, course, main-ingredi...",[]
1,1293707,"[weeknight, soups-stews, beans, crock-pot-slow...","[cupcakes, finger-food, cakes, english, cake-f..."
2,8937,"[30-minutes-or-less, time-to-make, course, mai...",[]
3,126440,"[15-minutes-or-less, 3-steps-or-less, jewish-s...",[]
4,57222,"[condiments-etc, salads, beans, grains, south-...","[kwanzaa, dairy-free]"


In [18]:
interactions_with_recipe_info = pd.merge(interactions_with_recipe_info, users_ds, on='user_id', how='left')

In [19]:
interactions_with_recipe_info.head()

Unnamed: 0,user_id,recipe_id,rating,recipe_name,tags,ingredients,ingredients_str,tags_str,liked_tags,unliked_tags
0,38094,40893,4.0,white bean green chile pepper soup,"['weeknight', 'time-to-make', 'course', 'main-...","['great northern beans', 'yellow onion', 'dice...","[great northern beans, yellow onion, diced gre...","[weeknight, time-to-make, course, main-ingredi...","[weeknight, time-to-make, course, main-ingredi...",[]
1,1293707,40893,5.0,white bean green chile pepper soup,"['weeknight', 'time-to-make', 'course', 'main-...","['great northern beans', 'yellow onion', 'dice...","[great northern beans, yellow onion, diced gre...","[weeknight, time-to-make, course, main-ingredi...","[weeknight, soups-stews, beans, crock-pot-slow...","[cupcakes, finger-food, cakes, english, cake-f..."
2,8937,44394,4.0,devilicious cookie cake delights,"['30-minutes-or-less', 'time-to-make', 'course...","[""devil's food cake mix"", 'vegetable oil', 'eg...","[devil's food cake mix, vegetable oil, eggs, r...","[30-minutes-or-less, time-to-make, course, mai...","[30-minutes-or-less, time-to-make, course, mai...",[]
3,126440,85009,5.0,baked potato toppings,"['15-minutes-or-less', 'time-to-make', 'course...","['mayonnaise', 'salsa', 'cheddar cheese', 'ref...","[mayonnaise, salsa, cheddar cheese, refried be...","[15-minutes-or-less, time-to-make, course, mai...","[15-minutes-or-less, 3-steps-or-less, jewish-s...",[]
4,57222,85009,5.0,baked potato toppings,"['15-minutes-or-less', 'time-to-make', 'course...","['mayonnaise', 'salsa', 'cheddar cheese', 'ref...","[mayonnaise, salsa, cheddar cheese, refried be...","[15-minutes-or-less, time-to-make, course, mai...","[condiments-etc, salads, beans, grains, south-...","[kwanzaa, dairy-free]"


Creating datasets

In [20]:
interactions_with_recipe_info['liked_tags_str'] = interactions_with_recipe_info['liked_tags'].apply(lambda x: ','.join(x))
interactions_with_recipe_info['unliked_tags_str'] = interactions_with_recipe_info['unliked_tags'].apply(lambda x: ','.join(x))

In [21]:
def split_tags(tags_string):
    return tf.strings.split(tags_string, ',')

In [22]:
interactions_with_recipe_info['liked_tags_str']

0          weeknight,time-to-make,course,main-ingredient,...
1          weeknight,soups-stews,beans,crock-pot-slow-coo...
2          30-minutes-or-less,time-to-make,course,main-in...
3          15-minutes-or-less,3-steps-or-less,jewish-seph...
4          condiments-etc,salads,beans,grains,south-ameri...
                                 ...                        
1132362                                                     
1132363    time-to-make,course,main-ingredient,cuisine,pr...
1132364    time-to-make,course,main-ingredient,preparatio...
1132365    frozen-desserts,freezer,kid-friendly,romantic,...
1132366    weeknight,time-to-make,course,preparation,main...
Name: liked_tags_str, Length: 1132367, dtype: object

In [23]:
split_tags('')

<tf.Tensor: shape=(1,), dtype=string, numpy=array([b''], dtype=object)>

In [24]:
ratings = tf.data.Dataset.from_tensor_slices((tf.cast(interactions_with_recipe_info['user_id'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['recipe_id'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['rating'].values.reshape(-1,1), tf.float32),
                                              tf.cast(interactions_with_recipe_info['recipe_name'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['tags'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['liked_tags_str'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['unliked_tags_str'].values.reshape(-1,1), tf.string)
                                              )).map(lambda x,x1,x2,x3,x4,x5,x6: {
                                                  "user_id": x,
                                                  "recipe_id": x1,
                                                  "rating": x2,
                                                  "recipe_name": x3,
                                                  "tags": x4,
                                                  "liked_tags": split_tags(x5),
                                                  "unliked_tags": split_tags(x6)
                                              })

In [25]:
raw_recipes['ingredients_str'] = raw_recipes['ingredients_str'].apply(lambda x: ','.join(x))
raw_recipes['tags_str'] = raw_recipes['tags_str'].apply(lambda x: ','.join(x))

In [26]:
raw_recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,ingredients_str,tags_str
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,"winter squash,mexican seasoning,mixed spice,ho...","60-minutes-or-less,time-to-make,course,main-in..."
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,"prepared pizza crust,sausage patty,eggs,milk,s...","30-minutes-or-less,time-to-make,course,main-in..."
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,"ground beef,yellow onions,diced tomatoes,tomat...","time-to-make,course,preparation,main-dish,chil..."
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,"spreadable cheese with garlic and herbs,new po...","60-minutes-or-less,time-to-make,course,main-in..."
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,"tomato juice,apple cider vinegar,sugar,salt,pe...","weeknight,time-to-make,course,main-ingredient,..."


In [27]:
raw_recipes['id'] = raw_recipes.id.astype("str")
raw_recipes['name'] = raw_recipes.name.astype("str")
raw_recipes['tags'] = raw_recipes.tags.astype("str")
raw_recipes['ingredients'] = raw_recipes.ingredients.astype("str")

In [28]:
# prompt: –°–æ–∑–¥–∞–π –Ω–∞ –æ—Å–Ω–æ–≤–µ raw_recipes –¥–∞—Ç–∞—Å–µ—Ç, –Ω–æ —É—á—Ç–∏ —á—Ç–æ ingredients_str —ç—Ç–æ –º–∞—Å—Å–∏–≤

recipes = tf.data.Dataset.from_tensor_slices((tf.cast(raw_recipes['id'].values.reshape(-1, 1), tf.string),
                                              tf.cast(raw_recipes['name'].values.reshape(-1, 1), tf.string),
                                              tf.cast(raw_recipes['tags_str'].values.reshape(-1, 1), tf.string),
                                              tf.cast(raw_recipes['ingredients_str'].values.reshape(-1, 1), tf.string),)).map(lambda x, x1, x2, x3: {
                                                  "recipe_id": x,
                                                  "recipe_name": x1,
                                                  "tags": split_tags(x2),
                                                  "ingredients": split_tags(x3)
                                              })

for data in recipes.take(1).as_numpy_iterator():
  print(data)


{'recipe_id': array([b'137739'], dtype=object), 'recipe_name': array([b'arriba   baked winter squash mexican style'], dtype=object), 'tags': array([[b'60-minutes-or-less', b'time-to-make', b'course',
        b'main-ingredient', b'cuisine', b'preparation', b'occasion',
        b'north-american', b'side-dishes', b'vegetables', b'mexican',
        b'easy', b'fall', b'holiday-event', b'vegetarian', b'winter',
        b'dietary', b'christmas', b'seasonal', b'squash']], dtype=object), 'ingredients': array([[b'winter squash', b'mexican seasoning', b'mixed spice', b'honey',
        b'butter', b'olive oil', b'salt']], dtype=object)}


In [29]:
recipe_names = tf.data.Dataset.from_tensor_slices((tf.cast(raw_recipes['name'].values.reshape(-1, 1), tf.string))).map(lambda x: x)

# Towers üóº

For our towers set dimensionality of the query and candidate representations: **32**. Higher values will correspond to models that may be more accurate, but will also be slower to fit and more prone to overfitting.

In [30]:
embedding_dimension = 32

## User tower üë∑

Lets start creating our towers with User towers. We will compute by:
1. User id
2. Prefers and hates

### User ID model

In [31]:
unique_user_ids = users_ds["user_id"].unique()
unique_user_ids[:10]

array(['38094', '1293707', '8937', '126440', '57222', '52282', '124416',
       '2000192946', '76535', '273745'], dtype=object)

In [32]:
# user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])
# unique_user_ids = np.unique(np.concatenate(list(user_ids)))
# unique_user_ids[:10]

In [33]:
user_id_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_user_ids, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_user_ids)+1, embedding_dimension)
])




### User Prefers models

In [34]:
# prompt: –º–æ–¥–µ–ª—å –¥–ª—è liked_tags –∫–æ—Ç–æ—Ä—ã–º —Å—Ç–æ–∏—Ç –æ—Ç–¥–∞—Ç—å –ø—Ä–µ–¥–ø–æ—á—Ç–µ–Ω–∏–µ, –Ω–æ —É—á—Ç–∏ —á—Ç–æ liked_tags –≤ –¥–∞—Ç–∞—Å–µ—Ç–µ —ç—Ç–æ –º–∞—Å—Å–∏–≤

unique_liked_tags = set()
for tags in users_ds['liked_tags']:
  if isinstance(tags, list):
    for tag in tags:
      unique_liked_tags.add(tag)
unique_liked_tags = list(unique_liked_tags)

liked_tags_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_liked_tags, mask_token=None),
  tf.keras.layers.Embedding(len(unique_liked_tags) + 1, embedding_dimension)
])


In [35]:
unique_liked_tags[:10]

['',
 'course',
 'microwave',
 'lactose',
 'hidden-valley-ranch',
 'portuguese',
 'ice-cream',
 'eggs-breakfast',
 'southern-united-states',
 'easter']

In [36]:

unique_unliked_tags = set()
for tags in users_ds['unliked_tags']:
  if isinstance(tags, list):
    for tag in tags:
      unique_unliked_tags.add(tag)
unique_unliked_tags = list(unique_unliked_tags)

unliked_tags_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_unliked_tags, mask_token=None),
  tf.keras.layers.Embedding(len(unique_unliked_tags) + 1, embedding_dimension)
])
unique_unliked_tags[:10]

['',
 'course',
 'microwave',
 'lactose',
 'hidden-valley-ranch',
 'ice-cream',
 'portuguese',
 'southern-united-states',
 'easter',
 'salmon']

## Recipe tower üå≠

In [37]:
unique_recipe_names = np.unique(list(raw_recipes["name"].unique()))
recipe_name_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_recipe_names, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_recipe_names) + 1, embedding_dimension)
])
unique_recipe_names[:10]

array(['0 carb   0 cal gummy worms', '0 fat chunky watermelon salsa',
       '0 point ice cream  only 1 ingredient', '0 point soup   ww',
       '0 point soup  crock pot', '007  martini', '007 cocktail',
       '1  2  3  swiss meringue buttercream', '1 00 tangy chicken recipe',
       '1 000 artichoke hearts'], dtype='<U85')

In [38]:
# prompt: –Ω–∞–ø–∏—à–∏ –≤—ã–∑–æ–≤ recipe_name_model

recipe_name_model(['some recipe name'])





<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.0456244 ,  0.03931374, -0.03894681, -0.00580595, -0.04298264,
         0.02317716,  0.04442343,  0.04382325, -0.00535711,  0.02303362,
         0.00465139,  0.04590912,  0.04135333, -0.00849777,  0.00897612,
        -0.00878551,  0.03185965, -0.01099288,  0.01770008,  0.04620594,
        -0.02944211,  0.04778177, -0.0057532 , -0.04643984, -0.04169332,
        -0.02219941, -0.01274353,  0.02301443,  0.03161195,  0.00935373,
        -0.04035289, -0.00177597]], dtype=float32)>

### Recipe ingredient&tag model

In [39]:
unique_tags = set()
for tags in raw_recipes['tags'].apply(convert_to_list):
  if isinstance(tags, list):
    for tag in tags:
      unique_tags.add(tag)
unique_tags = np.unique(list(unique_tags))

recipe_tags_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_tags, mask_token=None),
  tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
])
unique_tags[:10]

array(['', '1-day-or-more', '15-minutes-or-less', '3-steps-or-less',
       '30-minutes-or-less', '4-hours-or-less', '5-ingredients-or-less',
       '60-minutes-or-less',
       'Throw the ultimate fiesta with this sopaipillas recipe from Food.com.',
       'a1-sauce'], dtype='<U69')

## Combine models

In [40]:
class RecipeAndUserModel(tfrs.Model):
  def __init__(self):
    super().__init__()
    unique_recipe_names = np.unique(list(raw_recipes["name"].unique()))

    # Recipe embeddings
    self.recipe_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_recipe_names, mask_token=None),
      tf.keras.layers.Embedding(len(unique_recipe_names) + 1, embedding_dimension)
    ])
    
    # Tags embeddings
    self.tags_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_tags, mask_token=None),
      tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
    ])

    # User embeddings
    self.user_id_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # Liked tags embeddings
    self.liked_tags_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_liked_tags, mask_token=None),
      tf.keras.layers.Embedding(len(unique_liked_tags) + 1, embedding_dimension)
    ])

    # Unliked tags embeddings
    self.unliked_tags_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_unliked_tags, mask_token=None),
      tf.keras.layers.Embedding(len(unique_unliked_tags) + 1, embedding_dimension)
    ])

    self.task = tfrs.tasks.Retrieval(
      metrics=tfrs.metrics.FactorizedTopK(
          candidates=recipe_names.batch(128).map(lambda x: tf.squeeze(self.recipe_model(x), axis=1))
      )
    )
    # Take a single batch from recipe_names
    candidate_names = next(iter(recipe_names.batch(128)))
    # Pass the batch of names to self.recipe_model
    candidate_embeddings = self.recipe_model(candidate_names)
    print("Candidate Embeddings Shape:", candidate_embeddings.shape) 

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    # Recipe embeddings
    recipe_embeddings = self.recipe_model(features["recipe_name"])
    tags_embeddings = self.tags_model(features["tags"])

    # User embeddings
    user_id_embedding = self.user_id_model(features["user_id"])
    liked_tags_embedding = self.liked_tags_model(features['liked_tags'])
    unliked_tags_embedding = self.unliked_tags_model(features['unliked_tags'])

    # Apply penalties to unliked tags embeddings
    penalty_weight = -0.5
    unliked_tags_embedding = unliked_tags_embedding * penalty_weight

    # Reduce mean for RaggedTensors to handle variable lengths
    liked_tags_embedding = tf.reduce_mean(liked_tags_embedding, axis=1)
    unliked_tags_embedding = tf.reduce_mean(unliked_tags_embedding, axis=1)

    # Combine user embeddings (ensure shape (batch_size, embedding_dim))
    user_embedding = tf.concat([user_id_embedding, liked_tags_embedding, unliked_tags_embedding], axis=1)

    # Combine recipe embeddings (ensure shape (batch_size, embedding_dim))
    recipe_embeddings = tf.concat([recipe_embeddings, tags_embeddings], axis=1)

    # Ensure both user_embedding and recipe_embeddings are reduced to (batch_size, embedding_dim)
    user_embedding = tf.reduce_mean(user_embedding, axis=1)  # Reduce axis to 2D
    recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1)

    # Check shapes before returning
    print("User Embeddings Shape: 21", user_embedding.shape)
    print("Recipe Embeddings Shape: 22", recipe_embeddings.shape)

    return user_embedding, recipe_embeddings


  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    user_embeddings, recipe_embeddings = self(features)

    print("User Embeddings Shape:", user_embeddings.shape)
    print("Recipe Embeddings Shape:", recipe_embeddings.shape)
    # # Apply pooling to ensure uniform shapes
    # user_embeddings = tf.reduce_mean(user_embeddings, axis=1)  # Ensure it's (batch_size, embedding_dim)
    # recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1)

    # user_embeddings = tf.reduce_mean(user_embeddings, axis=1)  # Ensure it's (batch_size, embedding_dim)
    # recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1) # Ensure it's (batch_size, embedding_dim)

    # Check shapes again
    print("User Embeddings Shape:", user_embeddings.shape)
    print("Recipe Embeddings Shape:", recipe_embeddings.shape)

    return self.task(user_embeddings, recipe_embeddings)


In [41]:
class UserModel(tf.keras.Model):
    def __init__(self, unique_user_ids, unique_liked_tags, unique_unliked_tags, embedding_dimension):
        super().__init__()
        
        # User ID embeddings
        self.user_id_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])
        
        # Liked tags embeddings
        self.liked_tags_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_liked_tags, mask_token=None),
            tf.keras.layers.Embedding(len(unique_liked_tags) + 1, embedding_dimension)
        ])

        # Unliked tags embeddings
        self.unliked_tags_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_unliked_tags, mask_token=None),
            tf.keras.layers.Embedding(len(unique_unliked_tags) + 1, embedding_dimension)
        ])

        # Penalty weight for unliked tags
        self.penalty_weight = -0.5

    def call(self, features):
        # Inputs should contain "user_id", "liked_tags", and "unliked_tags"
        user_id = features["user_id"]
        liked_tags = features["liked_tags"]
        unliked_tags = features["unliked_tags"]
        
        # Embed user ID
        user_id_embedding = self.user_id_model(user_id)
        if(user_id_embedding.shape.as_list() == list([1,1,32])):
            print("Fixed users")
            user_id_embedding = tf.reduce_mean(user_id_embedding, axis=1)
        # Embed liked and unliked tags
        liked_tags_embedding = self.liked_tags_model(liked_tags)
        unliked_tags_embedding = self.unliked_tags_model(unliked_tags)
        
        # Apply penalty to unliked tags
        unliked_tags_embedding = unliked_tags_embedding * self.penalty_weight
        # Reduce mean to handle ragged tensors (different lengths of liked/unliked tags)
        if(len(liked_tags_embedding.shape.as_list()) >=3):
            liked_tags_embedding = tf.reduce_mean(liked_tags_embedding, axis=1)
        if(len(unliked_tags_embedding.shape.as_list()) >=3):
            unliked_tags_embedding = tf.reduce_mean(unliked_tags_embedding, axis=1)
        # Concatenate user embeddings with liked and unliked tags
        user_embedding = tf.concat([user_id_embedding, liked_tags_embedding, unliked_tags_embedding], axis=1)
        
        return user_embedding


In [42]:
class RecipeModel(tf.keras.Model):
    def __init__(self, unique_recipe_names, unique_tags, embedding_dimension):
        super().__init__()

        self.recipe_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_recipe_names, mask_token=None),
            tf.keras.layers.Embedding(len(unique_recipe_names) + 1, embedding_dimension)
        ])
    
    # Tags embeddings
        self.tags_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_tags, mask_token=None),
            tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
        ])
        
    def call(self, features):
        # Inputs should contain "recipe_name" and "tags"
        recipe_name = features["recipe_name"]
        tags = features["tags"]

        # Embed recipe name
        recipe_embeddings = self.recipe_model(recipe_name)

        tags_embeddings = self.tags_model(tags)
        if(len(recipe_embeddings.shape.as_list())>=3): 
        #   recipe_embeddings = tf.reshape(recipe_embeddings, [1,32])
           recipe_embeddings = tf.reshape(recipe_embeddings, [1,embedding_dimension])
        if(len(tags_embeddings.shape.as_list())>=3):
            tags_embeddings = tf.reshape(tags_embeddings, [1,embedding_dimension])
        #   tags_embeddings = tf.reshape(tags_embeddings, [1,32])
        recipe_embeddings = tf.concat([recipe_embeddings, tags_embeddings], axis=1)
        return recipe_embeddings

In [48]:

class RecipeAndUserModel(tfrs.Model):
  def __init__(self):
    super().__init__()
    unique_recipe_names = np.unique(list(raw_recipes["name"].unique()))
    self.user_model = UserModel(unique_user_ids, unique_liked_tags, unique_unliked_tags, 32 )
    # Recipe embeddings
    # Recipe embeddings
    self.recipe_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_recipe_names, mask_token=None),
      tf.keras.layers.Embedding(len(unique_recipe_names) + 1, embedding_dimension)
    ])
    
    # Tags embeddings
    self.tags_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_tags, mask_token=None),
      tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
    ])

    self.task = tfrs.tasks.Retrieval(
      metrics=tfrs.metrics.FactorizedTopK(
          candidates=recipe_names.batch(128).map(lambda x: tf.squeeze(self.recipe_model(x), axis=1))
      )
    )
    

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    user_embedding = self.user_model(features)
    recipe_name_embeddings = self.recipe_model(features["recipe_name"])
    recipe_tag_embeddings = self.tags_model(features["tags"])
    if(recipe_name_embeddings.shape.as_list() == list([1,1,32])):
      print("Fixed 1")
      recipe_name_embeddings = tf.reduce_mean(recipe_name_embeddings, axis=1)
    if(recipe_tag_embeddings.shape.as_list() == list([1,1,32])):
      print("Fixed 2")
      recipe_tag_embeddings = tf.reduce_mean(recipe_tag_embeddings, axis=1)
    recipe_embeddings = tf.concat([recipe_name_embeddings, recipe_tag_embeddings], axis=1)
    return user_embedding, recipe_embeddings


  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    user_embeddings, recipe_embeddings = self(features)
    print(user_embeddings.shape, recipe_embeddings.shape)
    if(len(user_embeddings.shape.as_list()) != recipe_embeddings.shape.as_list()):
      if(len(user_embeddings.shape.as_list()) > len(recipe_embeddings.shape.as_list())):
        user_embeddings = tf.reduce_mean(user_embeddings, axis=1)
      if (len(user_embeddings.shape.as_list()) < len(recipe_embeddings.shape.as_list())):
        recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1)  
    return self.task(user_embeddings, recipe_embeddings)


In [49]:
from turtle import Shape


class RecipeAndUserModel(tfrs.Model):
  def __init__(self):
    super().__init__()
    unique_recipe_names = np.unique(list(raw_recipes["name"].unique()))
    self.user_model = UserModel(unique_user_ids, unique_liked_tags, unique_unliked_tags, embedding_dimension )

    # Recipe embeddings
    self.recipe_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_recipe_names, mask_token=None),
      tf.keras.layers.Embedding(len(unique_recipe_names) + 1, embedding_dimension)
    ])
    
    # Tags embeddings
    self.tags_model = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_tags, mask_token=None),
      tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
    ])

    

    self.task = tfrs.tasks.Retrieval(
      metrics=tfrs.metrics.FactorizedTopK(
          candidates=recipe_names.batch(128).map(lambda x: tf.squeeze(self.recipe_model(x), axis=1))
      )
    )
    # Take a single batch from recipe_names
    candidate_names = next(iter(recipe_names.batch(128)))
    # Pass the batch of names to self.recipe_model
    candidate_embeddings = self.recipe_model(candidate_names)
    print("Candidate Embeddings Shape:", candidate_embeddings.shape) 

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    # Recipe embeddings
    recipe_embeddings = self.recipe_model(features["recipe_name"])
    tags_embeddings = self.tags_model(features["tags"])

    # User embeddings
    user_embedding = self.user_model(features)
    # user_id_embedding = self.user_id_model(features["user_id"])
    # liked_tags_embedding = self.liked_tags_model(features['liked_tags'])
    # unliked_tags_embedding = self.unliked_tags_model(features['unliked_tags'])

    # # Apply penalties to unliked tags embeddings
    # penalty_weight = -0.5
    # unliked_tags_embedding = unliked_tags_embedding * penalty_weight

    # # Reduce mean for RaggedTensors to handle variable lengths
    # liked_tags_embedding = tf.reduce_mean(liked_tags_embedding, axis=1)
    # unliked_tags_embedding = tf.reduce_mean(unliked_tags_embedding, axis=1)
    # print(user_id_embedding.shape)
    # if(user_id_embedding.shape.as_list() == list([1,1,32])):
    #   print("Fixed users")
    #   user_id_embedding = tf.reduce_mean(user_id_embedding, axis=1)
    # # Combine user embeddings (ensure shape (batch_size, embedding_dim))
    # user_embedding = tf.concat([user_id_embedding, liked_tags_embedding, unliked_tags_embedding], axis=1)

    # Combine recipe embeddings (ensure shape (batch_size, embedding_dim))
    print("Recipe Embeddings Shape: 31", recipe_embeddings.shape.as_list())
    print("tags_embeddings Embeddings Shape: 32", tags_embeddings.shape)
    if(recipe_embeddings.shape.as_list() == list([1,1,32])):
      print("Fixed 1")
      recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1)
    if(tags_embeddings.shape.as_list() == list([1,1,32])):
      print("Fixed 2")
      tags_embeddings = tf.reduce_mean(tags_embeddings, axis=1)

    recipe_embeddings = tf.concat([recipe_embeddings, tags_embeddings], axis=1)

    # Ensure both user_embedding and recipe_embeddings are reduced to (batch_size, embedding_dim)
    user_embedding = tf.reduce_mean(user_embedding, axis=1)  # Reduce axis to 2D
    recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1)

    # Check shapes before returning
    print("User Embeddings Shape: 21", user_embedding.shape)
    print("Recipe Embeddings Shape: 22", recipe_embeddings.shape)

    return user_embedding, recipe_embeddings


  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    user_embeddings, recipe_embeddings = self(features)

    print("User Embeddings Shape:", user_embeddings.shape)
    print("Recipe Embeddings Shape:", recipe_embeddings.shape)
    # # Apply pooling to ensure uniform shapes
    # user_embeddings = tf.reduce_mean(user_embeddings, axis=1)  # Ensure it's (batch_size, embedding_dim)
    # recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1)

    # user_embeddings = tf.reduce_mean(user_embeddings, axis=1)  # Ensure it's (batch_size, embedding_dim)
    # recipe_embeddings = tf.reduce_mean(recipe_embeddings, axis=1) # Ensure it's (batch_size, embedding_dim)

    # Check shapes again
    print("User Embeddings Shape:", user_embeddings.shape)
    print("Recipe Embeddings Shape:", recipe_embeddings.shape)

    return self.task(user_embeddings, recipe_embeddings)


In [44]:
# Randomly shuffle data and split between train and test.
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [45]:
# prompt: –ù–∞–ø–∏—à–∏ –∫–æ–¥ –¥–ª—è —Ç–µ—Å—Ç–∞ model –¥–ª—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è 42
model = RecipeAndUserModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
test_user = {}
for data in test.take(1).as_numpy_iterator():
  print(model(data))

Fixed 1
(<tf.Tensor: shape=(1, 96), dtype=float32, numpy=
array([[-3.52230556e-02,  1.75264813e-02,  1.60246007e-02,
         2.81946175e-02,  4.14231755e-02, -3.62736359e-02,
         2.48584896e-03, -1.59127116e-02,  2.75185592e-02,
         1.92953013e-02,  2.19455399e-02,  4.37143557e-02,
        -4.38168049e-02,  5.96208498e-03,  3.98756750e-02,
        -4.34775613e-02, -2.30742823e-02,  3.63623016e-02,
        -3.83467562e-02, -1.85733326e-02,  9.38465446e-03,
         8.92303884e-04, -1.25821680e-03, -1.91025864e-02,
        -4.44731377e-02,  3.63770463e-02, -2.16915011e-02,
        -1.59541368e-02,  1.41413249e-02, -6.47109747e-03,
         2.30637677e-02, -6.81829453e-03, -1.68757210e-03,
         2.75275623e-03,  7.90641643e-04, -1.27081433e-03,
        -3.44715896e-03,  6.88652566e-04, -9.55880038e-04,
         3.47363576e-03,  1.32687448e-03, -6.82806422e-04,
         4.34627337e-03,  1.85572880e-03, -2.21296796e-03,
         2.76207225e-03, -2.89641344e-03, -5.32198101e-05

In [50]:
model = RecipeAndUserModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
model.fit(cached_train, epochs=1)

Candidate Embeddings Shape: (128, 1, 32)
Recipe Embeddings Shape: 31 [None, 1, 32]
tags_embeddings Embeddings Shape: 32 (None, 1, 32)
User Embeddings Shape: 21 (None, 32)
Recipe Embeddings Shape: 22 (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)
Recipe Embeddings Shape: 31 [None, 1, 32]
tags_embeddings Embeddings Shape: 32 (None, 1, 32)
User Embeddings Shape: 21 (None, 32)
Recipe Embeddings Shape: 22 (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)


<keras.src.callbacks.History at 0x23d998f2e30>

In [46]:
model.evaluate(cached_test, return_dict=True)

(None, 1, 32)
Recipe Embeddings Shape: 31 [None, 1, 32]
tags_embeddings Embeddings Shape: 32 (None, 1, 32)
User Embeddings Shape: 21 (None, 32)
Recipe Embeddings Shape: 22 (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)


{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.003700000001117587,
 'factorized_top_k/top_10_categorical_accuracy': 0.005849999841302633,
 'factorized_top_k/top_50_categorical_accuracy': 0.021250000223517418,
 'factorized_top_k/top_100_categorical_accuracy': 0.03435000032186508,
 'loss': 29595.990234375,
 'regularization_loss': 0,
 'total_loss': 29595.990234375}

In [81]:
model.evaluate(cached_test, return_dict=True)

Recipe Embeddings Shape: 31 [None, 1, 32]
tags_embeddings Embeddings Shape: 32 (None, 1, 32)
User Embeddings Shape: 21 (None, 32)
Recipe Embeddings Shape: 22 (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)
User Embeddings Shape: (None, 32)
Recipe Embeddings Shape: (None, 32)


{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.00279999990016222,
 'factorized_top_k/top_10_categorical_accuracy': 0.006099999882280827,
 'factorized_top_k/top_50_categorical_accuracy': 0.019899999722838402,
 'factorized_top_k/top_100_categorical_accuracy': 0.03164999932050705,
 'loss': 29591.072265625,
 'regularization_loss': 0,
 'total_loss': 29591.072265625}

### Making predictions

In [78]:
test_user = {}
for data in test.take(1).as_numpy_iterator():
  print(model(data))

Fixed users
Recipe Embeddings Shape: 31 [1, 1, 32]
tags_embeddings Embeddings Shape: 32 (1, 1, 32)
Fixed 1
Fixed 2
User Embeddings Shape: 21 (1,)
Recipe Embeddings Shape: 22 (1,)
(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.0028638], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-0.0279709], dtype=float32)>)


In [52]:
test_user = {}
for data in test.take(1).as_numpy_iterator():
  test_user = data

In [None]:
test_user

In [63]:
# Create a dataset with the user ID you want to test
user_id_to_test = "42"  # Replace with the user ID you want to test
test_user_dataset = {"user_id": np.array(test_user["user_id"]),
      "liked_tags": np.array(test_user["liked_tags"]),
      "recipe_name": np.array(["arriba baked winter squash mexican style"]),  # Use empty list as a default
      "tags": np.array(test_user["tags"]),
      "unliked_tags": np.array(test_user["unliked_tags"])}

# Use the model's retrieval task to get the top-k predictions
_, recipe_embeddings = model(test_user_dataset)

brute_force = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
print(recipe_embeddings)

index = brute_force.index_from_dataset(
  tf.data.Dataset.zip((recipes.batch(100), recipes.batch(100).map(lambda x: model.recipe_model(x['recipe_name'])))))


print(index(test_user_dataset))
_, titles = index(recipe_embeddings, k=10)

print(f"Top 10 recommendations for user {user_id_to_test}: {titles}")

Fixed users
Recipe Embeddings Shape: 31 [1, 1, 32]
tags_embeddings Embeddings Shape: 32 (1, 1, 32)
Fixed 1
Fixed 2
User Embeddings Shape: 21 (1,)
Recipe Embeddings Shape: 22 (1,)
tf.Tensor([-0.0037438], shape=(1,), dtype=float32)


AttributeError: 'dict' object has no attribute 'shape'

In [None]:
class UserModel(tf.keras.Model):
    def __init__(self, unique_user_ids, unique_liked_tags, unique_unliked_tags, embedding_dimension):
        super().__init__()
        
        # User ID embeddings
        self.user_id_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])
        
        # Liked tags embeddings
        self.liked_tags_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_liked_tags, mask_token=None),
            tf.keras.layers.Embedding(len(unique_liked_tags) + 1, embedding_dimension)
        ])

        # Unliked tags embeddings
        self.unliked_tags_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_unliked_tags, mask_token=None),
            tf.keras.layers.Embedding(len(unique_unliked_tags) + 1, embedding_dimension)
        ])

        # Penalty weight for unliked tags
        self.penalty_weight = -0.5

    def call(self, features):
        # Inputs should contain "user_id", "liked_tags", and "unliked_tags"
        user_id = features["user_id"]
        liked_tags = features["liked_tags"]
        unliked_tags = features["unliked_tags"]
        
        # Embed user ID
        user_id_embedding = self.user_id_model(user_id)
        if(user_id_embedding.shape.as_list() == list([1,1,32])):
            print("Fixed users")
            user_id_embedding = tf.reduce_mean(user_id_embedding, axis=1)
        # Embed liked and unliked tags
        liked_tags_embedding = self.liked_tags_model(liked_tags)
        unliked_tags_embedding = self.unliked_tags_model(unliked_tags)
        
        # Apply penalty to unliked tags
        unliked_tags_embedding = unliked_tags_embedding * self.penalty_weight
        # Reduce mean to handle ragged tensors (different lengths of liked/unliked tags)
        if(len(liked_tags_embedding.shape.as_list()) >=3):
            liked_tags_embedding = tf.reduce_mean(liked_tags_embedding, axis=1)
        if(len(unliked_tags_embedding.shape.as_list()) >=3):
            unliked_tags_embedding = tf.reduce_mean(unliked_tags_embedding, axis=1)
        # Concatenate user embeddings with liked and unliked tags
        user_embedding = tf.concat([user_id_embedding, liked_tags_embedding, unliked_tags_embedding], axis=1)
        
        return user_embedding


In [None]:
model.recipe_model(np.array(["arriba baked winter squash mexican style"]))

In [None]:
recipes.batch(100).map(lambda x: x["recipe_id"]).map(lambda z: tf.cast(z, tf.float32))

In [83]:
test_user = {}
for data in test.take(1).as_numpy_iterator():
  test_user = data

In [64]:
# Create a dataset with the user ID you want to test
user_id_to_test = "42"
test_user_dataset = {
    "user_id": np.array(test_user["user_id"]),
    "liked_tags": np.array(test_user["liked_tags"]),
    "recipe_name": np.array(["arriba baked winter squash mexican style"], dtype=object),
    "tags": np.array(test_user["tags"]),
    "unliked_tags": np.array(test_user["unliked_tags"]),
}

# Use the model's retrieval task to get the top-k predictions
_, recipe_embeddings = model(test_user_dataset)

# Ensure recipe embeddings have the correct shape
print("Shape of recipe embeddings:", recipe_embeddings.shape)
print("Dtype of recipe embeddings:", recipe_embeddings.dtype)

# Create a brute force index
brute_force = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Build the index
index = brute_force.index_from_dataset(
    recipes.batch(100).map(
        lambda x: (
            x["recipe_id"] +"_" + x["recipe_name"],
            tf.squeeze(model.recipe_model(x["recipe_name"]), axis=1)  # Ensure this is 2D
        )
    )
)

# Reshape the recipe embeddings if necessary
if len(recipe_embeddings.shape) == 1:
    recipe_embeddings = tf.expand_dims(recipe_embeddings, axis=0)  # Reshape to (1, embedding_dim)
print("Shape of reshaped recipe embeddings:", recipe_embeddings.shape)

recipe_embeddings_str = tf.strings.as_string(recipe_embeddings)
_, titles = index(recipe_embeddings_str, k=10)

print(f"Top 10 recommendations for user {user_id_to_test}: {titles}")


Fixed users
Recipe Embeddings Shape: 31 [1, 1, 32]
tags_embeddings Embeddings Shape: 32 (1, 1, 32)
Fixed 1
Fixed 2
User Embeddings Shape: 21 (1,)
Recipe Embeddings Shape: 22 (1,)
Shape of recipe embeddings: (1,)
Dtype of recipe embeddings: <dtype: 'float32'>
Shape of reshaped recipe embeddings: (1, 1)


TypeError: Exception encountered when calling layer 'user_model_3' (type UserModel).

Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got 'user_id'

Call arguments received by layer 'user_model_3' (type UserModel):
  ‚Ä¢ features=tf.Tensor(shape=(1, 1), dtype=string)

In [72]:
# Create a dataset with the user ID you want to test
user_id_to_test = "42"
test_user_dataset = {
    "user_id": np.array(test_user["user_id"]),
    "liked_tags": np.array(test_user["liked_tags"]),
    "recipe_name": np.array(["arriba baked winter squash mexican style"], dtype=object),
    "tags": np.array(test_user["tags"]),
    "unliked_tags": np.array(test_user["unliked_tags"]),
}

# Use the model's retrieval task to get the top-k predictions
_, recipe_embeddings = model(test_user_dataset)

# Ensure recipe embeddings have the correct shape
print("Shape of recipe embeddings:", recipe_embeddings.shape)
print("Dtype of recipe embeddings:", recipe_embeddings.dtype)

# Create a brute force index
brute_force = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Build the index
index = brute_force.index_from_dataset(
    recipes.batch(100).map(
        lambda x: (
            x["recipe_id"] +"_" + x["recipe_name"],
            tf.squeeze(model.recipe_model(x["recipe_name"]), axis=1)  # Ensure this is 2D
        )
    )
)

# Reshape the recipe embeddings if necessary
if len(recipe_embeddings.shape) == 1:
    recipe_embeddings = tf.expand_dims(recipe_embeddings, axis=0)  # Reshape to (1, embedding_dim)
print("Shape of reshaped recipe embeddings:", recipe_embeddings.shape)

recipe_embeddings_str = tf.strings.as_string(recipe_embeddings)
_, titles = index(recipe_embeddings_str, k=10)

print(f"Top 10 recommendations for user {user_id_to_test}: {titles}")

InvalidArgumentError: Exception encountered when calling layer 'user_model_1' (type UserModel).

{{function_node __wrapped__ConcatV2_N_3_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Ranks of all input tensors should match: shape[0] = [1,1,32] vs. shape[1] = [1,32] [Op:ConcatV2] name: concat

Call arguments received by layer 'user_model_1' (type UserModel):
  ‚Ä¢ inputs={'user_id': 'tf.Tensor(shape=(1,), dtype=string)', 'liked_tags': 'tf.Tensor(shape=(1, 178), dtype=string)', 'recipe_name': 'tf.Tensor(shape=(1,), dtype=string)', 'tags': 'tf.Tensor(shape=(1,), dtype=string)', 'unliked_tags': 'tf.Tensor(shape=(1, 1), dtype=string)'}