In [1]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
import tensorflow_recommenders as tfrs




In [2]:
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

Let's pretend we have 3 users:
1. Clarissa is a vegetarian who is lactose intolerant. 🥝🍅
2. Martin is an athlete who prefers meat dishes to vegetable dishes. 🍖🥩
3. Stella - has no special preferences, eats mostly everything. 🍲🍠

In [3]:
clarissa = {'id':'clr', 'prefers': ['vegetables', 'vegan', 'nomeat', 'vegetarian'], 'hates': ['meat', 'lactose']}
martin = {'id':'mrt', 'prefers': 'meat', 'hates': 'vegetables'}
stella = {'id':'stl', 'prefers': '', 'hates': ''}

We will additionally enter the factors Calories, Protein, Fat, Carbohydrates for our requirements. **(The numbers were taken at random and do not reflect proportions or recommendations - the numbers are just an example)**
* Clarissa - 2000/100/60/120
* Martin, 2600/150/100/328.
* Stella - 2200/80/50/100

In [4]:
def set_requirements(user, calories, proteins, fats, carbs):
  user['calories'] = calories
  user['proteins'] = proteins
  user['fats'] = fats
  user['carbs'] = carbs
set_requirements(clarissa, 2000, 100, 60, 120)
set_requirements(martin, 2600, 150, 100, 328)
set_requirements(martin, 2200, 80, 50, 100)


# Data processing 📊

Loading recipes and user_interactions. Nutritions showed like:calories (#), total fat (PDV), sugar (PDV) , sodium (PDV) , protein (PDV) , saturated fat (PDV) , and carbohydrates (PDV)

In [5]:
raw_recipes = pd.read_csv('./sample_data/RAW_recipes.csv')
raw_interactions = pd.read_csv('./sample_data/RAW_interactions.csv')
raw_recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


Merge data interactions with recipe infos

In [6]:
interactions_with_recipe_info = pd.merge(raw_interactions, raw_recipes[['name', 'tags', 'ingredients', 'id']], left_on='recipe_id', right_on='id', how='left')
interactions_with_recipe_info = interactions_with_recipe_info[['user_id', 'recipe_id', 'rating', 'name', 'tags', 'ingredients']]
interactions_with_recipe_info.rename(columns={'name': 'recipe_name'}, inplace=True)
interactions_with_recipe_info.head()

Unnamed: 0,user_id,recipe_id,rating,recipe_name,tags,ingredients
0,38094,40893,4,white bean green chile pepper soup,"['weeknight', 'time-to-make', 'course', 'main-...","['great northern beans', 'yellow onion', 'dice..."
1,1293707,40893,5,white bean green chile pepper soup,"['weeknight', 'time-to-make', 'course', 'main-...","['great northern beans', 'yellow onion', 'dice..."
2,8937,44394,4,devilicious cookie cake delights,"['30-minutes-or-less', 'time-to-make', 'course...","[""devil's food cake mix"", 'vegetable oil', 'eg..."
3,126440,85009,5,baked potato toppings,"['15-minutes-or-less', 'time-to-make', 'course...","['mayonnaise', 'salsa', 'cheddar cheese', 'ref..."
4,57222,85009,5,baked potato toppings,"['15-minutes-or-less', 'time-to-make', 'course...","['mayonnaise', 'salsa', 'cheddar cheese', 'ref..."


In [7]:
import ast

def convert_to_list(data_str):
  try:
    return ast.literal_eval(data_str)
  except (SyntaxError, ValueError):
    return []

In [8]:
interactions_with_recipe_info['ingredients_str'] = interactions_with_recipe_info['ingredients'].apply(convert_to_list)
interactions_with_recipe_info['tags_str'] = interactions_with_recipe_info['tags'].apply(convert_to_list)

raw_recipes['ingredients_str'] = raw_recipes['ingredients'].apply(convert_to_list)
raw_recipes['tags_str'] = raw_recipes['tags'].apply(convert_to_list)

In [9]:
interactions_with_recipe_info['ingredients_str'].str.len().unique()

array([ 9,  4, 13,  2, 12, 10, 16,  8, 14, 15,  7, 11,  6,  5, 23, 22, 17,
        3, 18, 19, 21, 26, 29, 24, 20, 27, 25, 33, 30, 31,  1, 28, 35, 32,
       34, 43, 40, 39, 37, 36, 38], dtype=int64)

Formatting data for Datasets

In [10]:
interactions_with_recipe_info['user_id'] = interactions_with_recipe_info.user_id.astype("str")
interactions_with_recipe_info['recipe_id'] = interactions_with_recipe_info.recipe_id.astype("str")
interactions_with_recipe_info['rating'] = interactions_with_recipe_info.rating.astype(np.float32)
interactions_with_recipe_info['recipe_name'] = interactions_with_recipe_info.recipe_name.astype("str")
interactions_with_recipe_info['tags'] = interactions_with_recipe_info.tags.astype("str")
interactions_with_recipe_info['ingredients'] = interactions_with_recipe_info.ingredients.astype("str")


Creating train preferences for users

In [11]:
user_preferences = interactions_with_recipe_info[['user_id', 'rating', 'tags_str']]
user_preferences.head()

Unnamed: 0,user_id,rating,tags_str
0,38094,4.0,"[weeknight, time-to-make, course, main-ingredi..."
1,1293707,5.0,"[weeknight, time-to-make, course, main-ingredi..."
2,8937,4.0,"[30-minutes-or-less, time-to-make, course, mai..."
3,126440,5.0,"[15-minutes-or-less, time-to-make, course, mai..."
4,57222,5.0,"[15-minutes-or-less, time-to-make, course, mai..."


In [12]:
user_liked_tags = user_preferences[user_preferences['rating'] >= 3].groupby('user_id')['tags_str'].apply(list).reset_index()
user_unliked_tags = user_preferences[user_preferences['rating'] < 3].groupby('user_id')['tags_str'].apply(list).reset_index()
users_ds = pd.DataFrame(interactions_with_recipe_info['user_id'].unique(), columns=['user_id'])


In [13]:
users_ds = users_ds.merge(user_liked_tags, on='user_id', how='left')
users_ds = users_ds.rename(columns={'tags_str': 'liked_tags'})
users_ds = users_ds.merge(user_unliked_tags, on='user_id', how='left')
users_ds = users_ds.rename(columns={'tags_str': 'unliked_tags'})

In [14]:
users_ds.head()

Unnamed: 0,user_id,liked_tags,unliked_tags
0,38094,"[[weeknight, time-to-make, course, main-ingred...",
1,1293707,"[[weeknight, time-to-make, course, main-ingred...","[[60-minutes-or-less, time-to-make, course, ma..."
2,8937,"[[30-minutes-or-less, time-to-make, course, ma...",
3,126440,"[[15-minutes-or-less, time-to-make, course, ma...","[[60-minutes-or-less, time-to-make, course, pr..."
4,57222,"[[15-minutes-or-less, time-to-make, course, ma...","[[60-minutes-or-less, time-to-make, course, ma..."


Looking that some of users don't have unliked_tags, so replace all Nan with empty arrays

In [15]:
def unique_tags_in_list(tags_list):
  if isinstance(tags_list, list):
    unique_tags = []
    for sublist in tags_list:
      if isinstance(sublist, list):
        for tag in sublist:
          if tag not in unique_tags:
            unique_tags.append(tag)
    return unique_tags
  else:
    return []

users_ds['liked_tags'] = users_ds['liked_tags'].apply(unique_tags_in_list)
users_ds['unliked_tags'] = users_ds['unliked_tags'].apply(unique_tags_in_list)

In [16]:
def remove_duplicate_tags(row):
  liked_tags = set(row['liked_tags']) if isinstance(row['liked_tags'], list) else set()
  unliked_tags = set(row['unliked_tags']) if isinstance(row['unliked_tags'], list) else set()
  duplicate_tags = liked_tags.intersection(unliked_tags)
  row['liked_tags'] = [tag for tag in row['liked_tags'] if tag not in duplicate_tags] if isinstance(row['liked_tags'], list) else []
  row['unliked_tags'] = [tag for tag in row['unliked_tags'] if tag not in duplicate_tags] if isinstance(row['unliked_tags'], list) else []
  return row

users_ds = users_ds.apply(remove_duplicate_tags, axis=1)


In [17]:
users_ds.head()

Unnamed: 0,user_id,liked_tags,unliked_tags
0,38094,"[weeknight, time-to-make, course, main-ingredi...",[]
1,1293707,"[weeknight, soups-stews, beans, crock-pot-slow...","[cupcakes, finger-food, cakes, english, cake-f..."
2,8937,"[30-minutes-or-less, time-to-make, course, mai...",[]
3,126440,"[15-minutes-or-less, 3-steps-or-less, jewish-s...",[]
4,57222,"[condiments-etc, salads, beans, grains, south-...","[kwanzaa, dairy-free]"


In [18]:
interactions_with_recipe_info = pd.merge(interactions_with_recipe_info, users_ds, on='user_id', how='left')

Creating datasets

In [19]:
# Параметры
MAX_TAG_LENGTH = 30
PADDING_VALUE = "empty"

interactions_with_recipe_info['liked_tags'] = [
    (tags + [PADDING_VALUE] * (MAX_TAG_LENGTH - len(tags)))[:MAX_TAG_LENGTH] 
    for tags in interactions_with_recipe_info['liked_tags']
]
interactions_with_recipe_info['unliked_tags'] = [
    (tags + [PADDING_VALUE] * (MAX_TAG_LENGTH - len(tags)))[:MAX_TAG_LENGTH] 
    for tags in interactions_with_recipe_info['unliked_tags']
]
interactions_with_recipe_info['ingredients_str'] =  [
    (tags + [PADDING_VALUE] * (MAX_TAG_LENGTH - len(tags)))[:MAX_TAG_LENGTH] 
    for tags in interactions_with_recipe_info['ingredients_str'] ]

interactions_with_recipe_info['tags_str'] =  [
    (tags + [PADDING_VALUE] * (MAX_TAG_LENGTH - len(tags)))[:MAX_TAG_LENGTH] 
    for tags in interactions_with_recipe_info['tags_str'] ]

raw_recipes['tags_str'] = [
    (tags + [PADDING_VALUE] * (MAX_TAG_LENGTH - len(tags)))[:MAX_TAG_LENGTH] 
    for tags in raw_recipes['tags_str'] ]

raw_recipes['ingredients_str'] = [
    (tags + [PADDING_VALUE] * (MAX_TAG_LENGTH - len(tags)))[:MAX_TAG_LENGTH] 
    for tags in raw_recipes['ingredients_str'] ]

In [20]:
interactions_with_recipe_info['ingredients_str'].str.len().unique(), interactions_with_recipe_info['unliked_tags'].str.len().unique(), 

(array([30], dtype=int64), array([30], dtype=int64))

In [21]:
interactions_with_recipe_info['liked_tags_str'] = interactions_with_recipe_info['liked_tags'].apply(lambda x: ','.join(x))
interactions_with_recipe_info['unliked_tags_str'] = interactions_with_recipe_info['unliked_tags'].apply(lambda x: ','.join(x))
interactions_with_recipe_info['tags_str'] = interactions_with_recipe_info['tags_str'].apply(lambda x: ','.join(x))

In [22]:
def split_tags(tags_string):
    return tf.strings.split(tf.strings.strip(tags_string), ',')

In [23]:
tf.compat.v1.enable_eager_execution()




In [24]:
def squeeze_tags(data):
    return {
        "user_id": data["user_id"],
        "recipe_id": data["recipe_id"],
        "rating": data["rating"],
        "recipe_name": data["recipe_name"],
        "tags": tf.squeeze(data["tags"], axis=0),  # Укажите ось, которую нужно удалить
        "liked_tags": tf.squeeze(data["liked_tags"], axis=0),  # Укажите ось, которую нужно удалить
        "unliked_tags": tf.squeeze(data["unliked_tags"], axis=0)  # Укажите ось, которую нужно удалить
    }

ratings = tf.data.Dataset.from_tensor_slices((tf.cast(interactions_with_recipe_info['user_id'].values, tf.string),
                                              tf.cast(interactions_with_recipe_info['recipe_id'].values, tf.string),
                                              tf.cast(interactions_with_recipe_info['rating'].values, tf.float32),
                                              tf.cast(interactions_with_recipe_info['recipe_name'].values, tf.string),
                                              tf.cast(interactions_with_recipe_info['tags_str'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['liked_tags_str'].values.reshape(-1,1), tf.string),
                                              tf.cast(interactions_with_recipe_info['unliked_tags_str'].values.reshape(-1,1), tf.string)
                                              )).map(lambda x, x1, x2, x3, x4, x5, x6: {
                                                  "user_id": x,
                                                  "recipe_id": x1,
                                                  "rating": x2,
                                                  "recipe_name": x3,
                                                  "tags": split_tags(x4),
                                                  "liked_tags": split_tags(x5),
                                                  "unliked_tags": split_tags(x6)
                                              }).map(squeeze_tags)


In [25]:
for data in ratings.take(1).as_numpy_iterator():
    print(data)

{'user_id': b'38094', 'recipe_id': b'40893', 'rating': 4.0, 'recipe_name': b'white bean   green chile pepper soup', 'tags': array([b'weeknight', b'time-to-make', b'course', b'main-ingredient',
       b'preparation', b'occasion', b'soups-stews', b'beans',
       b'vegetables', b'easy', b'crock-pot-slow-cooker', b'dietary',
       b'equipment', b'empty', b'empty', b'empty', b'empty', b'empty',
       b'empty', b'empty', b'empty', b'empty', b'empty', b'empty',
       b'empty', b'empty', b'empty', b'empty', b'empty', b'empty'],
      dtype=object), 'liked_tags': array([b'weeknight', b'time-to-make', b'course', b'main-ingredient',
       b'preparation', b'occasion', b'soups-stews', b'beans',
       b'vegetables', b'easy', b'crock-pot-slow-cooker', b'dietary',
       b'equipment', b'60-minutes-or-less', b'main-dish', b'fruit',
       b'vegan', b'vegetarian', b'stove-top', b'black-beans',
       b'30-minutes-or-less', b'low-protein', b'salads', b'dinner-party',
       b'low-cholesterol', b'lo

In [26]:
raw_recipes['ingredients_str'] = raw_recipes['ingredients_str'].apply(lambda x: ','.join(x))
raw_recipes['tags_str'] = raw_recipes['tags_str'].apply(lambda x: ','.join(x))

In [27]:
raw_recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,ingredients_str,tags_str
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,"winter squash,mexican seasoning,mixed spice,ho...","60-minutes-or-less,time-to-make,course,main-in..."
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,"prepared pizza crust,sausage patty,eggs,milk,s...","30-minutes-or-less,time-to-make,course,main-in..."
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,"ground beef,yellow onions,diced tomatoes,tomat...","time-to-make,course,preparation,main-dish,chil..."
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,"spreadable cheese with garlic and herbs,new po...","60-minutes-or-less,time-to-make,course,main-in..."
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,"tomato juice,apple cider vinegar,sugar,salt,pe...","weeknight,time-to-make,course,main-ingredient,..."


In [28]:
raw_recipes['id'] = raw_recipes.id.astype("str")
raw_recipes['name'] = raw_recipes.name.astype("str")
raw_recipes['tags'] = raw_recipes.tags.astype("str")
raw_recipes['ingredients'] = raw_recipes.ingredients.astype("str")

In [29]:
raw_recipes['name'][:1]

0    arriba   baked winter squash mexican style
Name: name, dtype: object

In [30]:
# prompt: Создай на основе raw_recipes датасет, но учти что ingredients_str это массив

recipes = tf.data.Dataset.from_tensor_slices((tf.cast(raw_recipes['id'].values, tf.string),
                                              tf.cast(raw_recipes['name'].values, tf.string),
                                              tf.cast(raw_recipes['tags_str'].values.reshape(-1, 1), tf.string)
                                            #   tf.cast(raw_recipes['ingredients_str'].values.reshape(-1, 1), tf.string),
                                              )
                                             ).map(lambda x, x1, x2: {
                                                  "recipe_id": x,
                                                  "recipe_name": x1,
                                                  "tags": split_tags(x2),
                                                #   "ingredients": split_tags(x3)
                                              }).map(lambda data: {
        **data,
        "tags": tf.squeeze(data["tags"], axis=0)  # Удаляем дополнительные измерения
        # "ingredients": tf.squeeze(data["ingredients"], axis=0)  # Удаляем дополнительные измерения
    })



In [31]:
sizes = []
for data in recipes.as_numpy_iterator():
  sizes.append(len(data["tags"]))
print(np.unique(sizes))  

[30]


In [32]:
recipe_names = tf.data.Dataset.from_tensor_slices((tf.cast(raw_recipes['name'].values.reshape(-1, 1), tf.string))).map(lambda x: x)

# Towers 🗼

For our towers set dimensionality of the query and candidate representations: **32**. Higher values will correspond to models that may be more accurate, but will also be slower to fit and more prone to overfitting.

In [55]:
#embedding_dimension = 32

## User tower 👷

Lets start creating our towers with User towers. We will compute by:
1. User id
2. Prefers and hates

### User ID model

In [34]:
unique_user_ids = users_ds["user_id"].unique()
unique_user_ids[:10]

array(['38094', '1293707', '8937', '126440', '57222', '52282', '124416',
       '2000192946', '76535', '273745'], dtype=object)

In [35]:
user_id_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_user_ids, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_user_ids)+1, embedding_dimension)
])




### User Prefers models

In [36]:
# prompt: модель для liked_tags которым стоит отдать предпочтение, но учти что liked_tags в датасете это массив

unique_liked_tags = set()
for tags in users_ds['liked_tags']:
  if isinstance(tags, list):
    for tag in tags:
      unique_liked_tags.add(tag)
unique_liked_tags = list(unique_liked_tags)

liked_tags_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_liked_tags, mask_token=None),
  tf.keras.layers.Embedding(len(unique_liked_tags) + 1, embedding_dimension)
])


In [37]:
unique_liked_tags[:10]

['',
 'infant-baby-friendly',
 'kiwifruit',
 'brazilian',
 'for-large-groups',
 'lemon',
 'beginner-cook',
 'melons',
 'fudge',
 'st-patricks-day']

In [38]:

unique_unliked_tags = set()
for tags in users_ds['unliked_tags']:
  if isinstance(tags, list):
    for tag in tags:
      unique_unliked_tags.add(tag)
unique_unliked_tags = list(unique_unliked_tags)

unliked_tags_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_unliked_tags, mask_token=None),
  tf.keras.layers.Embedding(len(unique_unliked_tags) + 1, embedding_dimension)
])
unique_unliked_tags[:10]

['',
 'infant-baby-friendly',
 'kiwifruit',
 'brazilian',
 'for-large-groups',
 'lemon',
 'beginner-cook',
 'melons',
 'lamb-sheep-main-dish',
 'fudge']

## Recipe tower 🌭

In [39]:
unique_recipe_names = np.unique(list(raw_recipes["name"].unique()))
recipe_name_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_recipe_names, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_recipe_names) + 1, embedding_dimension)
])
unique_recipe_names[:10]

array(['0 carb   0 cal gummy worms', '0 fat chunky watermelon salsa',
       '0 point ice cream  only 1 ingredient', '0 point soup   ww',
       '0 point soup  crock pot', '007  martini', '007 cocktail',
       '1  2  3  swiss meringue buttercream', '1 00 tangy chicken recipe',
       '1 000 artichoke hearts'], dtype='<U85')

In [40]:
# prompt: напиши вызов recipe_name_model

recipe_name_model(['some recipe name'])





<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[-0.04022384,  0.03198082,  0.01680404,  0.02569875,  0.04956589,
        -0.03386796, -0.01278985,  0.01473087,  0.0171161 , -0.04176515,
         0.01333975,  0.0333013 , -0.02724527,  0.02846116, -0.0493012 ,
         0.01441814,  0.0340233 , -0.02964989, -0.00687019,  0.04881856,
         0.03174844,  0.01197533,  0.02987069,  0.0498438 , -0.00471228,
        -0.04069904, -0.00519881,  0.02281589, -0.00624678,  0.0159745 ,
         0.02831981, -0.02725786]], dtype=float32)>

### Recipe ingredient&tag model

In [75]:
unique_tags = set()
for tags in raw_recipes['tags'].apply(convert_to_list):
  if isinstance(tags, list):
    for tag in tags:
      unique_tags.add(tag)
unique_tags = np.unique(list(unique_tags))

tags_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_tags, mask_token="empty"),
  tf.keras.layers.Embedding(len(unique_tags) + 2, 160)
])
unique_tags[:10]

array(['', '1-day-or-more', '15-minutes-or-less', '3-steps-or-less',
       '30-minutes-or-less', '4-hours-or-less', '5-ingredients-or-less',
       '60-minutes-or-less',
       'Throw the ultimate fiesta with this sopaipillas recipe from Food.com.',
       'a1-sauce'], dtype='<U69')

## Combine models

In [76]:
class UserModel(tf.keras.Model):
    def __init__(self, unique_user_ids, tags_model, embedding_dimension):
        super().__init__()
        
        # User ID embeddings
        self.user_id_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])
        
        # Liked tags embeddings
        self.liked_tags_model = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
              vocabulary=unique_tags, mask_token=None),
          tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
        ])

        # Unliked tags embeddings
        self.unliked_tags_model = tf.keras.Sequential([
              tf.keras.layers.StringLookup(
                  vocabulary=unique_tags, mask_token=None),
              tf.keras.layers.Embedding(len(unique_tags) + 1, embedding_dimension)
            ])

        # Penalty weight for unliked tags
        self.penalty_weight = -1.5
        self.plus_weight = 1.5

    def call(self, features):
        user_id = features["user_id"]
        liked_tags = features["liked_tags"]
        unliked_tags = features["unliked_tags"]

        # Embed user ID
        user_id_embedding = self.user_id_model(user_id)
        #user_id_embedding = tf.reduce_mean(user_id_embedding, axis=1)

        # Embed liked and unliked tags
        liked_tags_embedding = self.liked_tags_model(liked_tags)
        unliked_tags_embedding = self.unliked_tags_model(unliked_tags)

        # Apply penalty to unliked tags
        unliked_tags_embedding = unliked_tags_embedding * self.penalty_weight
        liked_tags_embedding = liked_tags_embedding * self.plus_weight


        # Aggregate embeddings
        liked_tags_embedding = tf.reduce_max(liked_tags_embedding, axis=1, keepdims=True)
        unliked_tags_embedding = tf.reduce_min(unliked_tags_embedding, axis=1, keepdims=True)


        # Используйте weighted суммирование для учета штрафов
        user_embedding = tf.add(user_id_embedding, liked_tags_embedding)
        user_embedding = tf.add(user_embedding, unliked_tags_embedding)

        return user_embedding



In [77]:
# Извлечение данных пользователя
test_user = {}
for data in ratings.take(1).as_numpy_iterator():
    test_user = data
    
user_model = UserModel(unique_user_ids, tags_model, embedding_dimension)

user_model(test_user)

<tf.Tensor: shape=(30, 32), dtype=float32, numpy=
array([[ 0.019876  , -0.046961  , -0.00843369, -0.03970179,  0.02947155,
        -0.00705806, -0.04265935, -0.00539065, -0.04638531,  0.00102698,
        -0.02408699,  0.01413954, -0.04639607, -0.02812576, -0.04535428,
        -0.00546051,  0.02973939, -0.01493241,  0.01683442,  0.00607761,
         0.04114311, -0.02887453,  0.03994773,  0.03568748, -0.00450126,
        -0.02628521,  0.03341987, -0.03978372, -0.0451223 , -0.01183569,
         0.01994457, -0.01826802],
       [ 0.02008212, -0.04675488, -0.00822757, -0.03949567,  0.02967768,
        -0.00685195, -0.04245323, -0.00518452, -0.04617919,  0.00123311,
        -0.02388087,  0.01434567, -0.04618995, -0.02791964, -0.04514816,
        -0.00525438,  0.02994552, -0.01472629,  0.01704054,  0.00628372,
         0.04134924, -0.02866841,  0.04015384,  0.0358936 , -0.00429515,
        -0.02607909,  0.03362598, -0.0395776 , -0.04491618, -0.01162957,
         0.02015068, -0.0180619 ],
    

In [80]:
class RecipeAndUserModel(tfrs.Model):

    def _reduce_mean_if_needed(self, embedding):
        """Сокращает размерность эмбеддинга до среднего, если он многомерный."""
        if len(embedding.shape) >= 3:
            return tf.reduce_mean(embedding, axis=1)
        return embedding

    def __init__(self, unique_recipe_names, embedding_dimension):
        super().__init__()
        self.embedding_dimension = embedding_dimension
        # Модель пользователя
        
        self.tags_model = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
              vocabulary=unique_tags, mask_token="empty"),
          tf.keras.layers.Embedding(len(unique_tags) + 2, self.embedding_dimension)
        ])
        
        self.user_model = UserModel(unique_user_ids, self.tags_model, self.embedding_dimension*2)

        # Recipe embeddings
        self.recipe_model = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_recipe_names, mask_token=None),
            tf.keras.layers.Embedding(len(unique_recipe_names) + 1, self.embedding_dimension)
        ])

        self.dense = tf.keras.layers.Dense(embedding_dimension)  # Сжимаем до размерности 32

        # Подготовка кандидатов, учитывающих и теги
        self.candidates = recipes.batch(128).map(self._get_candidate_embeddings)

        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=self.candidates
            )
        )
        
    def _get_candidate_embeddings(self, recipe_name):
        # Получаем эмбеддинги для имен и тегов
        recipe_embedding = self.recipe_model(recipe_name['recipe_name'])
        print(recipe_name['tags'])
        tags_embedding = self.tags_model(recipe_name['tags'])  # Здесь предполагается, что recipe_name может содержать теги
        recipe_embedding = self._reduce_mean_if_needed(recipe_embedding)
        tags_embedding = self._reduce_mean_if_needed(tags_embedding)
        # Объединяем эмбеддинги имен и тегов
        combined_embedding = tf.concat([recipe_embedding, tags_embedding], axis=1)
        return combined_embedding

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        # User embedding
        user_embedding = self.user_model(features)

        # Recipe name embeddings
        recipe_name_embeddings = self.recipe_model(features["recipe_name"])
        #recipe_name_embeddings = self._reduce_mean_if_needed(recipe_name_embeddings)
        # Tags embeddings
        tags = features["tags"]
        tags_embeddings = self.tags_model(tags)
        tags_embeddings = self._reduce_mean_if_needed(tags_embeddings)
        # # Вычисление средневзвешенного значения для тегов (можно модифицировать веса)
        # tags_embeddings_weighted_sum = tf.reduce_sum(tags_embeddings, axis=1)
        # sum_of_weights = tf.reduce_sum(tf.ones_like(tags_embeddings), axis=1)
        # tags_embeddings = tags_embeddings_weighted_sum / sum_of_weights
        
        # recipe_name_embeddings = self._reduce_mean_if_needed(recipe_name_embeddings)
        # tags_embeddings = tf.expand_dims(tags_embeddings, axis=0)
        # Конкатенация эмбеддингов рецепта и тегов
        recipe_embeddings = tf.concat([recipe_name_embeddings, tags_embeddings], axis=1)

        return user_embedding, recipe_embeddings

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings, recipe_embeddings = self(features)
        print("Computed: ", user_embeddings.shape, recipe_embeddings.shape)
        # Используем задачу факторизованного топ-K для оптимизации
        user_embeddings = self._reduce_mean_if_needed(user_embeddings)
        recipe_embeddings = self._reduce_mean_if_needed(recipe_embeddings)
        
        # user_embeddings = tf.keras.layers.Dense(64)(user_embeddings)
        # recipe_embeddings = tf.keras.layers.Dense(64)(recipe_embeddings)
        print("Computed: ", user_embeddings.shape, recipe_embeddings.shape)
        return self.task(user_embeddings, recipe_embeddings)


In [86]:
# Randomly shuffle data and split between train and test.
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(2_000)
test = shuffled.skip(80_000).take(2_000)

cached_train =  train.batch(15_000).cache()
cached_test = test.batch(15_000).cache()

In [84]:
model = RecipeAndUserModel(unique_recipe_names, 64)
early_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',  patience=5, min_delta=0.001)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
model.fit(cached_train, epochs=12)

Tensor("args_2:0", shape=(None, None), dtype=string)
Tensor("args_2:0", shape=(None, None), dtype=string)
Epoch 1/12
Computed:  (None, None, 128) (None, 128)
Computed:  (None, 128) (None, 128)
Computed:  (None, None, 128) (None, 128)
Computed:  (None, 128) (None, 128)
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.src.callbacks.History at 0x2db2a9bc550>

In [87]:
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.01600000075995922,
 'factorized_top_k/top_5_categorical_accuracy': 0.03099999949336052,
 'factorized_top_k/top_10_categorical_accuracy': 0.03750000149011612,
 'factorized_top_k/top_50_categorical_accuracy': 0.04650000110268593,
 'factorized_top_k/top_100_categorical_accuracy': 0.057999998331069946,
 'loss': 16582.314453125,
 'regularization_loss': 0,
 'total_loss': 16582.314453125}

In [85]:
model.evaluate(cached_test, return_dict=True)

Computed:  (None, None, 128) (None, 128)
Computed:  (None, 128) (None, 128)


ResourceExhaustedError: Graph execution error:

Detected at node recipe_and_user_model_12/user_model_14/Add defined at (most recent call last):
  File "f:\P310\lib\runpy.py", line 196, in _run_module_as_main

  File "f:\P310\lib\runpy.py", line 86, in _run_code

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\traitlets\config\application.py", line 1046, in launch_instance

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelapp.py", line 736, in start

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "f:\P310\lib\asyncio\base_events.py", line 595, in run_forever

  File "f:\P310\lib\asyncio\base_events.py", line 1881, in _run_once

  File "f:\P310\lib\asyncio\events.py", line 80, in _run

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 505, in process_one

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 740, in execute_request

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\ipkernel.py", line 422, in do_execute

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\ipykernel\zmqshell.py", line 546, in run_cell

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code

  File "C:\Users\Maincharter\AppData\Local\Temp\ipykernel_21732\4057685878.py", line 1, in <module>

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 2296, in evaluate

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 4108, in run_step

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 2066, in test_function

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 2049, in step_function

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 2037, in run_step

  File "f:\P310\lib\site-packages\tensorflow_recommenders\models\base.py", line 88, in test_step

  File "C:\Users\Maincharter\AppData\Local\Temp\ipykernel_21732\3027339719.py", line 75, in compute_loss

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 590, in __call__

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "C:\Users\Maincharter\AppData\Local\Temp\ipykernel_21732\3027339719.py", line 53, in call

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\training.py", line 590, in __call__

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "C:\Users\Maincharter\AppData\Roaming\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "C:\Users\Maincharter\AppData\Local\Temp\ipykernel_21732\1021769160.py", line 54, in call

OOM when allocating tensor with shape[4096,4096,128] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node recipe_and_user_model_12/user_model_14/Add}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_test_function_715140]

### Making predictions

In [48]:
unique_tags[:20]

array(['', '1-day-or-more', '15-minutes-or-less', '3-steps-or-less',
       '30-minutes-or-less', '4-hours-or-less', '5-ingredients-or-less',
       '60-minutes-or-less',
       'Throw the ultimate fiesta with this sopaipillas recipe from Food.com.',
       'a1-sauce', 'african', 'american', 'amish-mennonite', 'angolan',
       'appetizers', 'apples', 'april-fools-day', 'argentine',
       'artichoke', 'asian'], dtype='<U69')

In [88]:
likes = np.full(15, "asian")
likes_2 = np.full(15, "african")
likes = np.append(likes,likes_2)
likes

array(['asian', 'asian', 'asian', 'asian', 'asian', 'asian', 'asian',
       'asian', 'asian', 'asian', 'asian', 'asian', 'asian', 'asian',
       'asian', 'african', 'african', 'african', 'african', 'african',
       'african', 'african', 'african', 'african', 'african', 'african',
       'african', 'african', 'african', 'african'], dtype='<U7')

In [89]:
dislikes = np.full(10, '30-minutes-or-less')
dislikes_2 = np.full(10, '60-minutes-or-less')
dislikes_3 = np.full(10, 'empty')
dislikes = np.append(dislikes, [dislikes_2, dislikes_3])
dislikes

array(['30-minutes-or-less', '30-minutes-or-less', '30-minutes-or-less',
       '30-minutes-or-less', '30-minutes-or-less', '30-minutes-or-less',
       '30-minutes-or-less', '30-minutes-or-less', '30-minutes-or-less',
       '30-minutes-or-less', '60-minutes-or-less', '60-minutes-or-less',
       '60-minutes-or-less', '60-minutes-or-less', '60-minutes-or-less',
       '60-minutes-or-less', '60-minutes-or-less', '60-minutes-or-less',
       '60-minutes-or-less', '60-minutes-or-less', 'empty', 'empty',
       'empty', 'empty', 'empty', 'empty', 'empty', 'empty', 'empty',
       'empty'], dtype='<U18')

In [51]:
test_user['liked_tags']

array([b'weeknight', b'time-to-make', b'course', b'main-ingredient',
       b'preparation', b'occasion', b'soups-stews', b'beans',
       b'vegetables', b'easy', b'crock-pot-slow-cooker', b'dietary',
       b'equipment', b'60-minutes-or-less', b'main-dish', b'fruit',
       b'vegan', b'vegetarian', b'stove-top', b'black-beans',
       b'30-minutes-or-less', b'low-protein', b'salads', b'dinner-party',
       b'low-cholesterol', b'low-carb', b'healthy-2', b'low-in-something',
       b'for-1-or-2', b'side-dishes'], dtype=object)

In [90]:
# Извлечение данных пользователя
test_user = {}
for data in test.take(1).as_numpy_iterator():
    test_user = data

# Создание набора данных для тестового пользователя
user_id_to_test = "42"  # Замените на нужный ID пользователя
test_user_dataset = {
    "user_id": np.array(test_user["user_id"]),
    "liked_tags": np.array(likes),
    "unliked_tags": np.array(dislikes)
}

brute_force = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
def _reduce_mean_if_needed(embedding):
    """Сокращает размерность эмбеддинга до среднего, если он многомерный."""
    if len(embedding.shape) >= 3:
        return tf.reduce_mean(embedding, axis=1)
    return embedding
dense = tf.keras.layers.Dense(96)

def _get_candidate_embeddings(recipe_name):
    # Получаем эмбеддинги для имен и тегов
    recipe_embedding =  model.recipe_model(recipe_name['recipe_name'])
    print(recipe_name['tags'])
    tags_embedding = model.tags_model(recipe_name['tags'])  # Здесь предполагается, что recipe_name может содержать теги
    recipe_embedding = _reduce_mean_if_needed(recipe_embedding)
    tags_embedding = _reduce_mean_if_needed(tags_embedding)
    # Объединяем эмбеддинги имен и тегов
    combined_embedding = tf.concat([recipe_embedding, tags_embedding], axis=1)
    return combined_embedding
    
# Создание набора данных для рецептов и тегов
def get_recipe_and_tag_embeddings(x):
    recipe_embedding = model.recipe_model(x['recipe_name'])  # (None, 32)
    tag_embedding = model.tags_model(x['tags'])  # (None, 32)
    recipe_embedding =_reduce_mean_if_needed(recipe_embedding)
    tag_embedding = _reduce_mean_if_needed(tag_embedding)
    
    combined_embeddings = tf.concat([_reduce_mean_if_needed(recipe_embedding),  _reduce_mean_if_needed(tag_embedding)], axis=1)  # (None, 64)
    
    return dense(combined_embeddings)

# Используем map для обработки всех данных сразу
recipes_embeddings = recipes.batch(100).map(_get_candidate_embeddings)


# Создание набора имен рецептов
recipes_names = recipes.batch(100).map(lambda x: x["recipe_id"])  # Убедитесь, что это (None,)

# Объединяем эмбеддинги с именами
index = brute_force.index_from_dataset(
    tf.data.Dataset.zip((recipes_names, recipes_embeddings))
)

# Получаем рекомендации
_, titles = index(test_user_dataset, k=15)

# Вывод тегов для рекомендуемых рецептов
recommended_tags = raw_recipes[raw_recipes['id'].isin(titles.numpy().flatten().astype('str'))]


Tensor("args_2:0", shape=(None, None), dtype=string)


In [103]:
pd.set_option('display.max_colwidth', None)
raw_recipes[raw_recipes['id'].isin(titles.numpy()[0].flatten().astype('str'))][['tags_str','name']]

Unnamed: 0,tags_str,name
526,"30-minutes-or-less,time-to-make,main-ingredient,preparation,occasion,low-protein,healthy,very-low-carbs,canning,vegetables,easy,no-cook,holiday-event,low-fat,dietary,gifts,christmas,thanksgiving,low-cholesterol,low-saturated-fat,low-calorie,low-carb,inexpensive,low-in-something,technique,empty,empty,empty,empty,empty",so i cheated refrigerator dills
11574,"60-minutes-or-less,time-to-make,course,main-ingredient,cuisine,preparation,occasion,north-american,appetizers,main-dish,side-dishes,beef,vegetables,american,asian,chinese,finger-food,deep-fry,dietary,ground-beef,meat,presentation,technique,empty,empty,empty,empty,empty,empty,empty",awesome egg rolls
22566,"weeknight,time-to-make,course,preparation,occasion,healthy,pies-and-tarts,desserts,oven,heirloom-historical,holiday-event,pies,dietary,thanksgiving,equipment,number-of-servings,4-hours-or-less,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty",best ever pumpkin pie
43749,"60-minutes-or-less,time-to-make,course,main-ingredient,cuisine,preparation,occasion,north-american,main-dish,eggs-dairy,poultry,mexican,potluck,dinner-party,holiday-event,cheese,chicken,comfort-food,meat,chicken-breasts,taste-mood,to-go,empty,empty,empty,empty,empty,empty,empty,empty",chicken chile verde enchiladas
58634,"weeknight,time-to-make,main-ingredient,preparation,poultry,oven,dietary,meat,equipment,4-hours-or-less,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty",cornish game hens with orange stuffing
73987,"30-minutes-or-less,time-to-make,course,cuisine,preparation,occasion,north-american,healthy,5-ingredients-or-less,american,southern-united-states,easy,kid-friendly,dietary,high-calcium,comfort-food,high-in-something,taste-mood,number-of-servings,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty",dumplings
77750,"60-minutes-or-less,time-to-make,course,main-ingredient,preparation,occasion,for-large-groups,low-protein,healthy,drop-cookies,desserts,vegetables,oven,easy,beginner-cook,holiday-event,kid-friendly,cookies-and-brownies,dietary,gifts,low-cholesterol,inexpensive,healthy-2,low-in-something,taste-mood,sweet,to-go,equipment,number-of-servings,empty",easy pumpkin spice cookies cake mix
81699,"weeknight,time-to-make,course,main-ingredient,preparation,very-low-carbs,main-dish,poultry,chicken,crock-pot-slow-cooker,dietary,low-sodium,high-protein,low-carb,high-in-something,low-in-something,meat,equipment,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty,empty",fake rotisserie chicken
98319,"course,main-ingredient,preparation,occasion,main-dish,pork,barbecue,easy,beginner-cook,dinner-party,holiday-event,summer,dietary,seasonal,high-protein,low-carb,high-in-something,low-in-something,meat,taste-mood,savory,equipment,grilling,empty,empty,empty,empty,empty,empty,empty",grilled bourbon marinated pork chops or pork tenderloins
120540,"15-minutes-or-less,time-to-make,course,main-ingredient,preparation,low-protein,healthy,sauces,condiments-etc,pork,barbecue,easy,low-fat,stove-top,dietary,low-cholesterol,savory-sauces,low-in-something,meat,equipment,grilling,number-of-servings,3-steps-or-less,technique,empty,empty,empty,empty,empty,empty",lee lee s famous barbecue sauce for ribs w preserves


In [91]:
recommended_tags[['name', 'tags_str']]

Unnamed: 0,name,tags_str
526,so i cheated refrigerator dills,"30-minutes-or-less,time-to-make,main-ingredien..."
11574,awesome egg rolls,"60-minutes-or-less,time-to-make,course,main-in..."
22566,best ever pumpkin pie,"weeknight,time-to-make,course,preparation,occa..."
43749,chicken chile verde enchiladas,"60-minutes-or-less,time-to-make,course,main-in..."
58634,cornish game hens with orange stuffing,"weeknight,time-to-make,main-ingredient,prepara..."
73987,dumplings,"30-minutes-or-less,time-to-make,course,cuisine..."
77750,easy pumpkin spice cookies cake mix,"60-minutes-or-less,time-to-make,course,main-in..."
81699,fake rotisserie chicken,"weeknight,time-to-make,course,main-ingredient,..."
98319,grilled bourbon marinated pork chops or pork t...,"course,main-ingredient,preparation,occasion,ma..."
120540,lee lee s famous barbecue sauce for ribs w pr...,"15-minutes-or-less,time-to-make,course,main-in..."


In [54]:
recommended_tags[['name', 'tags_str']]

Unnamed: 0,name,tags_str
8801,artichoke chicken saute,"30-minutes-or-less,time-to-make,course,prepara..."
9384,asian plum sauce,"time-to-make,course,main-ingredient,cuisine,pr..."
10452,aunt nelda s sugar cookies,"30-minutes-or-less,time-to-make,course,prepara..."
12572,bacon maple ice cream,"course,desserts,frozen-desserts,empty,empty,em..."
24988,blackened shrimp,"30-minutes-or-less,time-to-make,course,main-in..."
26417,blueberry lemon tea bread,"time-to-make,course,preparation,breads,4-hours..."
48667,chinese white rice,"lactose,60-minutes-or-less,time-to-make,course..."
78435,easy tortellini pesto salad,"30-minutes-or-less,time-to-make,course,main-in..."
88080,frog in a hole with vegemite sausage bacon a...,"15-minutes-or-less,time-to-make,course,main-in..."
88302,frozen blueberry margaritas,"15-minutes-or-less,time-to-make,course,prepara..."
