In [1]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
import tensorflow_recommenders as tfrs




New vegan user - Vladick

In [2]:
users_recipes_ds = pd.read_csv("./simple_data/RAW_interactions.csv")
recipes_ds = pd.read_csv("./simple_data/RAW_recipes.csv")

In [3]:
vladick_id = np.max(users_recipes_ds["user_id"]) + 1

In [4]:
not_vegan_ids = recipes_ds[recipes_ds["tags"].str.contains('beef|chicken|pork|seafood')][['name', 'id']]
vegan_ids = recipes_ds[~recipes_ds["tags"].str.contains('beef|chicken|pork|seafood')][['name', 'id']]

In [5]:
users_recipes_ds = users_recipes_ds.merge(recipes_ds, left_on='recipe_id', right_on='id')
users_recipes_ds = users_recipes_ds[['user_id', 'recipe_id', 'rating', 'name']]
users_recipes_ds.head()

Unnamed: 0,user_id,recipe_id,rating,name
0,38094,40893,4,white bean green chile pepper soup
1,1293707,40893,5,white bean green chile pepper soup
2,8937,44394,4,devilicious cookie cake delights
3,126440,85009,5,baked potato toppings
4,57222,85009,5,baked potato toppings


In [6]:
def set_vegan_or_not(row):
    if  row['recipe_id'] in vegan_ids['id']:
        return 1
    else:
        return 0

In [7]:
users_recipes_ds['vegan'] =  users_recipes_ds.apply(set_vegan_or_not, axis=1)

In [8]:
new_rows = []
def showInfo(a):
    new_rows.append({'user_id': vladick_id, 'recipe_id': a['id'], 'rating': 5.0, 'name': a['name'], 'vegan': 1.0})

vegan_ids[:5000].apply(showInfo, axis=1)

df_extended = pd.DataFrame(new_rows)
users_recipes_ds = pd.concat([users_recipes_ds, df_extended])

In [9]:
new_rows = []
def showInfo(a):
    new_rows.append({'user_id': vladick_id, 'recipe_id': a['id'], 'rating': 0.0, 'name': a['name'], 'vegan': 0})

not_vegan_ids[:int(len(not_vegan_ids)/2)].apply(showInfo, axis=1)

df_extended = pd.DataFrame(new_rows)
users_recipes_ds = pd.concat([users_recipes_ds, df_extended])

In [10]:
users_recipes_ds

Unnamed: 0,user_id,recipe_id,rating,name,vegan
0,38094,40893,4.0,white bean green chile pepper soup,1.0
1,1293707,40893,5.0,white bean green chile pepper soup,1.0
2,8937,44394,4.0,devilicious cookie cake delights,0.0
3,126440,85009,5.0,baked potato toppings,1.0
4,57222,85009,5.0,baked potato toppings,1.0
...,...,...,...,...,...
31362,2002372707,309089,0.0,italian sausage and or meatball subs,0.0
31363,2002372707,189299,0.0,italian sausage bake,0.0
31364,2002372707,95694,0.0,italian sausage bread,0.0
31365,2002372707,222133,0.0,italian sausage broils,0.0


In [11]:
users_recipes_ds['user_id'] = users_recipes_ds.user_id.astype("str")
users_recipes_ds['name'] = users_recipes_ds.name.astype("str")
users_recipes_ds['rating'] = users_recipes_ds.rating.astype(np.float32)
users_recipes_ds['vegan'] = users_recipes_ds.vegan.astype(np.float32)
users_recipes_ds['recipe_id'] = users_recipes_ds.recipe_id.astype("str")

In [12]:
ratings = tf.data.Dataset.from_tensor_slices((tf.cast(users_recipes_ds['user_id'].values.reshape(-1,1), tf.string),    
                                              tf.cast(users_recipes_ds['name'].values.reshape(-1,1), tf.string),
                                              tf.cast(users_recipes_ds['rating'].values.reshape(-1,1),tf.float32),
                                              tf.cast(users_recipes_ds['vegan'].values.reshape(-1,1),tf.float32)))

ratings = ratings.map(lambda x0,x1,x2,x3: {
    "user_id": x0,
    "name": x1,
    "rating": x2,
    "vegan": x3
})

In [13]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)

train = shuffled.take(int(len(ratings) * 0.8))
test = shuffled.skip(int(len(ratings) * 0.8)).take(int(len(ratings) * 0.2))

In [14]:
recipe_names = ratings.batch(len(ratings)).map(lambda x: x["name"])
user_ids = ratings.batch(len(ratings)).map(lambda x: x["user_id"])

unique_recipe_titles = np.unique(np.concatenate(list(recipe_names)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [15]:
class RankingModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # Compute embeddings for movies.
    self.recipe_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_recipe_titles, mask_token=None),
      tf.keras.layers.Embedding(len(unique_recipe_titles) + 1, embedding_dimension)
    ])

           # Добавляем слой для обработки признака веганства
    self.vegan_dense = tf.keras.Sequential([
        tf.keras.layers.Dense(embedding_dimension, activation="relu")
    ])

        # Compute embeddings for recipe_ids.

    # Compute predictions.
    self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
  ])
    
  def call(self, inputs):

    user_id, name, vegan = inputs

    # Извлекаем эмбеддинги для пользователя и рецепта
    user_embedding = self.user_embeddings(user_id)
    recipe_embedding = self.recipe_embeddings(name)

    # Обрабатываем признак веганства через dense-слой
    vegan_embedding = self.vegan_dense(tf.expand_dims(vegan, -1))  # Добавляем ось, если необходимо

    # Объединяем все эмбеддинги
    combined_embeddings = tf.concat([user_embedding, recipe_embedding, vegan_embedding], axis=1)

    return self.ratings(combined_embeddings)

In [16]:
RankingModel()((["2002372707"], ["jiffy corn muffins mix clone"], [1.0]))





<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.03909951]], dtype=float32)>

In [17]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [18]:
class RecipelensModel(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["user_id"], features["name"], features["vegan"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("rating")
    
    rating_predictions = self(features)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

In [19]:
model = RecipelensModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [20]:
cached_train = train.shuffle(800_000).batch(200_000).cache()
cached_test = test.batch(70_000).cache()

In [21]:
model.fit(cached_train, epochs=14)

Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<keras.src.callbacks.History at 0x153fec6c370>

In [22]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.386635422706604,
 'loss': 1.9082567691802979,
 'regularization_loss': 0,
 'total_loss': 1.9082567691802979}

In [23]:
vegan_ids[int(len(vegan_ids)/2):].head()

Unnamed: 0,name,id
116857,kelleigh s warm shiitake mushroom salad,375851
116858,kellerkuchen cellar cake,129988
116859,kelley s baked apple walnut chicken with cornb...,106363
116860,kelley s cranberry salad,17676
116861,kelley s spicy hot spinach dip,207939


In [24]:
not_vegan_ids[int(len(not_vegan_ids)/2):].head(30)

Unnamed: 0,name,id
113038,italian sausage chicken breasts,287833
113039,italian sausage chicken chili,53587
113040,italian sausage chowder,103167
113041,italian sausage crockpot soup,415707
113042,italian sausage crustless quiche,336832
113043,italian sausage dinner bake,44068
113044,italian sausage etouffee,258637
113045,italian sausage florentine for the crock pot,240543
113046,italian sausage for the crock pot,53919
113049,italian sausage hawaiian,76804


In [25]:
users_recipes_ds[users_recipes_ds["user_id"] == "2002372707"]

Unnamed: 0,user_id,recipe_id,rating,name,vegan
0,2002372707,137739,5.0,arriba baked winter squash mexican style,1.0
1,2002372707,112140,5.0,all in the kitchen chili,1.0
2,2002372707,59389,5.0,alouette potatoes,1.0
3,2002372707,44061,5.0,amish tomato ketchup for canning,1.0
4,2002372707,5289,5.0,apple a day milk shake,1.0
...,...,...,...,...,...
31362,2002372707,309089,0.0,italian sausage and or meatball subs,0.0
31363,2002372707,189299,0.0,italian sausage bake,0.0
31364,2002372707,95694,0.0,italian sausage bread,0.0
31365,2002372707,222133,0.0,italian sausage broils,0.0


In [27]:
test_ratings = {}
test_samples = 5
recipes = users_recipes_ds[users_recipes_ds["vegan"] == 1.0][:10]['name'].to_list()
recipes = recipes + (users_recipes_ds[users_recipes_ds["vegan"] == 0][:10]['name'].to_list())

for id in range(test_samples*2):
  test_ratings[str(id)+"_"+f"({0 if id > test_samples else 1.0})"+"_" + "_" + recipes[id]] = model({
      "user_id": np.array(["2002372707"]),
      "name": np.array([recipes[id]]),
      "vegan": np.array([0 if id > test_samples else 1.0]),
  })

print("Ratings:")
for title, score in sorted(test_ratings.items(), key=lambda x: x[1][0][1], reverse=True):
  print(f"{title}: {score}")

Ratings:
4_(1.0)__mexican hots: [[[1.2990699]
  [4.49988  ]
  [4.406164 ]]]
0_(1.0)__white bean   green chile pepper soup: [[[1.2990699]
  [4.457424 ]
  [4.406164 ]]]
1_(1.0)__white bean   green chile pepper soup: [[[1.2990699]
  [4.457424 ]
  [4.406164 ]]]
9_(0)__chicken tamale pie for 2  ww core: [[[1.2990699]
  [4.401784 ]
  [4.288332 ]]]
2_(1.0)__baked potato toppings: [[[1.2990699]
  [4.212338 ]
  [4.406164 ]]]
3_(1.0)__baked potato toppings: [[[1.2990699]
  [4.212338 ]
  [4.406164 ]]]
5_(1.0)__lamb stew with tomatoes  chickpeas and spices: [[[1.2990699]
  [4.1546164]
  [4.406164 ]]]
6_(0)__lamb stew with tomatoes  chickpeas and spices: [[[1.2990699]
  [4.1546164]
  [4.288332 ]]]
7_(0)__lamb stew with tomatoes  chickpeas and spices: [[[1.2990699]
  [4.1546164]
  [4.288332 ]]]
8_(0)__lamb stew with tomatoes  chickpeas and spices: [[[1.2990699]
  [4.1546164]
  [4.288332 ]]]
