In [1]:
# !pip install pandas
# !pip install --upgrade kagglehub
# !pip install -U LibRecommender
# !pip install keras==2.12.0 tensorflow==2.12.0
#
# !pip show LibRecommender

In [2]:
import json
import os
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import BPR
from libreco.evaluation import evaluate
import kagglehub
import tensorflow as tf

class RecipeRecommender:
    """Recipe recommender that trains a LibRecommender BPR model on interaction data.

    Typical flow: ``load_and_preprocess`` -> (optional) ``import_ratings_csv`` ->
    ``train`` -> ``evaluate`` / ``get_recommendations``. A trained instance can be
    persisted with ``save`` and restored with ``get``.

    Interaction frames handled by this class use the columns ``user``, ``item``
    and ``label``. Externally imported users are identified by a UUID string that
    ``user_id_map`` translates to a numeric user ID.
    """

    # Column layouts shared by several methods.
    _INTERACTION_COLUMNS = ["user", "item", "label"]
    _RECOMMENDATION_COLUMNS = ["uuid", "item_id", "item_title", "item_ingredients"]
    _INFO_COLUMNS = ["user", "item", "label", "name"]
    # File stem used for everything LibRecommender writes/reads in save()/get().
    _MODEL_NAME = "BPR_model"
    _METRICS = ["loss", "roc_auc", "precision", "recall", "ndcg"]

    def __init__(self, data_path="shuyangli94/food-com-recipes-and-user-interactions"):
        """Create an untrained recommender and load the recipe id -> name table.

        Args:
            data_path: kagglehub dataset handle that is passed to
                ``kagglehub.dataset_download``.
        """
        self.data_path = data_path
        self.model = None
        self.data_info = None
        self.name_df = None
        self.data_filtered = None
        self.train_data = None
        self.eval_data = None
        self.test_data = None
        self.user_id_map = {}

        # Initialize recipe name mapping (exactly once; it reads RAW_recipes.csv).
        self._load_recipe_names()

    def load_and_preprocess(self, min_interactions):
        """Load the interaction CSVs and keep only sufficiently active items/users.

        The three interaction files are concatenated, reduced to
        ``user``/``item``/``label``, then items and afterwards users with fewer
        than ``min_interactions`` rows are dropped. The result is stored in
        ``self.data_filtered``.

        Args:
            min_interactions: minimum number of rows an item (and then a user)
                must have to be kept.
        """
        # Download and load dataset
        path = kagglehub.dataset_download(self.data_path)

        # Load and combine interaction data
        train = pd.read_csv(os.path.join(path, "interactions_train.csv"))
        validation = pd.read_csv(os.path.join(path, "interactions_validation.csv"))
        test = pd.read_csv(os.path.join(path, "interactions_test.csv"))

        combined = pd.concat([train, validation, test], ignore_index=True)
        combined = self._rename_and_filter_data(combined)

        # Filter items
        item_counts = combined["item"].value_counts()
        items_to_keep = item_counts[item_counts >= min_interactions].index
        filtered = combined[combined["item"].isin(items_to_keep)]

        # Filter users (counted after the item filter has been applied)
        user_counts = filtered["user"].value_counts()
        users_to_keep = user_counts[user_counts >= min_interactions].index
        self.data_filtered = filtered[filtered["user"].isin(users_to_keep)]

    def train(self, embed_size=256, n_epochs=5, lr=5e-5):
        """Split ``data_filtered`` 80/10/10 and fit a BPR ranking model on it.

        Args:
            embed_size: embedding size passed to ``BPR``.
            n_epochs: number of training epochs passed to ``BPR``.
            lr: learning rate passed to ``BPR``.

        Raises:
            ValueError: if no interaction data has been loaded yet.
        """
        if self.data_filtered is None:
            raise ValueError("No data loaded. Call load_and_preprocess() first.")

        # Split data
        self.train_data, self.eval_data, self.test_data = random_split(
            self.data_filtered,
            multi_ratios=[0.8, 0.1, 0.1]
        )

        # Build datasets
        self.train_data, self.data_info = DatasetPure.build_trainset(self.train_data)
        self.eval_data = DatasetPure.build_evalset(self.eval_data)
        self.test_data = DatasetPure.build_testset(self.test_data)

        # Initialize model
        tf.compat.v1.reset_default_graph()
        self.model = BPR(
            task="ranking",
            data_info=self.data_info,
            loss_type="bpr",
            embed_size=embed_size,
            n_epochs=n_epochs,
            lr=lr,
            batch_size=1024,
            num_neg=5,
            reg=5e-6,
            sampler="random"
        )

        # Train model
        self.model.fit(
            self.train_data,
            neg_sampling=True,
            shuffle=True,
            verbose=2,
            eval_data=self.eval_data,
            metrics=list(self._METRICS)
        )

    def save_recommendations_as_csv(self, items_information, amount_of_recs, path):
        """Compute recommendations for all mapped users and write them to ``path``.

        Args:
            items_information: DataFrame with ``id``, ``name`` and ``ingredients``
                columns used to describe recommended items.
            amount_of_recs: number of recommendations per user.
            path: destination CSV file.

        Returns:
            The DataFrame that was written.
        """
        df = self.get_recommendations(items_information, amount_of_recs)
        df.to_csv(path, index=False)
        return df

    def get_recommendations(self, items_information, n_rec):
        """Collect recommendations for every user in ``user_id_map`` in one DataFrame.

        Returns an empty DataFrame with the usual recommendation columns when no
        users are mapped (``pd.concat`` would raise on an empty list).
        """
        dfs = [
            self.get_recommendation(user_identifier, n_rec, items_information)
            for user_identifier in self.user_id_map
        ]
        if not dfs:
            return pd.DataFrame(columns=self._RECOMMENDATION_COLUMNS)
        # Combine the per-user frames
        return pd.concat(dfs, ignore_index=True)

    def get_recommendation(self, user_identifier, n_rec, items_information):
        """Get recommendations for one user (UUID string or numeric ID) as a DataFrame.

        Args:
            user_identifier: UUID string present in ``user_id_map``, or a numeric
                user ID that is passed to the model unchanged.
            n_rec: number of items to recommend.
            items_information: DataFrame with ``id``, ``name`` and ``ingredients``.

        Returns:
            DataFrame with columns ``uuid``, ``item_id``, ``item_title`` and
            ``item_ingredients``; ``uuid`` holds ``user_identifier`` as given.

        Raises:
            ValueError: if the model is not trained or the UUID is unknown.
        """
        if not self.model:
            raise ValueError("Model not trained. Call train() first.")

        # UUID lookup; any non-string identifier is treated as the numeric ID itself.
        if isinstance(user_identifier, str):
            if user_identifier not in self.user_id_map:
                raise ValueError(f"User UUID '{user_identifier}' not found.")
            user_id = self.user_id_map[user_identifier]
        else:
            user_id = user_identifier

        # Fetch recommendations
        recommendations = self.model.recommend_user(
            user=user_id,
            n_rec=n_rec,
            filter_consumed=True
        )

        # Build one record per recommended recipe
        records = []
        for recipe in recommendations[user_id]:
            # Look up title and ingredients by recipe id
            item_title, item_ingredients = self.__find_item_by_id(recipe, items_information)
            records.append({
                "uuid": user_identifier,
                "item_id": recipe,
                "item_title": item_title,
                "item_ingredients": item_ingredients
            })

        # Passing the columns keeps the schema stable even with zero records.
        return pd.DataFrame(records, columns=self._RECOMMENDATION_COLUMNS)

    def evaluate(self):
        """Evaluate the trained model on the held-out test split.

        Raises:
            ValueError: if the model is not trained or no test split exists
                (e.g. on an instance restored with ``get``).
        """
        if not self.model:
            raise ValueError("Model not trained. Call train() first.")
        if self.test_data is None:
            raise ValueError("No test data available. Call train() first.")
        return evaluate(
            model=self.model,
            data=self.test_data,
            neg_sampling=True,
            metrics=list(self._METRICS)
        )

    def info(self, UUID):
        """Return all interactions of the user identified by ``UUID``.

        Returns:
            DataFrame with columns ``user`` (the UUID), ``item``, ``label`` and
            ``name``; empty if no data is loaded, the UUID is unknown, or the
            user has no interactions.
        """
        # No data loaded
        if not isinstance(self.data_filtered, pd.DataFrame):
            return pd.DataFrame(columns=self._INFO_COLUMNS)

        # Unknown UUID
        if UUID not in self.user_id_map:
            return pd.DataFrame(columns=self._INFO_COLUMNS)

        # Numeric user ID
        user_id = self.user_id_map[UUID]

        # Filter interactions
        user_interactions = self.data_filtered[self.data_filtered['user'] == user_id].copy()

        if user_interactions.empty:
            return pd.DataFrame(columns=self._INFO_COLUMNS)

        # Show the UUID instead of the numeric ID
        user_interactions['user'] = UUID

        # Attach recipe names
        merged = user_interactions.merge(self.name_df, left_on='item', right_on='id', how='left')
        merged['name'] = merged['name'].fillna('Unknown Recipe')

        return merged[self._INFO_COLUMNS]

    def save(self, storagepath):
        """Persist model, DataInfo, user mapping, recipe names and filtered data.

        Args:
            storagepath: directory to write into (created if missing).

        Raises:
            ValueError: if the model is not trained.
        """
        if not self.model:
            raise ValueError("Modell nicht trainiert")

        os.makedirs(storagepath, exist_ok=True)

        # 1. DataInfo + model via LibRecommender's own methods. The DataInfo has
        #    to be stored as well because get() must hand it back to BPR.load.
        if self.data_info is not None:
            self.data_info.save(storagepath, model_name=self._MODEL_NAME)
        self.model.save(storagepath, model_name=self._MODEL_NAME)

        # 2. User mapping as JSON
        with open(os.path.join(storagepath, "user_mapping.json"), "w") as f:
            json.dump(self.user_id_map, f)

        # 3. Recipe names
        self.name_df.to_json(
            os.path.join(storagepath, "recipe_names.json"),
            orient="records"
        )

        # 4. Filtered data (only present after load_and_preprocess/import)
        if self.data_filtered is not None:
            self.data_filtered.to_parquet(
                os.path.join(storagepath, "filtered_data.parquet")
            )

    @classmethod
    def get(cls, storagepath):
        """Restore an instance previously written by ``save``.

        ``__init__`` is bypassed, so nothing is downloaded. The train/eval/test
        splits are not persisted and are therefore ``None`` on the result.

        Args:
            storagepath: directory that ``save`` wrote to.

        Returns:
            A ``RecipeRecommender`` with model, mapping and data restored.
        """
        # Local import: DataInfo is only needed when restoring from disk.
        from libreco.data import DataInfo

        instance = cls.__new__(cls)
        instance.data_path = None  # No longer relevant
        # Splits are not stored; define the attributes so methods can check them.
        instance.train_data = None
        instance.eval_data = None
        instance.test_data = None

        # 1. DataInfo first: BPR.load takes it as an argument.
        instance.data_info = DataInfo.load(storagepath, model_name=cls._MODEL_NAME)

        # 2. Model
        instance.model = BPR.load(
            path=storagepath,
            model_name=cls._MODEL_NAME,
            data_info=instance.data_info
        )

        # 3. User mapping
        with open(os.path.join(storagepath, "user_mapping.json"), "r") as f:
            instance.user_id_map = json.load(f)

        # 4. Recipe names
        instance.name_df = pd.read_json(
            os.path.join(storagepath, "recipe_names.json"),
            orient="records"
        )

        # 5. Filtered data; save() writes it only when it exists.
        filtered_path = os.path.join(storagepath, "filtered_data.parquet")
        if os.path.exists(filtered_path):
            instance.data_filtered = pd.read_parquet(filtered_path)
        else:
            instance.data_filtered = None

        return instance

    # ------------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------------

    def _load_recipe_names(self):
        """Load the recipe ``id`` -> ``name`` table from RAW_recipes.csv into ``name_df``."""
        path = kagglehub.dataset_download(self.data_path)
        raw_recipes_path = os.path.join(path, "RAW_recipes.csv")
        self.name_df = pd.read_csv(raw_recipes_path)[["name", "id"]]

    def _rename_and_filter_data(self, interactions_data):
        """Return a copy reduced to ``user``/``item``/``label`` with an integer label.

        Args:
            interactions_data: DataFrame with ``user_id``, ``recipe_id`` and
                ``rating`` columns; it is not modified.
        """
        renamed = interactions_data.rename(columns={
            "user_id": "user",
            "recipe_id": "item",
            "rating": "label"
        })

        # Selecting with a list yields a new frame; astype returns another one.
        # (Assigning via ``.loc[:, "label"]`` would write into the existing
        # column and keep its dtype on pandas 2.x, so the cast would be lost.)
        return renamed[self._INTERACTION_COLUMNS].astype({"label": int})

    def _get_recipe_name(self, recipe_id):
        """Return the recipe name for ``recipe_id`` or ``"Unknown Recipe"``."""
        name = self.name_df.loc[self.name_df['id'] == recipe_id, 'name']
        return name.values[0] if not name.empty else "Unknown Recipe"

    def import_ratings_csv(self, file_path):
        """Import ratings from a CSV and append them to ``data_filtered``.

        The CSV must contain ``uuid``, ``item_id`` and ``rating``. Errors are
        reported with ``print`` instead of being raised (best-effort import).
        """
        try:
            # Load CSV
            df = pd.read_csv(file_path)
            print("CSV erfolgreich geladen:")
            print(df.head())

            # Check required columns
            required = {"uuid", "item_id", "rating"}
            if not required.issubset(df.columns):
                missing = required - set(df.columns)
                raise ValueError(f"Fehlende Spalten: {missing}")

            # Process and map UUIDs
            processed_df = self.__process_ratings(df)

            # Add to data
            self.data_filtered = pd.concat(
                [self.data_filtered, processed_df],
                ignore_index=True
            )
            print(f"{len(processed_df)} neue Bewertungen hinzugefügt.")

        except FileNotFoundError:
            print(f"Datei {file_path} nicht gefunden.")
        except Exception as e:
            print(f"Fehler: {str(e)}")

    def __process_ratings(self, df):
        """Convert imported ratings to the internal ``user``/``item``/``label`` layout.

        Unknown UUIDs get fresh numeric IDs (recorded in ``user_id_map``), ratings
        are shifted from the CSV range (-2..2) to (1..5), and any additional CSV
        columns are dropped. ``df`` itself is not modified.
        """
        # Rename columns and keep only the interaction columns
        ratings = df.rename(columns={
            "uuid": "user",
            "item_id": "item",
            "rating": "label"
        })[self._INTERACTION_COLUMNS].copy()

        # convert rating value range from csv (-2,2) to (1,5)
        ratings["label"] = ratings["label"] + 3

        # New IDs must be larger than every ID already in use: both the ones
        # handed out earlier and the numeric user IDs in the loaded data.
        # Otherwise an imported user would be merged with an existing user.
        known_ids = [int(v) for v in self.user_id_map.values()]
        if isinstance(self.data_filtered, pd.DataFrame) and "user" in self.data_filtered:
            data_max = self.data_filtered["user"].max()
            if pd.notna(data_max):
                known_ids.append(int(data_max))
        current_max = max(known_ids) if known_ids else 0

        # Generate new IDs for unknown UUIDs
        new_users = [uuid for uuid in ratings["user"].unique() if uuid not in self.user_id_map]
        if new_users:
            new_ids = range(current_max + 1, current_max + len(new_users) + 1)
            self.user_id_map.update(zip(new_users, new_ids))

        # Replace UUIDs with numeric IDs
        ratings["user"] = ratings["user"].map(self.user_id_map)
        return ratings

    def __get_score(self, userid, itemid):
        """Return the model's prediction for a (user, item) pair."""
        return self.model.predict(userid, itemid)

    def __find_item_by_id(self, recipe_id, items_information):
        """Return ``(name, ingredients)`` of ``recipe_id`` from ``items_information``.

        Returns ``("Unknown Recipe", None)`` when the id is not present.
        """
        match = items_information.loc[items_information["id"] == recipe_id]
        if match.empty:
            return "Unknown Recipe", None
        return match['name'].values[0], match['ingredients'].values[0]


2025-03-27 17:53:59.078960: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-27 17:53:59.080710: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-27 17:53:59.113755: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-27 17:53:59.114606: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
non-resource variables are not supported in the long term


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from config import RATINGS_FILE

# Example run: build the recommender and keep items/users with >= 20 interactions
recommender = RecipeRecommender()
recommender.load_and_preprocess(20)

# Import new users from the ratings CSV before fitting the model
recommender.import_ratings_csv(file_path="../" + RATINGS_FILE)

# Fit, then show the test-set metrics as the cell output
recommender.train()
recommender.evaluate()

CSV erfolgreich geladen:
                                   uuid  item_id  rating  \
0  3593cd68-32a3-4575-a41e-03d94d968649    90323       1   
1  3593cd68-32a3-4575-a41e-03d94d968649    43806       2   
2  3593cd68-32a3-4575-a41e-03d94d968649    41168      -1   
3  3593cd68-32a3-4575-a41e-03d94d968649    90229       1   
4  3593cd68-32a3-4575-a41e-03d94d968649      519       1   

                                          item_title  \
0                                        garlic loaf   
1  chicken coconut curry soup   a k a  easy mulli...   
2                       cherry almond butter cookies   
3                                garlic fries  light   
4              second only to my meatloaf  meatballs   

                                    item_ingredients  
0  ['butter', 'parsley', 'garlic powder', 'garlic...  
1  ['whole boneless skinless chicken breast', 'po...  
2  ['butter', 'sugar', 'egg', 'vanilla', 'flour',...  
3  ['canola oil', 'salt', 'baking potatoes', 'coo...  
4 

2025-03-27 17:54:09.804373: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:353] MLIR V1 optimization pass is not enabled
train: 100%|██████████| 518/518 [00:03<00:00, 170.95it/s]


Epoch 1 elapsed: 3.032s
	 [32mtrain_loss: 0.6923[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 209.47it/s]
eval_listwise: 100%|██████████| 2426/2426 [00:00<00:00, 3081.42it/s]


	 eval log_loss: 0.6926
	 eval roc_auc: 0.5482
	 eval precision@10: 0.0045
	 eval recall@10: 0.0088
	 eval ndcg@10: 0.0220


train: 100%|██████████| 518/518 [00:02<00:00, 172.82it/s]


Epoch 2 elapsed: 3.004s
	 [32mtrain_loss: 0.6905[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 196.97it/s]
eval_listwise: 100%|██████████| 2426/2426 [00:00<00:00, 3259.74it/s]


	 eval log_loss: 0.6920
	 eval roc_auc: 0.5790
	 eval precision@10: 0.0089
	 eval recall@10: 0.0169
	 eval ndcg@10: 0.0421


train: 100%|██████████| 518/518 [00:02<00:00, 173.77it/s]


Epoch 3 elapsed: 2.988s
	 [32mtrain_loss: 0.6885[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 203.77it/s]
eval_listwise: 100%|██████████| 2426/2426 [00:00<00:00, 3173.24it/s]


	 eval log_loss: 0.6915
	 eval roc_auc: 0.6021
	 eval precision@10: 0.0113
	 eval recall@10: 0.0219
	 eval ndcg@10: 0.0527


train: 100%|██████████| 518/518 [00:03<00:00, 166.81it/s]


Epoch 4 elapsed: 3.109s
	 [32mtrain_loss: 0.6866[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 193.07it/s]
eval_listwise: 100%|██████████| 2426/2426 [00:00<00:00, 3613.79it/s]


	 eval log_loss: 0.6909
	 eval roc_auc: 0.6185
	 eval precision@10: 0.0132
	 eval recall@10: 0.0262
	 eval ndcg@10: 0.0604


train: 100%|██████████| 518/518 [00:03<00:00, 164.75it/s]


Epoch 5 elapsed: 3.149s
	 [32mtrain_loss: 0.6847[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 184.20it/s]
eval_listwise: 100%|██████████| 2426/2426 [00:01<00:00, 1636.44it/s]


	 eval log_loss: 0.6904
	 eval roc_auc: 0.6301
	 eval precision@10: 0.0141
	 eval recall@10: 0.0283
	 eval ndcg@10: 0.0656


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 177.88it/s]
eval_listwise: 100%|██████████| 2401/2401 [00:01<00:00, 1895.46it/s]


{'loss': 0.6903925517189129,
 'roc_auc': 0.6317725844049902,
 'precision': 0.015243648479800084,
 'recall': 0.03151605799500543,
 'ndcg': 0.06885874571273692}

In [4]:
# Show the stored interactions for one imported user (empty if the UUID is unknown)
recommender.info(UUID="df903ba4-a8f3-406f-814d-4420b00ab611")

Unnamed: 0,user,item,label,name


In [5]:
def load_items_information(dataset_handle="shuyangli94/food-com-recipes-and-user-interactions"):
    """Load RAW_recipes.csv from a kagglehub dataset as a DataFrame.

    Args:
        dataset_handle: kagglehub dataset handle passed to
            ``kagglehub.dataset_download``; defaults to the dataset this
            notebook has always used.

    Returns:
        DataFrame with the full contents of ``RAW_recipes.csv``.
    """
    dataset_dir = kagglehub.dataset_download(dataset_handle)
    return pd.read_csv(os.path.join(dataset_dir, "RAW_recipes.csv"))

# Full recipe table; passed to the recommender to look up item names and ingredients.
items_information = load_items_information()

In [6]:
from config import RECOMMENDATIONS_FILE

# Recommendations for every imported user (UUID), also written to a CSV file
recommendations = recommender.save_recommendations_as_csv(
    items_information=items_information,
    amount_of_recs=20,
    path="../" + RECOMMENDATIONS_FILE + "bpr.csv",
)

In [7]:
# Display the recommendations DataFrame returned by save_recommendations_as_csv above.
recommendations

Unnamed: 0,uuid,item_id,item_title,item_ingredients
0,3593cd68-32a3-4575-a41e-03d94d968649,89204,crock pot chicken with black beans cream cheese,"['boneless chicken breasts', 'black beans', 'c..."
1,3593cd68-32a3-4575-a41e-03d94d968649,68955,japanese mum s chicken,"['chicken drumsticks', 'water', 'balsamic vine..."
2,3593cd68-32a3-4575-a41e-03d94d968649,28148,oven fried chicken chimichangas,"['picante sauce', 'ground cumin', 'dried orega..."
3,3593cd68-32a3-4575-a41e-03d94d968649,39087,creamy cajun chicken pasta,"['boneless skinless chicken breast halves', 'l..."
4,3593cd68-32a3-4575-a41e-03d94d968649,27208,to die for crock pot roast,"['beef roast', 'brown gravy mix', 'dried itali..."
...,...,...,...,...
495,0feaada3-c3e5-4b9f-a8d9-6fd5930985da,43023,creamy garlic penne pasta,"['penne', 'butter', 'garlic cloves', 'flour', ..."
496,0feaada3-c3e5-4b9f-a8d9-6fd5930985da,28148,oven fried chicken chimichangas,"['picante sauce', 'ground cumin', 'dried orega..."
497,0feaada3-c3e5-4b9f-a8d9-6fd5930985da,69173,kittencal s italian melt in your mouth meatballs,"['ground beef', 'egg', 'parmesan cheese', 'bre..."
498,0feaada3-c3e5-4b9f-a8d9-6fd5930985da,34382,mashed red potatoes with garlic and parmesan,"['red potatoes', 'garlic cloves', 'butter', 'm..."
