In [1]:
# !pip install pandas
# !pip install --upgrade kagglehub
# !pip install -U LibRecommender
# !pip install keras==2.12.0 tensorflow==2.12.0
#
# !pip show LibRecommender

In [8]:
import json
import os
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import BPR, UserCF, ItemCF, SVD, SVDpp, ALS
from libreco.evaluation import evaluate
import kagglehub
import tensorflow as tf

class RecipeRecommender:
    def __init__(self, data_path="shuyangli94/food-com-recipes-and-user-interactions"):
        # Hyperparameter mit BPR als Referenz
        self.embed_size = 64     # Für Embedding-basierte Modelle
        self.n_epochs = 8         # Für trainierbare Modelle
        self.lr = 5e-5            # Lernrate
        self.reg = 5e-6           # Regularisierung
        self.batch_size = 1024    # Batch-Größe
        self.num_neg = 10          # Negative Samples
        self.sampler = "random"   # Sampling-Methode
        self.k_sim = 50           # Für CF-Modelle
        self.sim_type = "cosine"  # Ähnlichkeitsmaß

        # Gemeinsame Objekte
        self.model = None
        self.data_info = None
        self.name_df = None
        self.data_filtered = None
        self.train_data = None
        self.eval_data = None
        self.test_data = None
        self.user_id_map = {}

        self.data_path = data_path
        self._load_recipe_names()


    def set_model(self, model_type):
        """Zentrale Methode zur Modellauswahl mit einheitlichen Parametern"""
        tf.compat.v1.reset_default_graph()
        self.__prepare_data(model_type)  # Daten für alle Modelle vorbereiten

        common_params = {
            "task": "ranking",
            "data_info": self.data_info
        }

        model_config = {
           "BPR": {
            "class": BPR,
            "params": {
                "loss_type": "bpr",
                "embed_size": self.embed_size,
                "n_epochs": self.n_epochs,
                "lr": self.lr,
                "batch_size": self.batch_size,
                "num_neg": self.num_neg,
                "reg": self.reg,
                "sampler": self.sampler
            }
        },
        "UserCF": {
            "class": UserCF,
            "params": {
                "k_sim": self.k_sim,
                "sim_type": self.sim_type
            }
        },
        "ItemCF": {
            "class": ItemCF,
            "params": {
                "k_sim": self.k_sim,
                "sim_type": self.sim_type
            }
        },
        "SVD": {
            "class": SVD,
            "params": {
                "embed_size": self.embed_size,
                "n_epochs": self.n_epochs,
                "lr": self.lr,
                "reg": self.reg
            }
        },
        "SVDpp": {
            "class": SVDpp,
            "params": {
                "embed_size": self.embed_size,
                "n_epochs": self.n_epochs,
                "lr": self.lr,
                "reg": self.reg,
            }
        },
        "ALS": {
            "class": ALS,
            "params": {
                "embed_size": self.embed_size,
                "n_epochs": self.n_epochs,
                "reg": self.reg,
                "alpha": 10,
                "use_cg": True,
                "n_threads": 1
            }
        }
    }
        config = model_config.get(model_type)
        if not config:
            raise ValueError(f"Unbekanntes Modell: {model_type}")

        self.model = config["class"](**common_params, **config["params"])


    def train(self):
      if not self.model:
          raise ValueError("Model not trained. Call set_model() first.")

     # Gemeinsame Parameter
      common_params = {
          "verbose": 2,
          "eval_data": self.eval_data,
          "metrics": ["loss", "roc_auc", "precision", "recall", "ndcg"]
     }

      # Modellspezifische Parameter
      if isinstance(self.model, (UserCF, ItemCF)):
          # Für Collaborative Filtering
          fit_params = {
              "neg_sampling": True,
              "verbose": 1
         }
      else:
          # Für Embedding-basierte Modelle: batch_size entfernen
          fit_params = {
              "neg_sampling": True,
              "shuffle": True,
             **common_params
         }

     # Training durchführen
      self.model.fit(
          self.train_data,
          **fit_params
      )

    def load_and_preprocess(self, min_interactions):
        """Load and preprocess interaction data"""
        # Download and load dataset
        path = kagglehub.dataset_download(self.data_path)

        # Load and combine interaction data
        train = pd.read_csv(os.path.join(path, "interactions_train.csv"))
        eval = pd.read_csv(os.path.join(path, "interactions_validation.csv"))
        test = pd.read_csv(os.path.join(path, "interactions_test.csv"))

        combined = pd.concat([train, eval, test], ignore_index=True)
        combined = self._rename_and_filter_data(combined)

        # Filter items
        item_counts = combined["item"].value_counts()
        items_to_keep = item_counts[item_counts >= min_interactions].index
        filtered = combined[combined["item"].isin(items_to_keep)]

        # Filter users
        user_counts = filtered["user"].value_counts()
        users_to_keep = user_counts[user_counts >= min_interactions].index
        self.data_filtered = filtered[filtered["user"].isin(users_to_keep)]

    def __prepare_data(self,model_type):
      # Convert ratings to 0/1 for UserCF and ItemCF
        if model_type in ["UserCF", "ItemCF"]:
        # Binarize ratings: 0-2 → 0, 3-5 → 1
          self.data_filtered['label'] = self.data_filtered['label'].apply(
              lambda x: 0 if x <= 2 else 1
          )
      # Split data
        self.train_data, self.eval_data, self.test_data = random_split(
            self.data_filtered,
            multi_ratios=[0.8, 0.1, 0.1]
        )

        # Build datasets
        self.train_data, self.data_info = DatasetPure.build_trainset(self.train_data)
        self.eval_data = DatasetPure.build_evalset(self.eval_data)
        self.test_data = DatasetPure.build_testset(self.test_data)


    def save_recommendations_as_csv(self,items_information,amount_of_recs, path):
      df = self.get_recommendations(items_information,amount_of_recs)
      df.to_csv(path, index=False)
      return df

    def get_recommendations(self, items_information, n_rec):
        """
        Holt Empfehlungen für alle User in user_id_map und speichert die Ergebnisse in einem DataFrame.
        """
        dfs = []
        for user_identifier in self.user_id_map:
            df = self.get_recommendation(user_identifier, n_rec, items_information)
            dfs.append(df)
        # Alle einzelnen DataFrames zusammenfügen
        final_df = pd.concat(dfs, ignore_index=True)
        return final_df

    def get_recommendation(self, user_identifier, n_rec, items_information):
      """Get recommendations for a user (UUID or numeric ID) und speichert alle Daten in einem DataFrame"""
      if not self.model:
          raise ValueError("Model not trained. Call train() first.")

      # UUID Lookup
      if isinstance(user_identifier, str):
          if user_identifier not in self.user_id_map:
              raise ValueError(f"User UUID '{user_identifier}' not found.")
          user_id = self.user_id_map[user_identifier]

     # Empfehlungen abrufen
      recommendations = self.model.recommend_user(
          user=user_id,
          n_rec=n_rec,
          filter_consumed=True
     )

      # Liste für die Daten vorbereiten
      records = []
      for recipe in recommendations[user_id]:
          # Item-Titel und Zutaten anhand der recipe_id abrufen
          item_title, item_ingredients = self.__find_item_by_id(recipe, items_information)
          # Datensatz zur Liste hinzufügen
          records.append({
              "uuid": user_identifier,
              "item_id": recipe,
             "item_title": item_title,
              "item_ingredients": item_ingredients
          })

      # DataFrame aus der Liste erstellen
      df = pd.DataFrame(records)
      return df

    def evaluate(self):
        """Evaluate model performance"""
        return evaluate(
            model=self.model,
            data=self.test_data,
            neg_sampling=True,
            metrics=["loss", "roc_auc", "precision", "recall", "ndcg"]
        )

    def _load_recipe_names(self):
        """Load recipe ID to name mapping"""
        path = kagglehub.dataset_download(self.data_path)
        raw_recipes_path = os.path.join(path, "RAW_recipes.csv")
        self.name_df = pd.read_csv(raw_recipes_path)[["name", "id"]]

    def _rename_and_filter_data(self, interactions_data):
      # Erzeuge explizite Kopie des DataFrames
      df = interactions_data.copy()

      # Spalten umbenennen (ohne inplace)
      df = df.rename(columns={
          "user_id": "user",
          "recipe_id": "item",
          "rating": "label"
      })

      # Spalten filtern
      keep_cols = ["user", "item", "label"]
      df = df[keep_cols]

      # Typkonvertierung mit .loc
      df.loc[:, "label"] = df["label"].astype(int)
      return df

    def _get_recipe_name(self, recipe_id):
        """Helper to get recipe name from ID"""
        name = self.name_df.loc[self.name_df['id'] == recipe_id, 'name']
        return name.values[0] if not name.empty else "Unknown Recipe"

    def import_ratings_csv(self, file_path):
        """Import ratings from CSV and map recipe names to correct IDs"""
        try:
            # Load CSV
            df = pd.read_csv(file_path)
            print("CSV erfolgreich geladen:")
            print(df.head())

            # Check if recipe_name column exists
            if "item_title" not in df.columns:
                raise ValueError("Fehlende Spalte: item_title")

            # Map recipe names to correct IDs
            df["item_id"] = df["item_title"].apply(lambda name: self.__find_item_id_by_name(name))

            # Check required columns after mapping
            required = {"uuid", "item_id", "rating"}
            if not required.issubset(df.columns):
                missing = required - set(df.columns)
                raise ValueError(f"Fehlende Spalten: {missing}")

            # Process and map UUIDs
            processed_df = self.__process_ratings(df)

            # Add to data
            self.data_filtered = pd.concat(
                [self.data_filtered, processed_df],
                ignore_index=True
            )
            print(f"{len(processed_df)} neue Bewertungen hinzugefügt.")

        except Exception as e:
            print(f"Fehler beim Importieren der Bewertungen: {e}")

    def __process_ratings(self, df):
      """Map UUIDs to numeric IDs"""
      # Rename columns
      df = df.rename(columns={
          "uuid": "user",
          "item_id": "item",
          "rating": "label"
      })

      # Convert from range [-2,2] to [1,5]
      df["label"] = df["label"] + 3

      # Determine current max ID from user_id_map
      current_max = max(self.user_id_map.values()) if self.user_id_map else 0

      # Generate new IDs for unknown UUIDs
      new_users = [uuid for uuid in df["user"].unique() if uuid not in self.user_id_map]
      num_new = len(new_users)

      print("WTH", num_new)
      if num_new > 0:
          new_ids = range(current_max + 1, current_max + num_new + 1)
          self.user_id_map.update(zip(new_users, new_ids))

      # Replace UUIDs with numeric IDs
      df["user"] = df["user"].map(self.user_id_map)
      return df


    def __get_score(self,userid,itemid):
     return self.model.predict(userid,itemid)

    def __find_item_by_id(self,recipe_id, items_information):
      df = items_information.loc[items_information["id"] == recipe_id]
      return df['name'].values[0], df['ingredients'].values[0]

    def __find_item_id_by_name(self, item_name):
      df = self.name_df.loc[self.name_df["name"] == item_name]
      return df['id'].values[0]

In [3]:
def load_items_information(data_path="shuyangli94/food-com-recipes-and-user-interactions"):
    """Download the dataset and return its RAW_recipes.csv as a DataFrame.

    Args:
        data_path: Dataset handle passed to ``kagglehub.dataset_download``.
            Defaults to the handle that was previously hard-coded here.

    Returns:
        The unfiltered contents of RAW_recipes.csv.
    """
    dataset_dir = kagglehub.dataset_download(data_path)
    return pd.read_csv(os.path.join(dataset_dir, "RAW_recipes.csv"))

# Recipe table from RAW_recipes.csv; handed to the recommender below when the
# recommendations are exported.
items_information = load_items_information()

In [9]:
import contextlib
from config import RATINGS_FILE
from config import RECOMMENDATIONS_FILE
import io

tf.get_logger().setLevel('ERROR')

RECOMMENDER_TYPES = ["SVD", "SVDpp", "ALS", "BPR", "UserCF", "ItemCF"]

# Train, evaluate and export recommendations once per model type.
for RECOMMENDER_TYPE in RECOMMENDER_TYPES:
    print(f"Training {RECOMMENDER_TYPE}...")

    # Suppresses print statements of the steps below.
    # NOTE: this also hides the error line import_ratings_csv prints when the
    # import fails, so a failed import is silent at this point.
    with contextlib.redirect_stdout(io.StringIO()):
        # A fresh recommender per model type: data is reloaded and re-imported.
        recommender = RecipeRecommender()
        recommender.load_and_preprocess(min_interactions=20)
        # Import new users from the ratings CSV
        recommender.import_ratings_csv("../"+RATINGS_FILE)
        recommender.set_model(RECOMMENDER_TYPE)
        recommender.train()
        # Not named ``eval``: that would shadow the builtin for the whole kernel.
        eval_result = recommender.evaluate()

    print(eval_result)

    # Build the per-model file name once; it is used for saving and logging.
    recommendations_file = RECOMMENDATIONS_FILE + RECOMMENDER_TYPE.lower() + ".csv"
    recommendations = recommender.save_recommendations_as_csv(
        items_information,
        20,
        "../" + recommendations_file
    )

    print(f"Recommendations for {RECOMMENDER_TYPE} saved to {recommendations_file}")

Training SVD...


train: 100%|██████████| 826/826 [00:01<00:00, 591.04it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 834.56it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4423.23it/s]
train: 100%|██████████| 826/826 [00:01<00:00, 651.02it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 461.29it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3785.18it/s]
train: 100%|██████████| 826/826 [00:01<00:00, 625.56it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 695.20it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3943.88it/s]
train: 100%|██████████| 826/826 [00:01<00:00, 628.80it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 683.03it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4122.42it/s]
train: 100%|██████████| 826/826 [00:01<00:00, 635.97it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 590.31it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4316.91it/s]
train: 100%|██████████| 826/826 [00:01<00:00, 641.

{'loss': 0.6878232019150221, 'roc_auc': 0.668207986371163, 'precision': 0.017284464806330696, 'recall': 0.03666426184226875, 'ndcg': 0.07620526905902279}
Recommendations for SVD saved to data/recommendations_svd.csv
Training SVDpp...


train: 100%|██████████| 826/826 [00:06<00:00, 133.50it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 905.70it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4255.35it/s]
train: 100%|██████████| 826/826 [00:05<00:00, 140.94it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 711.26it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4361.41it/s]
train: 100%|██████████| 826/826 [00:05<00:00, 143.24it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 632.77it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4484.68it/s]
train: 100%|██████████| 826/826 [00:05<00:00, 138.54it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 753.08it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3649.20it/s]
train: 100%|██████████| 826/826 [00:06<00:00, 135.57it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 581.01it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 2573.49it/s]
train: 100%|██████████| 826/826 [00:05<00:00, 139.

{'loss': 0.6711130683806029, 'roc_auc': 0.6747311730511194, 'precision': 0.019241982507288632, 'recall': 0.04455498260414667, 'ndcg': 0.08357291029879352}
Recommendations for SVDpp saved to data/recommendations_svdpp.csv
Training ALS...


eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 419.58it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3567.48it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 768.61it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3871.51it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 769.56it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3309.77it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 561.71it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3427.79it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 781.83it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4442.23it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 809.55it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 2986.23it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 883.48it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3493.71it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00, 813.84it/s]
eval_listwise

{'loss': 0.6791907465111139, 'roc_auc': 0.614718052331181, 'precision': 0.012078300708038319, 'recall': 0.030527470585607924, 'ndcg': 0.0547997478245302}
Recommendations for ALS saved to data/recommendations_als.csv
Training BPR...


train: 100%|██████████| 1036/1036 [00:01<00:00, 588.59it/s]
eval_pointwise: 100%|██████████| 18/18 [00:00<00:00, 940.99it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4051.69it/s]
train: 100%|██████████| 1036/1036 [00:01<00:00, 596.11it/s]
eval_pointwise: 100%|██████████| 18/18 [00:00<00:00, 738.73it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4147.85it/s]
train: 100%|██████████| 1036/1036 [00:01<00:00, 637.88it/s]
eval_pointwise: 100%|██████████| 18/18 [00:00<00:00, 850.59it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3756.24it/s]
train: 100%|██████████| 1036/1036 [00:01<00:00, 643.74it/s]
eval_pointwise: 100%|██████████| 18/18 [00:00<00:00, 775.96it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 4042.85it/s]
train: 100%|██████████| 1036/1036 [00:01<00:00, 656.11it/s]
eval_pointwise: 100%|██████████| 18/18 [00:00<00:00, 780.09it/s]
eval_listwise: 100%|██████████| 2425/2425 [00:00<00:00, 3528.72it/s]
train: 100%|██████████| 1036/1

{'loss': 0.6862985082920336, 'roc_auc': 0.6609456010066809, 'precision': 0.01657642648896293, 'recall': 0.03274216763774256, 'ndcg': 0.07328320480665594}
Recommendations for BPR saved to data/recommendations_bpr.csv
Training UserCF...


top_k: 100%|██████████| 2511/2511 [00:00<00:00, 7325.86it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00,  8.77it/s]
eval_listwise: 100%|██████████| 2396/2396 [00:08<00:00, 291.20it/s]


{'loss': 7.887533769781554, 'roc_auc': 0.5814682716197465, 'precision': 0.019240400667779635, 'recall': 0.043639376101859806, 'ndcg': 0.08812397966194815}
Recommendations for UserCF saved to data/recommendations_usercf.csv
Training ItemCF...


top_k: 100%|██████████| 4527/4527 [00:00<00:00, 5317.73it/s]
eval_pointwise: 100%|██████████| 4/4 [00:00<00:00,  8.96it/s]
eval_listwise: 100%|██████████| 2396/2396 [00:06<00:00, 344.30it/s]


{'loss': 8.869423674388294, 'roc_auc': 0.4945897836628138, 'precision': 0.01373121869782972, 'recall': 0.034119062135645986, 'ndcg': 0.05512266287355322}
Recommendations for ItemCF saved to data/recommendations_itemcf.csv
