In [3]:
!pip install pandas
!pip install --upgrade kagglehub
!pip install -U LibRecommender
!pip install keras==2.12.0 tensorflow==2.12.0

!pip show LibRecommender

Collecting LibRecommender
  Downloading LibRecommender-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (30 kB)
Collecting gensim>=4.0.0 (from LibRecommender)
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting numpy<2.0,>=1.18.5 (from gensim>=4.0.0->LibRecommender)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy<1.14.0,>=1.7.0 (from gensim>=4.0.0->LibRecommender)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Downloading LibRecommender-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━

Name: LibRecommender
Version: 1.5.1
Summary: Versatile end-to-end recommender system.
Home-page: https://github.com/massquantity/LibRecommender
Author: massquantity
Author-email: massquantity <jinxin_madie@163.com>
License: MIT
Location: /usr/local/lib/python3.11/dist-packages
Requires: gensim, tqdm
Required-by: 


In [86]:
import os
import pandas as pd
from libreco.data import random_split, DatasetPure
from libreco.algorithms import BPR
from libreco.evaluation import evaluate
import kagglehub
import tensorflow as tf

class RecipeRecommender:
    """Recipe recommender built around LibRecommender's ``BPR`` model.

    The class downloads a Kaggle dataset via ``kagglehub``, reduces the
    interaction files to ``user`` / ``item`` / ``label`` columns, optionally
    merges in ratings of additional users that are identified by a UUID
    string, and trains / evaluates / queries a BPR ranking model.

    Attributes are documented inline in ``__init__``.
    """

    # Column layout shared by every interaction frame handled by this class.
    _INTERACTION_COLUMNS = ["user", "item", "label"]

    def __init__(self, data_path="shuyangli94/food-com-recipes-and-user-interactions"):
        """Store the dataset handle and eagerly load the recipe-name table.

        Args:
            data_path: Dataset handle passed to ``kagglehub.dataset_download``.

        Note:
            Construction triggers a dataset download (see
            ``_load_recipe_names``).
        """
        self.data_path = data_path
        self.model = None          # trained BPR model, set by train()
        self.data_info = None      # second value returned by DatasetPure.build_trainset
        self.name_df = None        # DataFrame with columns "name" and "id"
        self.data_filtered = None  # interaction DataFrame (user, item, label)
        self.train_data = None
        self.eval_data = None
        self.test_data = None
        self.user_id_map = {}      # maps imported UUID strings to numeric user IDs

        # Initialize recipe name mapping
        self._load_recipe_names()

    def _load_recipe_names(self):
        """Load the recipe ID -> name table from ``RAW_recipes.csv``."""
        dataset_dir = kagglehub.dataset_download(self.data_path)
        raw_recipes_path = os.path.join(dataset_dir, "RAW_recipes.csv")
        self.name_df = pd.read_csv(raw_recipes_path)[["name", "id"]]

    def _rename_and_filter_data(self, interactions_data):
        """Return a new frame with columns ``user``, ``item`` and integer ``label``.

        Args:
            interactions_data: DataFrame containing at least the columns
                ``user_id``, ``recipe_id`` and ``rating``.

        Returns:
            A new DataFrame; the input frame is not modified.
        """
        renamed = interactions_data.rename(columns={
            "user_id": "user",
            "recipe_id": "item",
            "rating": "label"
        })
        trimmed = renamed[self._INTERACTION_COLUMNS]

        # astype() returns a new frame with the requested dtype. Assigning via
        # ``.loc[:, "label"] = ...`` writes into the existing column on recent
        # pandas versions and would keep the old dtype.
        return trimmed.astype({"label": int})

    def load_and_preprocess(self, min_interactions):
        """Load all interaction splits and drop sparse items and users.

        The train / validation / test CSV files are concatenated, then items
        with fewer than ``min_interactions`` rows are removed, and afterwards
        users with fewer than ``min_interactions`` remaining rows are removed.
        The result is stored in ``self.data_filtered``.

        Args:
            min_interactions: Minimum number of rows an item, and then a user,
                must have to be kept.
        """
        dataset_dir = kagglehub.dataset_download(self.data_path)

        # Load and combine interaction data
        train = pd.read_csv(os.path.join(dataset_dir, "interactions_train.csv"))
        validation = pd.read_csv(os.path.join(dataset_dir, "interactions_validation.csv"))
        test = pd.read_csv(os.path.join(dataset_dir, "interactions_test.csv"))

        combined = pd.concat([train, validation, test], ignore_index=True)
        combined = self._rename_and_filter_data(combined)

        # Filter items
        item_counts = combined["item"].value_counts()
        items_to_keep = item_counts[item_counts >= min_interactions].index
        filtered = combined[combined["item"].isin(items_to_keep)]

        # Filter users (counted after the item filter has been applied)
        user_counts = filtered["user"].value_counts()
        users_to_keep = user_counts[user_counts >= min_interactions].index
        self.data_filtered = filtered[filtered["user"].isin(users_to_keep)]

    def train(self, embed_size=256, n_epochs=5, lr=5e-5):
        """Split ``self.data_filtered`` 80/10/10 and fit a BPR ranking model.

        Args:
            embed_size: Passed to ``BPR`` as ``embed_size``.
            n_epochs: Passed to ``BPR`` as ``n_epochs``.
            lr: Passed to ``BPR`` as ``lr``.

        Side effects:
            Overwrites ``train_data``, ``eval_data``, ``test_data``,
            ``data_info`` and ``model``.
        """
        # Split data
        self.train_data, self.eval_data, self.test_data = random_split(
            self.data_filtered,
            multi_ratios=[0.8, 0.1, 0.1]
        )

        # Build datasets (the raw splits are replaced by the built datasets)
        self.train_data, self.data_info = DatasetPure.build_trainset(self.train_data)
        self.eval_data = DatasetPure.build_evalset(self.eval_data)
        self.test_data = DatasetPure.build_testset(self.test_data)

        # Reset the TF1 default graph before constructing a new model so that
        # train() can be called more than once in the same kernel.
        tf.compat.v1.reset_default_graph()
        self.model = BPR(
            task="ranking",
            data_info=self.data_info,
            loss_type="bpr",
            embed_size=embed_size,
            n_epochs=n_epochs,
            lr=lr,
            batch_size=1024,
            num_neg=5,
            reg=5e-6,
            sampler="random"
        )

        # Train model
        self.model.fit(
            self.train_data,
            neg_sampling=True,
            shuffle=True,
            verbose=2,
            eval_data=self.eval_data,
            metrics=["loss", "roc_auc", "precision", "recall", "ndcg"]
        )

    def evaluate(self):
        """Evaluate the trained model on ``self.test_data``.

        Returns:
            Whatever ``libreco.evaluation.evaluate`` returns for the requested
            metrics.
        """
        # Inside the method body the bare name ``evaluate`` resolves to the
        # module-level libreco function, not to this method.
        return evaluate(
            model=self.model,
            data=self.test_data,
            neg_sampling=True,
            metrics=["loss", "roc_auc", "precision", "recall", "ndcg"]
        )

    def _resolve_user_id(self, user_identifier):
        """Translate a UUID string to its numeric ID; pass other values through.

        Raises:
            ValueError: If ``user_identifier`` is a string that is not present
                in ``self.user_id_map``.
        """
        if not isinstance(user_identifier, str):
            return user_identifier
        if user_identifier not in self.user_id_map:
            raise ValueError(f"User UUID '{user_identifier}' not found.")
        return self.user_id_map[user_identifier]

    def get_recommendation(self, user_identifier, n_rec=10):
        """Return recommended recipe names for a user.

        Args:
            user_identifier: UUID string of an imported user, or a numeric
                user ID.
            n_rec: Number of recommendations to request from the model.

        Returns:
            List of recipe names; IDs without a known name are reported as
            ``"Unknown Recipe"``.

        Raises:
            ValueError: If no model has been trained or the UUID is unknown.
        """
        recipe_ids = self.get_recommendation_ids(user_identifier, n_rec=n_rec)
        return [self._get_recipe_name(rid) for rid in recipe_ids]

    def get_recommendation_ids(self, user_identifier, n_rec=10):
        """Return recommended recipe IDs for a user.

        Args:
            user_identifier: UUID string of an imported user, or a numeric
                user ID.
            n_rec: Number of recommendations to request from the model.

        Returns:
            The entry for this user from ``model.recommend_user``.

        Raises:
            ValueError: If no model has been trained or the UUID is unknown.
        """
        if self.model is None:
            raise ValueError("Model not trained. Call train() first.")

        user_id = self._resolve_user_id(user_identifier)

        recommendations = self.model.recommend_user(
            user=user_id,
            n_rec=n_rec,
            filter_consumed=True
        )

        return recommendations[user_id]

    def _get_recipe_name(self, recipe_id):
        """Return the name for ``recipe_id`` or ``"Unknown Recipe"`` if absent."""
        matches = self.name_df.loc[self.name_df['id'] == recipe_id, 'name']
        return matches.values[0] if not matches.empty else "Unknown Recipe"

    def import_ratings_csv(self, file_path):
        """Import ratings from a CSV file and append them to ``data_filtered``.

        The CSV must provide the columns ``uuid``, ``item_id`` and ``rating``.
        Each unknown UUID is assigned a fresh numeric user ID (recorded in
        ``self.user_id_map``).

        This is a best-effort helper: every failure is reported with
        ``print`` and never raised to the caller.

        Args:
            file_path: Path of the CSV file to read.
        """
        try:
            # Load CSV
            df = pd.read_csv(file_path)
            print("CSV erfolgreich geladen:")
            print(df.head())

            # Check required columns
            required = {"uuid", "item_id", "rating"}
            if not required.issubset(df.columns):
                missing = required - set(df.columns)
                raise ValueError(f"Fehlende Spalten: {missing}")

            # Process and map UUIDs
            processed_df = self.__process_ratings(df)

            # Add to data
            self.data_filtered = pd.concat(
                [self.data_filtered, processed_df],
                ignore_index=True
            )
            print(f"{len(processed_df)} neue Bewertungen hinzugefügt.")

        except FileNotFoundError:
            print(f"Datei {file_path} nicht gefunden.")
        except Exception as e:
            print(f"Fehler: {str(e)}")

    def _next_free_user_id(self):
        """Return the smallest numeric ID above every user ID already in use.

        Both the IDs handed out to imported UUIDs and the numeric IDs present
        in ``self.data_filtered`` are considered, so an imported user can
        never share an ID with a user from the loaded dataset.
        """
        highest = max(self.user_id_map.values(), default=0)

        data = self.data_filtered
        if isinstance(data, pd.DataFrame) and "user" in data.columns and not data.empty:
            # Coerce defensively: non-numeric entries are ignored for the max.
            data_max = pd.to_numeric(data["user"], errors="coerce").max()
            if pd.notna(data_max):
                highest = max(highest, int(data_max))

        return highest + 1

    def __process_ratings(self, df):
        """Rename the import columns and replace UUIDs by numeric user IDs.

        Args:
            df: DataFrame with the columns ``uuid``, ``item_id``, ``rating``.

        Returns:
            A new DataFrame with exactly the columns ``user``, ``item`` and
            ``label``; the input frame is not modified.
        """
        # rename() returns a new frame, so the caller's frame stays untouched.
        df = df.rename(columns={
            "uuid": "user",
            "item_id": "item",
            "rating": "label"
        })

        # Generate new IDs for unknown UUIDs, starting above every ID in use.
        new_users = [uuid for uuid in df["user"].unique() if uuid not in self.user_id_map]
        if new_users:
            first_id = self._next_free_user_id()
            new_ids = range(first_id, first_id + len(new_users))
            self.user_id_map.update(zip(new_users, new_ids))

        # Replace UUIDs with numeric IDs
        df["user"] = df["user"].map(self.user_id_map)

        # Keep only the interaction columns so that unrelated CSV columns do
        # not leak into self.data_filtered.
        return df[self._INTERACTION_COLUMNS]

    def info(self, UUID):
        """Return all stored interactions of an imported user.

        Args:
            UUID: UUID string of an imported user.

        Returns:
            DataFrame with the columns ``user`` (the UUID), ``item``,
            ``label`` and ``name``. The frame is empty when no data is
            loaded, the UUID is unknown, or the user has no interactions.
        """
        empty_result = pd.DataFrame(columns=["user", "item", "label", "name"])

        # Check that interaction data has been loaded
        if self.data_filtered is None or not isinstance(self.data_filtered, pd.DataFrame):
            return empty_result

        # Check that the UUID is known
        if UUID not in self.user_id_map:
            return empty_result

        # Look up the numeric user ID and select that user's rows
        user_id = self.user_id_map[UUID]
        user_interactions = self.data_filtered[self.data_filtered['user'] == user_id].copy()

        if user_interactions.empty:
            return empty_result

        # Report the UUID instead of the internal numeric ID
        user_interactions['user'] = UUID

        # Attach recipe names; items without a match get a placeholder
        merged = user_interactions.merge(self.name_df, left_on='item', right_on='id', how='left')
        merged['name'] = merged['name'].fillna('Unknown Recipe')

        return merged[['user', 'item', 'label', 'name']]

In [87]:
# Example run: build the recommender, merge imported users, then train and score.
MIN_INTERACTIONS = 20
RATINGS_CSV_PATH = "/content/sample_data/ratings.csv"

recommender = RecipeRecommender()
recommender.load_and_preprocess(min_interactions=MIN_INTERACTIONS)
# Import the new users from CSV before training so their ratings are part of the split.
recommender.import_ratings_csv(RATINGS_CSV_PATH)
recommender.train()
recommender.evaluate()

CSV erfolgreich geladen:
                                   uuid  item_id  rating
0  df903ba4-a8f3-406f-814d-4420b00ab611    58086     2.0
1  df903ba4-a8f3-406f-814d-4420b00ab611    40621     1.0
2  df903ba4-a8f3-406f-814d-4420b00ab611    85475     0.0
3  df903ba4-a8f3-406f-814d-4420b00ab611    71967     0.0
4  df903ba4-a8f3-406f-814d-4420b00ab611    38584     1.0
75 neue Bewertungen hinzugefügt.
Training start time: [35m2025-03-23 22:19:14[0m


train: 100%|██████████| 517/517 [00:06<00:00, 74.62it/s]


Epoch 1 elapsed: 6.932s
	 [32mtrain_loss: 0.6923[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 139.84it/s]
eval_listwise: 100%|██████████| 2362/2362 [00:01<00:00, 1429.59it/s]


	 eval log_loss: 0.6926
	 eval roc_auc: 0.5466
	 eval precision@10: 0.0037
	 eval recall@10: 0.0069
	 eval ndcg@10: 0.0176


train: 100%|██████████| 517/517 [00:06<00:00, 86.09it/s]


Epoch 2 elapsed: 6.015s
	 [32mtrain_loss: 0.6905[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 164.06it/s]
eval_listwise: 100%|██████████| 2362/2362 [00:02<00:00, 972.75it/s]


	 eval log_loss: 0.6920
	 eval roc_auc: 0.5776
	 eval precision@10: 0.0089
	 eval recall@10: 0.0176
	 eval ndcg@10: 0.0404


train: 100%|██████████| 517/517 [00:06<00:00, 79.52it/s]


Epoch 3 elapsed: 6.508s
	 [32mtrain_loss: 0.6885[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 159.53it/s]
eval_listwise: 100%|██████████| 2362/2362 [00:01<00:00, 1376.72it/s]


	 eval log_loss: 0.6915
	 eval roc_auc: 0.6011
	 eval precision@10: 0.0120
	 eval recall@10: 0.0236
	 eval ndcg@10: 0.0518


train: 100%|██████████| 517/517 [00:06<00:00, 75.59it/s]


Epoch 4 elapsed: 6.842s
	 [32mtrain_loss: 0.6866[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 141.03it/s]
eval_listwise: 100%|██████████| 2362/2362 [00:01<00:00, 1431.55it/s]


	 eval log_loss: 0.6909
	 eval roc_auc: 0.6182
	 eval precision@10: 0.0133
	 eval recall@10: 0.0261
	 eval ndcg@10: 0.0574


train: 100%|██████████| 517/517 [00:05<00:00, 86.62it/s]


Epoch 5 elapsed: 5.977s
	 [32mtrain_loss: 0.6847[0m


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 165.19it/s]
eval_listwise: 100%|██████████| 2362/2362 [00:02<00:00, 1056.40it/s]


	 eval log_loss: 0.6904
	 eval roc_auc: 0.6302
	 eval precision@10: 0.0143
	 eval recall@10: 0.0279
	 eval ndcg@10: 0.0606


eval_pointwise: 100%|██████████| 10/10 [00:00<00:00, 106.60it/s]
eval_listwise: 100%|██████████| 2389/2389 [00:02<00:00, 1039.94it/s]


{'loss': 0.6903283554432341,
 'roc_auc': 0.6360119191796274,
 'precision': 0.01506906655504395,
 'recall': 0.03275767970283889,
 'ndcg': 0.06495866939135236}

In [88]:
# Show every stored interaction of the imported user, together with recipe names.
imported_user_uuid = "df903ba4-a8f3-406f-814d-4420b00ab611"
recommender.info(imported_user_uuid)

Unnamed: 0,user,item,label,name
0,df903ba4-a8f3-406f-814d-4420b00ab611,58086,2.0,dijon beets
1,df903ba4-a8f3-406f-814d-4420b00ab611,40621,1.0,fish cakes fast and simple
2,df903ba4-a8f3-406f-814d-4420b00ab611,85475,0.0,dijon chicken with panko crust
3,df903ba4-a8f3-406f-814d-4420b00ab611,71967,0.0,homemade mayonnaise
4,df903ba4-a8f3-406f-814d-4420b00ab611,38584,1.0,yummy and super easy crock pot oatmeal
5,df903ba4-a8f3-406f-814d-4420b00ab611,50719,1.0,the sweetest blueberry muffins
6,df903ba4-a8f3-406f-814d-4420b00ab611,17987,1.0,quick butternut squash soup
7,df903ba4-a8f3-406f-814d-4420b00ab611,78747,0.0,gallo pinto costa rican rice and beans
8,df903ba4-a8f3-406f-814d-4420b00ab611,26526,0.0,amish white bread
9,df903ba4-a8f3-406f-814d-4420b00ab611,16716,2.0,best ever buckeyes


In [89]:
# Recommendations for the imported user, looked up by UUID
imported_user_uuid = "df903ba4-a8f3-406f-814d-4420b00ab611"
recommended_names = recommender.get_recommendation(imported_user_uuid)
print(recommended_names)

['crock pot chicken with black beans   cream cheese', 'japanese mum s chicken', 'creamy cajun chicken pasta', 'to die for crock pot roast', 'yes  virginia there is a great meatloaf', 'oven fried chicken chimichangas', 'kittencal s moist cheddar garlic oven fried chicken breast', 'creamy burrito casserole', 'greek potatoes  oven roasted and delicious', 'whatever floats your boat  brownies']
