# Gruppenprojekt - Big Data Analysis
Dieses Notebook beschreibt unser Programm und bietet einen Überblik über unser Projekt mit den *FlixNet*‑Daten.

***INFO**: Dieses Projekt wurde in einem GithubRepository entwickelt und wurde im nachgang in dieses Notebook übertragen.*


## 🔧 Libraries & Setup

In [61]:
from abc import abstractmethod
from datetime import datetime
from typing import List
from typing import Optional, Sequence, Literal

import numpy as np
import pandas as pd
import scipy.stats as stats
import torch
import torch.nn as nn
import torch.optim as optim
from loguru import logger
from pydantic import BaseModel
from scipy.sparse import csr_matrix
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

## Recommender deklarieren
(Base-Recommender, Collaborative Filtering, Content-Based & Optional noch den Deep-Learning-Recommender)

In [62]:
class Recommender:
    def __init__(self):
        self.k = 3 # default
        self.user_id = None
        self.item_id = None
        self.similarity: Literal["cosine", "pearson"] = "cosine"  # default
        self.calculation_variant: Literal["weighted", "unweighted"] = "weighted"  # default
        self.data = None


    @abstractmethod
    def _preprocess_data(self):
        ...


    def _prepare_information(self, user_id: str, item_id: str, k: int, similarity: Literal["cosine", "pearson"] = "cosine", calculation_variant: Literal["weighted", "unweighted"] = "weighted") -> None:
        self.user_id = user_id
        self.item_id = item_id
        self.similarity = similarity
        self.calculation_variant = calculation_variant
        self.k = k

        if similarity == 'pearson' and self.data is not None:
            self.data['mean'] = self.data.mean(axis=1)


    @abstractmethod
    def predict(
            self,
            user_id: str,
            item_id: str,
            similarity: Optional[Literal['cosine', 'pearson']] = 'cosine',   # only for collaborative filtering
            calculation_variety: Optional[Literal['weighted', 'unweighted']] = 'weighted',  # only for collaborative filtering
            k: Optional[int] = 3,
            second_k_value: Optional[int] = None):
        ...

In [63]:
class CollaborativeFilteringRecommender(Recommender):
    def __init__(self, data: pd.DataFrame, mode: Literal['user', 'item'] = 'user', display_results_for_each_step: Optional[bool] = False) -> None:
        super().__init__()
        self.display_results_for_each_step = display_results_for_each_step
        self.original_data = data
        self.mode = mode
        self._preprocess_data()


    def _preprocess_data(self) -> None:
        self.original_data = self.original_data.set_index("user_ID")
        self.original_data.index = self.original_data.index.astype(str) # convert the index to string (due to error with int values)
        if self.mode == 'item':
            self.data = self.original_data.T  # transpose for item based
        else:
            self.data = self.original_data  # original for user based


    def _calculate_distance_and_indices(self, dataframe: pd.DataFrame) -> ([], []):
        knn = NearestNeighbors(metric="cosine", algorithm='brute')
        knn.fit(dataframe.values)
        distances, indices = knn.kneighbors(dataframe.values, n_neighbors=self.k + 1)

        if self.mode == 'item':
            index = dataframe.index.get_loc(self.item_id)
        else:
            index = dataframe.index.get_loc(self.user_id)

        similar_distances = distances[index, 1:]
        similar_indices = indices[index, 1:]

        return similar_distances, similar_indices

    def _calculate_similarities(self, similar_distances: np.ndarray) -> np.ndarray:
        similarity = [1 - x for x in similar_distances]
        similarity = [(y + 1) / 2 for y in similarity]
        return np.array(similarity)

    def _calculate_result(self, similarity: np.ndarray, ratings: np.ndarray) -> float:
        if self.calculation_variant == "weighted":
            mean = np.dot(ratings, similarity) / similarity.sum()
            return mean
        else:
            return float(np.mean(ratings))

    def _check_values(self) -> None:
        if self.mode == 'user':
            if self.user_id not in self.data.index:
                raise ValueError(f"User {self.user_id} nicht in Daten.")
            if self.item_id not in self.data.columns:
                raise ValueError(f"Item {self.item_id} nicht in Daten.")
        elif self.mode == 'item':
            if self.user_id not in self.original_data.index:
                raise ValueError(
                    f"User {self.user_id} nicht in Originaldaten.")
            if self.item_id not in self.data.index:
                raise ValueError(
                    f"Item {self.item_id} nicht in transponierten Daten.")

    def _process_item_based(self) -> pd.DataFrame:
        user_ratings = self.original_data.loc[self.user_id]

        # filter based on the item. Only the users that already gave a rating are relevant
        rated_items = user_ratings[user_ratings > 0.0].index.tolist()

        if not rated_items:
            raise ValueError(f"User {self.user_id} hat keine Items bewertet!")

        return self.data.loc[rated_items + [self.item_id]]

    def _process_user_based(self) -> pd.DataFrame:
        # filter based on the item. Only the users that already gave a rating are relevant
        relevant_df = self.data[self.data[self.item_id] > 0.0]

        # add the user we are looking for (due to non-existing rating this user where filtered out)
        return pd.concat([relevant_df, self.data.loc[[self.user_id]]])

    def _normalize_for_pearson(self, relevant_df: pd.DataFrame) -> pd.DataFrame:
        mean_values = relevant_df.mean(axis=1).to_numpy()
        relevant_df = relevant_df.sub(mean_values, axis=0)
        return relevant_df

    def predict(
            self,
            user_id: str,
            item_id: str,
            similarity: Literal['cosine', 'pearson'] = 'cosine',
            calculation_variety: Literal['weighted', 'unweighted'] = 'weighted',
            k: Optional[int] = 3,
            second_k_value: Optional[int] = None) -> float:
        self._prepare_information(user_id=user_id, item_id=item_id, similarity=similarity, calculation_variant=calculation_variety, k=k)
        self._check_values()

        if self.mode == 'item':
            relevant_df = self._process_item_based()
        else:
            relevant_df = self._process_user_based()

        if similarity == 'pearson':
            self._normalize_for_pearson(relevant_df)

        # make sure that there are no NaN values -> set NaN to 0.0
        relevant_df = relevant_df.fillna(0.0)
        similar_distances, similar_indices = self._calculate_distance_and_indices(dataframe=relevant_df)

        if self.mode == 'item':
            ratings = relevant_df.iloc[similar_indices][self.user_id].to_numpy()
        else:
            ratings = relevant_df.iloc[similar_indices][self.item_id].to_numpy()

        similarity = self._calculate_similarities(similar_distances)
        result = self._calculate_result(similarity, ratings)

        if self.display_results_for_each_step:
          self.explain(similar_indices, relevant_df, ratings, similarity, result)

        return result

    def explain(self, similar_indices, relevant_df, ratings, similarity, result) -> None:
        print("-" * 50)
        print(f"<mode: {self.mode}>")
        print(f"({self.calculation_variant}) Mittelwert: {result:.4f}")
        print(f"Metrik: {self.similarity}")
        print()
        print(f"k ({self.k}) ähnlichsten {'Items' if self.mode == 'item' else 'Nutzer'}:")
        df = pd.DataFrame({
            "ID": relevant_df.index[similar_indices],
            "rating": ratings,
            "similarity": similarity
        }).reset_index(drop=True)
        print(df.to_string(index=True, header=True))
        print("-" * 50)

In [64]:
class ContentBasedRecommender(Recommender):
    def __init__(self, item_profile: pd.DataFrame, user_ratings: pd.DataFrame) -> None:
        super().__init__()
        self.item_profile = item_profile
        self.user_ratings = user_ratings
        self.k = 3
        self.feature_matrix = None
        self._preprocess_data()

        # check if the features "budget", "revenue", "runtime" are relevant for the item/rating correlation
        self._check_features_correlation(features=["budget", "revenue", "runtime"])
        self._calculate_tfidf_matrix()


    def _preprocess_data(self):
        self.item_profile["item_ID"] = self.item_profile["item_ID"].astype(str)
        self.user_ratings["item_ID"] = self.user_ratings["item_ID"].astype(str)
        self.user_ratings["user_ID"] = self.user_ratings["user_ID"].astype(str)

    def _check_features_correlation(self, features: List[str]) -> None:
        irrelevant_features = []  #  list for irrelevant feature that will be removed

        for feature in features:
            if feature not in self.item_profile.columns:
                continue

            # combine item and user ratings
            merged_data = pd.merge(self.user_ratings, self.item_profile, on="item_ID")

            # convert to numeric
            feature_data = pd.to_numeric(merged_data[feature].fillna(0), errors="coerce")
            rating_data = pd.to_numeric(merged_data["rating"].fillna(0), errors="coerce")

            # calculate the correlation between the user rating and the feature
            correlation, p_value = stats.pearsonr(feature_data, rating_data)

            # check if the correlation is relevant / significant
            if abs(correlation) < 0.1 or p_value > 0.05:
                logger.debug(f"Feature '{feature}' does not have a sigificant correlation and will be ignored.")
                irrelevant_features.append(feature)
            else:
                logger.debug(f"Feature '{feature}' has a significant correlation: {correlation}")

        self.item_profile.drop(columns=irrelevant_features, inplace=True)


    def _safe_get_feature(self, feature_name):
        if feature_name in self.item_profile.columns:
            return self.item_profile[feature_name]
        else:
            return None

    def _calculate_tfidf_matrix(self) -> None:
        # optional but if the title is empty we set it as an empty string
        self.item_profile["title"] = self.item_profile["title"].fillna("")

        # use the TfidfVectorizer() to transform title into numerical feature
        title_vectorizer = TfidfVectorizer()
        title_features = title_vectorizer.fit_transform(self.item_profile["title"])

        # change genre columns in text by just extracting the word after '"Genre_"'
        genre_cols = [col for col in self.item_profile.columns if col.startswith("Genre_")]
        if genre_cols:
            self.item_profile["genre_text"] = self.item_profile[genre_cols].astype(int).apply(
                lambda row: " ".join([col.replace("Genre_", "") for col, val in row.items() if val == 1]), axis=1
            )
            genre_vectorizer = TfidfVectorizer()
            genre_features = genre_vectorizer.fit_transform(self.item_profile["genre_text"])
        else:
            genre_features = np.empty((len(self.item_profile), 0))

        # the language of the items transformed into one-hot-encoded-dummies
        language_dummies = pd.get_dummies(self.item_profile["original_language"], prefix="lang")

        # put runtime into three categories (short, medium, long)
        runtime_feature = self._safe_get_feature("runtime")
        if runtime_feature is not None:
            runtime_bucket = pd.qcut(runtime_feature, q=3, labels=["kurz", "mittel", "lang"])
            runtime_dummies = pd.get_dummies(runtime_bucket, prefix="runtime")
        else:
            runtime_dummies = pd.DataFrame(index=self.item_profile.index)

        # budget and include will be logarithmically transformed and then scaled
        numerical_features = []
        if "budget" in self.item_profile.columns:
            self.item_profile["log_budget"] = np.log1p(self.item_profile["budget"].fillna(0))
            numerical_features.append("log_budget")
        if "revenue" in self.item_profile.columns:
            self.item_profile["log_revenue"] = np.log1p(self.item_profile["revenue"].fillna(0))
            numerical_features.append("log_revenue")

        if numerical_features:
            scaler = StandardScaler()
            scaled_numericals = scaler.fit_transform(self.item_profile[numerical_features])
        else:
            scaled_numericals = np.empty((len(self.item_profile), 0))

        # create feature matrix
        self.feature_matrix = hstack([
            title_features,
            genre_features,
            language_dummies.values,
            runtime_dummies.values,
            scaled_numericals
        ])

        self.feature_matrix = csr_matrix(self.feature_matrix)

    def _check_values(self):
        if self.user_id not in self.user_ratings["user_ID"].values:
            raise ValueError(f"User-ID {self.user_id} not found.")

        if self.item_id not in self.item_profile["item_ID"].values:
            raise ValueError(f"Item-ID {self.item_id} not found.")


    def predict(
            self,
            user_id: str,
            item_id: str,
            similarity: Optional[Literal['cosine', 'pearson']] = 'cosine',  # only for collaborative filtering
            calculation_variety: Optional[Literal['weighted', 'unweighted']] = 'weighted', # only for collaborative filtering
            k: Optional[int] = 3,
            second_k_value: Optional[int] = None) -> float:

        # default function to save all the information
        self._prepare_information(user_id=user_id, item_id=item_id, k=k)

        # check if the values included in the dataframes
        self._check_values()

        # extract only the items, the user rated
        rated_items = self.user_ratings[self.user_ratings["user_ID"] == user_id]
        rated_item_ids = rated_items["item_ID"].values

        # this case can happen when k is greater than the rated items by the user
        if self.k > len(rated_item_ids):
            self.k = len(rated_item_ids)

        # extract the rated item indices from the item profile
        rated_item_indices = self.item_profile[self.item_profile["item_ID"].isin(rated_item_ids)].index

        # check if the user rated some items... if not then return 0.0
        if len(rated_item_indices) == 0:
            return 0.0

        # get the feature matrix that is calculated in the '_calculate_tfidf_matrix()'-Method
        filtered_matrix = self.feature_matrix[rated_item_indices]

        # default kNN usage like in the lecture with brute algorithm and cosine as metric
        knn = NearestNeighbors(metric="cosine", algorithm="brute")
        knn.fit(filtered_matrix)

        item_index = self.item_profile[self.item_profile["item_ID"] == item_id].index[0]
        distances, indices = knn.kneighbors(self.feature_matrix[item_index], n_neighbors=self.k + 1)  # k+1 because the item itself is also included

        # the similar item indices beginning with the first real neighbor
        similar_items = indices.flatten()[1:]
        similar_item_indices = rated_item_indices[similar_items]

        # extract for each item in the similar item indices list the rating and save it in the list
        similar_ratings = []
        for idx in similar_item_indices:
            similar_item_id = self.item_profile.iloc[idx]["item_ID"]
            user_rating = self.user_ratings[(self.user_ratings["user_ID"] == user_id) & (self.user_ratings["item_ID"] == similar_item_id)]
            if not user_rating.empty:
                similar_ratings.append(user_rating["rating"].values[0])

        # if the similar ratings is zero then we return a default 0.0
        if not similar_ratings:
            return 0.0

        # calculate the predicted rating based on the sum of ratings and len of ratings
        return sum(similar_ratings) / len(similar_ratings)

In [65]:
class HybridRecommender(Recommender):
    def __init__(self, data: pd.DataFrame, item_profile: pd.DataFrame, user_ratings: pd.DataFrame, mode: Literal['user', 'item'] = 'user', alpha: float = 0.5):
        super().__init__()
        self.collaborative_recommender = CollaborativeFilteringRecommender(data=data, mode=mode)
        self.content_based_recommender = ContentBasedRecommender(item_profile=item_profile, user_ratings=user_ratings)
        self.alpha = alpha

    def predict(
            self,
            user_id: str,
            item_id: str,
            similarity: Optional[Literal['cosine', 'pearson']] = 'cosine',  # only for collaborative filtering
            calculation_variety: Optional[Literal['weighted', 'unweighted']] = 'weighted', # only for collaborative filtering
            k: Optional[int] = 3,
            second_k_value: Optional[int] = 3):

        collaborative_prediction = self.collaborative_recommender.predict(
            user_id=user_id,
            item_id=item_id,
            similarity=similarity, # ignore that it can be NONE
            calculation_variety=calculation_variety, # ignore that it can be NONE
            k=k
        )

        content_based_prediction = self.content_based_recommender.predict(
            user_id=user_id,
            item_id=item_id,
            similarity=similarity,
            calculation_variety=calculation_variety,
            k=second_k_value
        )

        # combine both with alpha as weight
        combined_prediction = (self.alpha * collaborative_prediction) + ((1 - self.alpha) * content_based_prediction)
        return combined_prediction

---

`OPTIONAL`  Deep-Learning-Recomender

In [66]:
class RatingsDataset(Dataset):
     # PyTorch dataset that returns (user_idx, item_idx, rating) tuples
    def __init__(self, df: pd.DataFrame):
        # cast once to tensors that avoids conversions inside the training loop
        self.u = torch.tensor(df["user_idx"].values, dtype=torch.long)
        self.i = torch.tensor(df["item_idx"].values, dtype=torch.long)
        self.r = torch.tensor(df["rating"].values, dtype=torch.float32)
    def __len__(self):
        return len(self.r)
    def __getitem__(self, idx):
        return self.u[idx], self.i[idx], self.r[idx]


class HybridMF(nn.Module):
    # Matrix‑factorisation with bias terms + linear projection of item features
    def __init__(self, num_users: int, num_items: int, d: int, item_features: torch.Tensor, dropout: float = 0.15):
        super().__init__()

        # user / item embeddings (latent factors)
        self.P = nn.Embedding(num_users, d)
        self.Q = nn.Embedding(num_items, d)

        # bias embeddings
        self.bu = nn.Embedding(num_users, 1)
        self.bi = nn.Embedding(num_items, 1)
        self.mu = nn.Parameter(torch.zeros(1))  # global mean

        # linear projection that maps item side‑features into the same latent space
        self.F = nn.Linear(item_features.shape[1], d, bias=False)
        self.register_buffer("item_features", item_features)  # moved to GPU/CPU automatically to improve performance based on system

        # dropout for a bit of regularisation
        self.drop = nn.Dropout(dropout)

        # lightweight weight initialisation
        nn.init.normal_(self.P.weight, std=0.05)
        nn.init.normal_(self.Q.weight, std=0.05)
        nn.init.xavier_uniform_(self.F.weight)

    def forward(self, u, i):
         # build item representation: latent factors + projected features
        q = self.Q(i) + self.F(self.item_features[i])
        q = self.drop(q)

        # final prediction for each (u,i) pair
        return (self.P(u) * q).sum(-1) + self.mu + self.bu(u).squeeze() + self.bi(i).squeeze()  # product of latent vectors + global bias + user bias + item bias


class DeepLearningRecommender(Recommender):
        # Pre‑processing, training loop, evaluation & prediction are encapsulated here.
        def __init__(
            self,
            ratings: pd.DataFrame,
            item_profile: pd.DataFrame,
            numeric_cols: Optional[Sequence[str]] = ("runtime", "budget", "revenue"),
            val_ratio: float = 0.2,
            embedding_dim: int = 64,
            batch_size: int = 1024,
            epochs: int = 60,
            lr: float = 1e-3,
            weight_decay: float = 3e-5,
            dropout_p: float = 0.20,
            early_stopping_rounds: int = 10,
            device: Optional[str] = None,
            seed: int = 42,
        ):
            super().__init__()

            self.embedding_dim = embedding_dim
            self.batch_size = batch_size
            self.epochs = epochs
            self.lr = lr
            self.weight_decay = weight_decay
            self.dropout_p = dropout_p
            self.early_stopping_rounds = early_stopping_rounds
            self.seed = seed
            self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

            self.ratings_raw = ratings.copy()
            self.item_profile_raw = item_profile.copy()
            self.numeric_cols = numeric_cols
            self.val_ratio = val_ratio

            # complete preparation + training pipeline
            self._preprocess_data()
            self._build_model()
            self.fit()  # auto‑train so that external testers don’t need to call fit()


        def _preprocess_data(self):
            # cast ids to str to avoid categorical type issues
            self.ratings_raw[["user_ID", "item_ID"]] = self.ratings_raw[["user_ID", "item_ID"]].astype(str)

            # dense id mapping
            self.user2idx = {u: i for i, u in enumerate(self.ratings_raw["user_ID"].unique())}
            self.item2idx = {m: j for j, m in enumerate(self.ratings_raw["item_ID"].unique())}
            self.idx2user = {i: u for u, i in self.user2idx.items()}
            self.idx2item = {j: m for m, j in self.item2idx.items()}

            ratings = self.ratings_raw.copy()
            ratings["user_idx"] = ratings["user_ID"].map(self.user2idx)
            ratings["item_idx"] = ratings["item_ID"].map(self.item2idx)

            # Feature matrix (one‑hot genres + scaled numeric cols)
            ip = self.item_profile_raw[self.item_profile_raw["item_ID"].isin(self.item2idx)].copy()
            ip["item_idx"] = ip["item_ID"].map(self.item2idx)
            ip.sort_values("item_idx", inplace=True)
            feat_df = ip.filter(regex="^Genre_")   # already one‑hot 0/1 columns

            # numeric columns (if present)
            if self.numeric_cols:
                present = [c for c in self.numeric_cols if c in ip.columns]
                if missing := set(self.numeric_cols) - set(present):
                    logger.warning(f"Skipped numeric cols: {missing}")
                if present and not ip.empty:
                    feat_df[present] = StandardScaler().fit_transform(ip[present].fillna(0))
                elif present:
                    # no rows –> create zero columns to keep matrix shape consistent
                    for col in present:
                        feat_df[col] = 0

            # if no features at all – fallback to a single zero column
            self.feat_mat = torch.tensor(feat_df.values if not feat_df.empty else np.zeros((len(self.item2idx), 1)), dtype=torch.float32)

            # optional but "why-not"...  global mean used for cold‑start cases
            self.global_mean = ratings["rating"].mean()

            # pivot for optional Pearson similarity (required by base class... for testing we do not remove this)
            self.data = ratings.pivot(index="user_ID", columns="item_ID", values="rating")

            # create loaders
            train_df, val_df = train_test_split(ratings, test_size=self.val_ratio, random_state=self.seed)
            self.train_loader = DataLoader(RatingsDataset(train_df), batch_size=self.batch_size, shuffle=True)
            self.val_loader = DataLoader(RatingsDataset(val_df), batch_size=self.batch_size)

        # model construction
        def _build_model(self):
            self.model = HybridMF(
                num_users=len(self.user2idx),
                num_items=len(self.item2idx),
                d=self.embedding_dim,
                item_features=self.feat_mat,
                dropout=self.dropout_p,
            ).to(self.device)


        def fit(self):
            # Adam optimiser... suited for sparse embeddings
            opt = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)

            # scheduler lowers LR (Learn-Rate) by ×0.5 if val‑MAE plateaus for 3 epochs
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.5, patience=3)

            best_mae, epochs_no_imp = float("inf"), 0  # track early‑stopping progress

            for ep in range(1, self.epochs + 1):
                self.model.train()
                # iterate over mini‑batches (u,i,r) from DataLoader
                for u, i, r in self.train_loader:
                    # move to device once per batch
                    u, i, r = u.to(self.device), i.to(self.device), r.to(self.device)

                    opt.zero_grad()  # reset gradients
                    pred = self.model(u, i) # forward pass

                    # Smooth L1 ≈ MAE for |err|>β, MSE for |err|<β – stable & robust
                    loss = nn.functional.smooth_l1_loss(pred, r, beta=1.0)
                    loss.backward() # back‑prop
                    opt.step() # Adam update

                # validation after each epoch
                val_mae = self._evaluate_loader(self.val_loader)
                logger.info(f"ep{ep:02d} • val MAE {val_mae:.4f}")
                scheduler.step(val_mae)  # maybe reduce LR

                # early‑stopping logic
                if val_mae + 1e-4 < best_mae:           # question: significant improvement? ... yes / no?
                    best_mae, epochs_no_imp = val_mae, 0
                    torch.save(self.model.state_dict(), "hybrid_best.pt")  # checkpoint (save in file)
                else:
                    epochs_no_imp += 1
                    if epochs_no_imp >= self.early_stopping_rounds:
                        break  # stop training...

            # restore best weights before evaluation
            self.model.load_state_dict(torch.load("hybrid_best.pt"))

        # helper to evaluate loaders
        def _evaluate_loader(self, loader):
            # computes Mean Absolute Error over a given DataLoader
            self.model.eval(); err, n = 0.0, 0
            with torch.no_grad():
                for u, i, r in loader:
                    preds = self.model(u.to(self.device), i.to(self.device)).cpu()
                    err += torch.abs(preds - r).sum().item(); n += len(r)
            return err / n


        # default prediction method from base class
        def predict(
            self,
            user_id: str,
            item_id: str,
            similarity: Optional[Literal['cosine', 'pearson']] = 'cosine',
            calculation_variety: Optional[Literal['weighted', 'unweighted']] = 'weighted',
            k: Optional[int] = 3,
            second_k_value: Optional[int] = None,
        ) -> float:
            self._prepare_information(user_id, item_id, k, similarity, calculation_variety)  # ignore that this can be none... just a "simple" test recommender here

            # check if the user or item is not known
            if user_id not in self.user2idx and item_id not in self.item2idx:
                return float(self.global_mean)

            # case 1: only user is unknown so we take the item-bias
            if user_id not in self.user2idx:
                idx = self.item2idx.get(item_id)
                if idx is None:
                    return float(self.global_mean)
                item_bias = self.model.bi.weight[idx].item()
                return float(np.clip(self.global_mean + item_bias, 0.5, 5.0))

            # case 2: only item is unknown so we take the user-bias
            if item_id not in self.item2idx:
                u_idx = self.user2idx[user_id]
                user_bias = self.model.bu.weight[u_idx].item()
                return float(np.clip(self.global_mean + user_bias, 0.5, 5.0))

            # case 3 - both unnown (default case)
            u = torch.tensor([self.user2idx[user_id]], device=self.device)
            i = torch.tensor([self.item2idx[item_id]], device=self.device)
            self.model.eval()
            with torch.no_grad():
                score = self.model(u, i).item()
            return float(np.clip(score, 0.5, 5.0))

---

## MAE Tester
Verwendet, um die verschiedenen Recommender zu Evaluieren und anhand des MAEs zu vergleichen

In [67]:
class Test(BaseModel):
    name: str
    type: Literal["collaborative_filtering", "content_based", "hybrid", "deep_learning"]
    mode: Optional[Literal["user", "item"]] = "item"
    k_value: Optional[int] = 3
    second_k_value: Optional[int] = 3
    metric: Optional[Literal["cosine", "pearson"]] = 'cosine'
    calculation_variety: Optional[Literal["weighted", "unweighted"]] = 'weighted'
    batch_size: Optional[int] = 128
    embedding_dim: Optional[int] = 16
    epochs: Optional[int] = 25
    alpha: Optional[float] = 0.5


class TestResult(BaseModel):
    name: str
    type: Literal["collaborative_filtering", "content_based", "hybrid", "deep_learning"]
    mode: Literal["user", "item"] | None
    k_value: int | None
    second_k_value: int | None
    metric: Literal["cosine", "pearson"] | None
    calculation_variety: Literal["weighted", "unweighted"] | None
    alpha: float | None
    epochs: int | None
    batch_size: int | None
    embedding_dim: int | None
    mae: float


class TestResults(BaseModel): # just for saving in a "pretty" form
    date: str
    num_tests: int
    best_test: TestResult
    results: List[TestResult]



class MAETester:
    def __init__(self, tests: List[Test], test_data_path: str, data_path: str, item_profile_path: str, ratings: str):
        self.tests = tests
        self.testdata = pd.read_csv(test_data_path)  # testdata (for evaluaton)
        self.item_profile = pd.read_csv(item_profile_path)
        self.user_ratings = pd.read_csv(ratings)
        self._prepare_data()
        self.data = pd.read_csv(data_path)  # trainings-data
        self.results: List[TestResult] = []


    def _prepare_data(self):
        self.testdata["user_ID"] = self.testdata["user_ID"].astype(str)
        self.testdata["item_ID"] = self.testdata["item_ID"].astype(str)

    # because we only have 0.5 steps in testdata
    @staticmethod
    def _round_to_nearest_half(value: float):
        return round(value * 2) / 2

    def run_tests(self) -> pd.DataFrame:
        for test in self.tests:
            result = self._run_test(test)
            self.results.append(result)
            logger.success(f"Test abgeschlossen: {test.name}, MAE: {result.mae:.4f}\n")

        # display final resultse
        result_df = self._summarize_test_results()

        # save final results to file
        self._save_to_file()

        return result_df

    def _run_test(self, test: Test) -> TestResult:
        logger.info(f"Running test: {test.name}")

        if test.type == "content_based":
            recommender = ContentBasedRecommender(
                item_profile=self.item_profile,
                user_ratings=self.user_ratings,
            )
        elif test.type == "collaborative_filtering":
            recommender = CollaborativeFilteringRecommender(
                mode=test.mode, # ignore type (that this can be NONE)
                data=self.data,
            )
        elif test.type == "hybrid":
            recommender = HybridRecommender(
                data=self.data,
                item_profile=self.item_profile,
                user_ratings=self.user_ratings,
                mode=test.mode,  # ignore type (that this can be NONE)
                alpha=test.alpha,
            )
        elif test.type == "deep_learning":
            recommender = DeepLearningRecommender(
                ratings=self.user_ratings,
                batch_size=test.batch_size,
                epochs=test.epochs,
                embedding_dim=test.embedding_dim,
                item_profile=self.item_profile
            )
        else:
            raise ValueError(f"Unbekannter Recomendertyp: {test.type}")

        predictions = []
        actuals = []

        testdata_list = self.testdata.to_numpy()

        for row in tqdm(testdata_list, desc="Vorhersagen werden berechnet"):
            user_id: str = str(row[0])
            item_id: str = str(row[1])
            actual_rating = row[2]

            try:
                predicted_rating = recommender.predict(
                    user_id=user_id,
                    item_id=item_id,
                    similarity=test.metric,
                    calculation_variety=test.calculation_variety,
                    k=test.k_value,
                    second_k_value=test.second_k_value,
                )
                #
                # predicted_rating = self._round_to_nearest_half(value=predicted_rating)

                predictions.append(predicted_rating)
                actuals.append(actual_rating)
            except ValueError as e:
                logger.warning(f"Fehler bei der Vorhersage: {e}")

        mae = self._mean_absolute_error(actuals, predictions)

        return TestResult(
            name=test.name,
            type=test.type,
            mode=test.mode,
            k_value=test.k_value,
            metric=test.metric,
            calculation_variety=test.calculation_variety,
            alpha=test.alpha,
            batch_size=test.batch_size,
            embedding_dim=test.embedding_dim,
            epochs=test.epochs,
            second_k_value=test.second_k_value,
            mae=mae,
        )

    @staticmethod
    def _mean_absolute_error(actuals: List[float], predictions: List[float]) -> float:
        if not actuals or not predictions or len(actuals) != len(predictions):
            raise ValueError("Listen für tatsächliche und vorhergesagte Werte müssen gleich lang und nicht leer sein.")

        absolute_errors = [abs(a - p) for a, p in zip(actuals, predictions)]
        mae = sum(absolute_errors) / len(absolute_errors)
        return mae

    def _summarize_test_results(self) -> pd.DataFrame:
        if not self.results:
            logger.info("Keine Testergebnisse vorhanden.")
            return

        summary_df = pd.DataFrame([{
            "Testname": result.name,
            "Recomendertyp": result.type,
            "Modus": result.mode if result.type == "collaborative_filtering" else "/",
            "k-Wert": result.k_value if result.type != "deep_learning" else "/",
            "Metrik": result.metric if result.type == "collaborative_filtering" else "/",
            "Berechnungsvariante": result.calculation_variety if result.type == "collaborative_filtering" else "/",
            "Alpha (weight)": result.alpha if result.type == "hybrid" else "/",
            "Batch-Größe": result.batch_size if result.type == "deep_learning" else "/",
            "Embedding-Dimension": result.embedding_dim if result.type == "deep_learning" else "/",
            "Epochen": result.epochs if result.type == "deep_learning" else "/",
            "MAE": result.mae
        } for result in self.results])

        print("-" * 50)
        print("Zusammenfassung der Testergebnisse:")
        print(summary_df.to_string(index=False))
        print("-" * 50)

        return summary_df


    def _save_to_file(self) -> None:
        if not self.results:
            logger.info("Keine Testergebnisse vorhanden, nichts zu speichern.")
            return
        date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        best_test = min(self.results, key=lambda result: result.mae)  # best results based on mae
        test_results = TestResults(
            date=date,
            num_tests=len(self.results),
            best_test=best_test,
            results=self.results
        )

        file_path = f"./outputs/testergebnis_{date.replace(':', '-')}.json"

        with open(file_path, "w", encoding="utf-8") as json_file:
            json_file.write(test_results.model_dump_json(indent=4))

        logger.success(f"Testergebnisse erfolgreich gespeichert.")

# Beginn der Aufrufe und Nutzung der Recommender

wir haben viele Testfälle durchgeführt (diese sind ganz unten zu finden) und uns dann aufgrund der MAE-Ergebnisse für folgenden Hybriden Recommender entschieden der sowohl CollaborativeFiltering als auch Conten-Based-Filtering berücksichtigt.

Um diesen Recommender zu testen stellen wir eine struktur bereit wobei lediglich der parameter `testdata` angepasst werden muss um den evaluationsdatensatz zu verwenden. Aufrgund der sehr nah beieinanderliegenden Ergebnisse für pearson und cosine stellen wir beide Testprofile zur Verfügung.

In [68]:
choosen_recommender_profile = [
    Test(name="HybridRecommender", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="pearson", calculation_variety="weighted", alpha=0.5),
    Test(name="HybridRecommender", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="cosine", calculation_variety="weighted", alpha=0.5)
]

---

#### Optionale (andere) Testprofile die verwendet wurden und vollständigkeitshalber hier aufgeführt sind

In [69]:
collaborative_filtering_profiles = [
    Test(name="UserBased_1_cosine", type="collaborative_filtering", mode="user", k_value=4, metric="cosine", calculation_variety="weighted"),
    Test(name="UserBased_2_cosine", type="collaborative_filtering", mode="user", k_value=3, metric="cosine", calculation_variety="weighted"),
    Test(name="UserBased_3_cosine", type="collaborative_filtering", mode="user", k_value=4, metric="cosine", calculation_variety="weighted"),
    Test(name="UserBased_4_cosine", type="collaborative_filtering", mode="user", k_value=5, metric="cosine", calculation_variety="weighted"),

    Test(name="ItemBased_1_cosine", type="collaborative_filtering", mode="item", k_value=4, metric="cosine", calculation_variety="weighted"),
    Test(name="ItemBased_2_cosine", type="collaborative_filtering", mode="item", k_value=3, metric="cosine", calculation_variety="weighted"),
    Test(name="ItemBased_3_cosine", type="collaborative_filtering", mode="item", k_value=4, metric="cosine", calculation_variety="weighted"),
    Test(name="ItemBased_4_cosine", type="collaborative_filtering", mode="item", k_value=5, metric="cosine", calculation_variety="weighted"),

    Test(name="UserBased_1_pearson", type="collaborative_filtering", mode="user", k_value=4, metric="pearson", calculation_variety="weighted"),
    Test(name="UserBased_2_pearson", type="collaborative_filtering", mode="user", k_value=3, metric="pearson", calculation_variety="weighted"),
    Test(name="UserBased_3_pearson", type="collaborative_filtering", mode="user", k_value=4, metric="pearson", calculation_variety="weighted"),
    Test(name="UserBased_4_pearson", type="collaborative_filtering", mode="user", k_value=5, metric="pearson", calculation_variety="weighted"),

    Test(name="ItemBased_1_pearson", type="collaborative_filtering", mode="item", k_value=4, metric="pearson", calculation_variety="weighted"),
    Test(name="ItemBased_2_pearson", type="collaborative_filtering", mode="item", k_value=3, metric="pearson", calculation_variety="weighted"),
    Test(name="ItemBased_3_pearson", type="collaborative_filtering", mode="item", k_value=4, metric="pearson", calculation_variety="weighted"),
    Test(name="ItemBased_4_pearson", type="collaborative_filtering", mode="item", k_value=5, metric="pearson", calculation_variety="weighted"),
]

In [70]:
content_based_profiles = [
    Test(name="ContentBased_1", type="content_based", k_value=3),
    Test(name="ContentBased_2", type="content_based", k_value=4),
    Test(name="ContentBased_3", type="content_based", k_value=5),
    Test(name="ContentBased_4", type="content_based", k_value=6),
    Test(name="ContentBased_5", type="content_based", k_value=7),
    Test(name="ContentBased_6", type="content_based", k_value=8),
    Test(name="ContentBased_7", type="content_based", k_value=9),
    Test(name="ContentBased_8", type="content_based", k_value=10),
    Test(name="ContentBased_9", type="content_based", k_value=11),
    Test(name="ContentBased_10", type="content_based", k_value=12),
    Test(name="ContentBased_11", type="content_based", k_value=13),
    Test(name="ContentBased_12", type="content_based", k_value=14),
]

In [71]:
deep_learning_profiles = [
    # Test(name="deep_learning_large_embedding", type="deep_learning", embedding_dim=128, epochs=25, batch_size=128),
    # Test(name="deep_learning_small_batch", type="deep_learning", embedding_dim=64, epochs=25, batch_size=64),
    # Test(name="deep_learning_large_batch", type="deep_learning", embedding_dim=64, epochs=25, batch_size=256),
    # Test(name="deep_learning_more_epochs", type="deep_learning", embedding_dim=64, epochs=50, batch_size=128),
    # Test(name="deep_learning_less_epochs", type="deep_learning", embedding_dim=64, epochs=10, batch_size=128),
    # Test(name="deep_learning_less_epochs", type="deep_learning", embedding_dim=64, epochs=10, batch_size=128),
    # Test(name="deep_learning_finetuning_1", type="deep_learning", embedding_dim=48, epochs=10, batch_size=128),
    # Test(name="deep_learning_finetuning_2", type="deep_learning", embedding_dim=16, epochs=25, batch_size=128),
    Test(name="deep_learning_finetuning_3", type="deep_learning", embedding_dim=16, epochs=25, batch_size=64),
]

In [72]:
hybrid_profiles = [
    Test(name="Hybrid_1", type="hybrid", mode="user", k_value=5, metric="cosine", calculation_variety="weighted", alpha=0.5),
    Test(name="Hybrid_2", type="hybrid", mode="user", k_value=5, metric="cosine", calculation_variety="weighted", alpha=0.75),
    Test(name="Hybrid_3", type="hybrid", mode="user", k_value=5, metric="cosine", calculation_variety="weighted", alpha=0.25),

    Test(name="Hybrid_4", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="cosine", calculation_variety="weighted", alpha=0.5),
    Test(name="Hybrid_5", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="cosine", calculation_variety="weighted", alpha=0.75),
    Test(name="Hybrid_6", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="cosine", calculation_variety="weighted", alpha=0.25),

    Test(name="Hybrid_7", type="hybrid", mode="user", k_value=5, metric="pearson", calculation_variety="weighted", alpha=0.5),
    Test(name="Hybrid_8", type="hybrid", mode="user", k_value=5, metric="pearson", calculation_variety="weighted", alpha=0.75),
    Test(name="Hybrid_9", type="hybrid", mode="user", k_value=5, metric="pearson", calculation_variety="weighted", alpha=0.25),

    Test(name="Hybrid_10", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="pearson", calculation_variety="weighted", alpha=0.5),
    Test(name="Hybrid_11", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="pearson", calculation_variety="weighted", alpha=0.75),
    Test(name="Hybrid_12", type="hybrid", mode="user", k_value=5, second_k_value=14, metric="pearson", calculation_variety="weighted", alpha=0.25),

]

---

## Aufruf des MAE Testers
Dem Tester werden die entscheidenden Daten übergeben und anschließend wird der MAE für die ausgewählten / übergebenene Test-Profile ermittelt

In [73]:
testdata_path = "./data/Testdaten_FlixNet.csv"   # change to use another Test/Eval-Dataset

In [74]:
tester = MAETester(
        tests=deep_learning_profiles,
        test_data_path=testdata_path,
        data_path="./data/Bewertungsmatrix_FlixNet.csv",
        ratings="./data/Ratings_FlixNet.csv",
        item_profile_path="./data/Itemprofile_FlixNet.csv",
    )

In [75]:
result = tester.run_tests()
result # print result here as dataframe

[32m2025-06-13 17:46:04.436[0m | [1mINFO    [0m | [36m__main__[0m:[36m_run_test[0m:[36m73[0m - [1mRunning test: deep_learning_finetuning_3[0m
[32m2025-06-13 17:46:04.886[0m | [1mINFO    [0m | [36m__main__[0m:[36mfit[0m:[36m173[0m - [1mep01 • val MAE 3.1049[0m
[32m2025-06-13 17:46:05.301[0m | [1mINFO    [0m | [36m__main__[0m:[36mfit[0m:[36m173[0m - [1mep02 • val MAE 2.8267[0m
[32m2025-06-13 17:46:05.723[0m | [1mINFO    [0m | [36m__main__[0m:[36mfit[0m:[36m173[0m - [1mep03 • val MAE 2.5626[0m
[32m2025-06-13 17:46:06.152[0m | [1mINFO    [0m | [36m__main__[0m:[36mfit[0m:[36m173[0m - [1mep04 • val MAE 2.3039[0m
[32m2025-06-13 17:46:06.555[0m | [1mINFO    [0m | [36m__main__[0m:[36mfit[0m:[36m173[0m - [1mep05 • val MAE 2.0347[0m
[32m2025-06-13 17:46:06.961[0m | [1mINFO    [0m | [36m__main__[0m:[36mfit[0m:[36m173[0m - [1mep06 • val MAE 1.7632[0m
[32m2025-06-13 17:46:07.360[0m | [1mINFO    [0m | [36m__main_

--------------------------------------------------
Zusammenfassung der Testergebnisse:
                  Testname Recomendertyp Modus k-Wert Metrik Berechnungsvariante Alpha (weight)  Batch-Größe  Embedding-Dimension  Epochen      MAE
deep_learning_finetuning_3 deep_learning     /      /      /                   /              /           64                   16       25 0.761889
--------------------------------------------------


Unnamed: 0,Testname,Recomendertyp,Modus,k-Wert,Metrik,Berechnungsvariante,Alpha (weight),Batch-Größe,Embedding-Dimension,Epochen,MAE
0,deep_learning_finetuning_3,deep_learning,/,/,/,/,/,64,16,25,0.761889
