In [1]:
import pandas as pd


def _evaluate_all(weights):
    """
    This function evaluates the predictions of a trained model.
    """

    def _get_user_ratings_per_user(ratings_df):
        user_ratings = {}
        for _, row in ratings_df.iterrows():
            user_id = row["userId"]
            movie_id = row["movieId"]
            rating = row["rating"]

            if user_id not in user_ratings:
                user_ratings[user_id] = []

            user_ratings[user_id].append(
                {
                    "movieId": movie_id,
                    "rating": rating,
                    "title": "",
                    "year": 1000,
                    "externalId": str(movie_id),
                }
            )

        return user_ratings

    ratings_df = pd.read_csv("../data/ml-latest-small/test_set_ratings.csv")
    input_data, test_data = sk_train_test_split(
        ratings_df, test_size=0.3, random_state=42
    )

    user_ratings_input = _get_user_ratings_per_user(test_data)
    user_ratings_test = _get_user_ratings_per_user(input_data)

    results = []

    for user_id, user_ratings_input in user_ratings_input.items():
        if user_id not in user_ratings_test:
            # TODO: Remove from array, that score is calulated correctly
            continue

        movies_to_test = user_ratings_test[user_id]

        score = make_recommendations(user_ratings_input, movies_to_test, True, weights)

        user_results = []
        for movie in score:
            rating_for_movie = next(
                (
                    test_movie
                    for test_movie in movies_to_test
                    if test_movie["externalId"] == movie["externalId"]
                    and "rating" in test_movie
                ),
                None,
            )
            user_results.append(
                {
                    "score": movie["score"],
                    "title": movie["title"],
                    "year": movie["year"],
                    "externalId": movie["externalId"],
                    "rating": rating_for_movie["rating"] * 20,
                }
            )
        results.append({"userId": user_id, "results": user_results})

    def _calculate_rmse(results):
        rmse_score = 0
        mae_score = 0
        ctr = 0
        for user_result in results:
            for movie in user_result["results"]:
                if "rating" in movie:
                    rmse_score += ((movie["rating"] - movie["score"]) / 20) ** 2
                    mae_score += abs(movie["rating"] - movie["score"]) / 20
                    ctr += 1
        rmse_score = (rmse_score / ctr) ** 0.5
        mae_score = mae_score / ctr
        print("\nEvaluation results for all models:")
        print(f"RMSE: {rmse_score}")
        print(f"MAE: {mae_score}")
        print("\n")
        return rmse_score, mae_score

    rmse, mae = _calculate_rmse(results)

    return rmse

# Optimize weights of different models in hybrid model

## RMSE 1

In [2]:
import numpy as np
from scipy.optimize import minimize


def find_optimal_weights_rmse_simpler(
    mae_model_content_based, mae_model_neighborhood, mae_model_matrix_factorization
):
    def combined_rmse(weights, rmse):
        # Berechne den gewichteten MAE
        weighted_mae = np.sum(weights * rmse) / np.sum(weights)
        return weighted_mae

    # Startwerte für die Gewichte
    initial_weights = np.array([1.0, 1.0, 1.0])

    # MAE-Werte der Modelle in ein Array packen
    rmses = np.array(
        [
            mae_model_content_based,
            mae_model_neighborhood,
            mae_model_matrix_factorization,
        ]
    )

    # Optimierung durchführen
    result = minimize(
        combined_rmse,
        initial_weights,
        args=(rmses,),
        bounds=[(0, None), (0, None), (0, None)],
    )

    # Optimale Gewichte
    optimal_weights = result.x / np.sum(
        result.x
    )  # Normieren der Gewichte, sodass sie sich auf 1 summieren
    return optimal_weights


# Beispiel-RMSE-Werte der einzelnen Modelle
rmse_model_content_based = 1.72
rmse_model_neighborhood = 1.12
rmse_model_matrix_factorization = 1.07

# Optimale Gewichte berechnen
optimal_weights_mae = find_optimal_weights_rmse_simpler(
    rmse_model_content_based, rmse_model_neighborhood, rmse_model_matrix_factorization
)
print("Optimale Gewichte (RMSE):", optimal_weights_mae)
# Optimale Gewichte (RMSE): [0.00000000e+00 3.74094352e-18 1.00000000e+00]

Optimale Gewichte (RMSE): [0. 0. 1.]


## RMSE 2

In [3]:
import numpy as np
from scipy.optimize import minimize


def find_optimal_weights(
    rmse_model_content_based, rmse_model_neighborhood, rmse_model_matrix_factorization
):
    def combined_rmse(weights, rmses):
        # Berechne den gewichteten RMSE
        weighted_rmse = np.sqrt(np.sum((weights * rmses) ** 2) / np.sum(weights))
        return weighted_rmse

    # Startwerte für die Gewichte
    initial_weights = np.array([1.0, 1.0, 1.0])

    # RMSE-Werte der Modelle in ein Array packen
    rmses = np.array(
        [
            rmse_model_content_based,
            rmse_model_neighborhood,
            rmse_model_matrix_factorization,
        ]
    )

    # Optimierung durchführen
    result = minimize(
        combined_rmse,
        initial_weights,
        args=(rmses,),
        bounds=[(0, None), (0, None), (0, None)],
    )

    # Optimale Gewichte
    optimal_weights = result.x / np.sum(
        result.x
    )  # Normieren der Gewichte, sodass sie sich auf 1 summieren
    return optimal_weights


# Beispiel-RMSE-Werte der einzelnen Modelle
rmse_model_content_based = 1.72
rmse_model_neighborhood = 1.12
rmse_model_matrix_factorization = 1.07

# Optimale Gewichte berechnen
optimal_weights = find_optimal_weights(
    rmse_model_content_based, rmse_model_neighborhood, rmse_model_matrix_factorization
)
print("Optimale Gewichte:", optimal_weights)
# Optimale Gewichte: [0.11270225 0.37353889 0.51375886] -> 11.27% Content-Based, 37.35% Neighborhood, 51.38% Matrix Factorization

Optimale Gewichte: [0.11270238 0.37353928 0.51375835]


  weighted_rmse = np.sqrt(np.sum((weights * rmses) ** 2) / np.sum(weights))


## MAE

In [4]:
import numpy as np
from scipy.optimize import minimize


def find_optimal_weights_mae(
    mae_model_content_based, mae_model_neighborhood, mae_model_matrix_factorization
):
    def combined_mae(weights, maes):
        # Berechne den gewichteten MAE
        weighted_mae = np.sum(weights * maes) / np.sum(weights)
        return weighted_mae

    # Startwerte für die Gewichte
    initial_weights = np.array([1.0, 1.0, 1.0])

    # MAE-Werte der Modelle in ein Array packen
    maes = np.array(
        [
            mae_model_content_based,
            mae_model_neighborhood,
            mae_model_matrix_factorization,
        ]
    )

    # Optimierung durchführen
    result = minimize(
        combined_mae,
        initial_weights,
        args=(maes,),
        bounds=[(0, None), (0, None), (0, None)],
    )

    # Optimale Gewichte
    optimal_weights = result.x / np.sum(
        result.x
    )  # Normieren der Gewichte, sodass sie sich auf 1 summieren
    return optimal_weights


# Beispiel-MAE-Werte der einzelnen Modelle
mae_model_content_based = 1.39
mae_model_neighborhood = 0.87
mae_model_matrix_factorization = 0.84

# Optimale Gewichte berechnen
optimal_weights_mae = find_optimal_weights_mae(
    mae_model_content_based, mae_model_neighborhood, mae_model_matrix_factorization
)
print("Optimale Gewichte (MAE):", optimal_weights_mae)

Optimale Gewichte (MAE): [0.00000000e+00 5.21944168e-17 1.00000000e+00]
