# In [2]: (Jupyter input-cell marker)
import pandas as pd
import numpy as np
import math
import warnings

# Promote only RuntimeWarning (what NumPy emits for 0/0 or the mean of an
# empty slice) to an exception, so numeric failures surface loudly instead of
# silently producing NaN/inf. A blanket "error" filter would also turn
# unrelated Deprecation/FutureWarnings from pandas or NumPy into crashes.
warnings.filterwarnings("error", category=RuntimeWarning)

# Load the MovieLens 100K rating dataset
ratings = pd.read_csv("../dataset/ratings.csv").drop('timestamp', axis=1)

# Function to compute Pearson correlation coefficient
def pearson_correlation(df_user1, df_user2):
    """Return the Pearson correlation of two users over their co-rated movies.

    Args:
        df_user1: Ratings of the first user; needs ``movieId`` and ``rating``
            columns.
        df_user2: Ratings of the second user, same columns.

    Returns:
        The correlation coefficient, or ``math.nan`` when it is undefined:
        the users share no movie, or either user's ratings on the shared
        movies have zero variance (which includes a single shared movie).
    """
    merged_ratings = df_user1.merge(df_user2, on="movieId", how="inner")
    if merged_ratings.empty:
        return math.nan

    # merge() suffixes the clashing "rating" columns: _x is the left frame
    # (user 1), _y the right frame (user 2).
    dev_user1 = merged_ratings['rating_x'] - merged_ratings['rating_x'].mean()
    dev_user2 = merged_ratings['rating_y'] - merged_ratings['rating_y'].mean()

    num = np.sum(dev_user1 * dev_user2)
    den = np.sqrt(np.sum(dev_user1 ** 2)) * np.sqrt(np.sum(dev_user2 ** 2))

    # Test the denominator explicitly. NumPy scalars do not raise on 0/0, they
    # only warn, so relying on an exception here would make the result depend
    # on the process-wide warnings filter.
    if den == 0:
        return math.nan

    return num / den

# Function to predict movie scores for a user
def prediction(userId, movieId, ratings):
    """Predict the rating ``userId`` would give ``movieId``.

    User-based collaborative filtering on mean-centred ratings:

        pred = mean(A) + sum_B sim(A, B) * (r_B - mean(B)) / sum_B |sim(A, B)|

    where A is the target user, B ranges over the *other* users who rated
    the movie, ``r_B`` is B's rating of the movie, ``mean(.)`` is a user's
    mean rating and ``sim`` is ``pearson_correlation``.

    Args:
        userId: Id of the user to predict for.
        movieId: Id of the movie to score.
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        The predicted rating, or ``math.nan`` when no neighbour has a
        defined, non-zero similarity to the target user.
    """
    df_userA = ratings[ratings['userId'] == userId]
    userA_mean = df_userA['rating'].mean()

    rater_ids = ratings.loc[ratings['movieId'] == movieId, 'userId']
    # Collect every neighbour's rows in one pass instead of re-filtering the
    # whole table once per neighbour. The target user is excluded: they must
    # not act as their own (perfectly similar) neighbour.
    neighbour_rows = ratings[
        ratings['userId'].isin(rater_ids) & (ratings['userId'] != userId)
    ]

    num = 0.0
    den = 0.0
    for _, df_userB in neighbour_rows.groupby('userId'):
        sim = pearson_correlation(df_userA, df_userB)
        if math.isnan(sim):
            continue
        rating_b = df_userB.loc[df_userB['movieId'] == movieId, 'rating'].iloc[0]
        num += sim * (rating_b - df_userB['rating'].mean())
        # Normalise by the absolute similarity: with a signed sum, positive
        # and negative neighbours cancel and the prediction can explode or
        # change sign.
        den += abs(sim)

    # Explicit guard rather than depending on a warning being raised.
    if den == 0:
        return math.nan

    return userA_mean + num / den

# Function to compute group recommendations using the average method
def average_method(group, ratings):
    """Score movies for a group by averaging the members' predicted ratings.

    For each member, every movie in ``ratings`` that the member has not rated
    is scored with ``prediction``; NaN predictions are dropped. A movie's
    group score is the mean of the predictions collected for it, so it only
    reflects the members who have not rated that movie.

    Args:
        group: Iterable of user ids.
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        Dict mapping movie id to the mean predicted rating.
    """
    all_movies = set(ratings['movieId'])
    predicted = {}

    for member in group:
        seen = set(ratings.loc[ratings['userId'] == member, 'movieId'])
        for movie in all_movies - seen:
            score = prediction(member, movie, ratings)
            if math.isnan(score):
                continue
            predicted.setdefault(movie, []).append(score)

    return {movie: np.mean(scores) for movie, scores in predicted.items()}

# Function to compute group recommendations using the least misery method
def least_misery_method(group, ratings):
    """Score movies for a group by the lowest predicted rating among members.

    Each member contributes a ``prediction`` for every movie in ``ratings``
    they have not rated; NaN predictions are ignored. A movie's group score
    is the minimum of the predictions gathered for it.

    Args:
        group: Iterable of user ids.
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        Dict mapping movie id to the minimum predicted rating.
    """
    estimates_by_movie = {}

    for member in group:
        already_rated = set(ratings[ratings['userId'] == member]['movieId'])
        candidates = set(ratings['movieId']) - already_rated
        for movie in candidates:
            estimate = prediction(member, movie, ratings)
            if not math.isnan(estimate):
                estimates_by_movie.setdefault(movie, []).append(estimate)

    return {
        movie: min(estimates)
        for movie, estimates in estimates_by_movie.items()
    }

# Function to compute disagreements between users in a group
def compute_disagreements(group, ratings):
    """Compute a pairwise score for every ordered pair of distinct members.

    The score for ``(a, b)`` is the absolute value of
    ``pearson_correlation`` between the two users' ratings, so it is NaN
    whenever that correlation is undefined. Both ``(a, b)`` and ``(b, a)``
    are stored.

    NOTE(review): |correlation| is large for strongly correlated *and*
    strongly anti-correlated users, i.e. it measures the strength of the
    relationship rather than disagreement. Confirm this is the intent.

    Args:
        group: Sequence of user ids.
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        Dict mapping ``(user1, user2)`` tuples to the score.
    """
    # Slice each member's ratings once and reuse the frame for every pair.
    frames = {member: ratings[ratings['userId'] == member] for member in group}

    return {
        (first, second): np.abs(pearson_correlation(frames[first], frames[second]))
        for first in group
        for second in group
        if first != second
    }

# Function to compute group recommendations using disagreement-aware method
def disagreement_aware_method(group, ratings):
    """Score movies for a group, down-weighting members by their pair scores.

    Each member's predictions (for movies they have not rated) are multiplied
    by ``1 - f``, where ``f`` is the mean of that member's scores from
    ``compute_disagreements`` against every other member. A movie's group
    score is the mean of the weighted predictions collected for it.

    Pair scores that are NaN (undefined correlation) are left out of ``f``;
    if a member has no usable pair score, including the single-member group,
    ``f`` is 0 and their predictions are used unweighted.

    Args:
        group: Sequence of user ids.
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        Dict mapping movie id to the mean weighted predicted rating.
    """
    disagreements = compute_disagreements(group, ratings)
    all_movies = set(ratings['movieId'])
    recommendations = {}

    for user in group:
        # The factor depends only on the member, so compute it once here
        # rather than once per candidate movie.
        pair_scores = [
            disagreements[(user, other_user)]
            for other_user in group
            if other_user != user
        ]
        # Drop undefined scores: a single NaN would otherwise turn every
        # prediction of this member (and the movie means) into NaN.
        known_scores = [score for score in pair_scores if not math.isnan(score)]
        # np.mean([]) is NaN (and warns), so fall back to "no penalty".
        disagreement_factor = np.mean(known_scores) if known_scores else 0.0
        weight = 1 - disagreement_factor

        seen = set(ratings.loc[ratings['userId'] == user, 'movieId'])
        for movie in all_movies - seen:
            pred = prediction(user, movie, ratings)
            if not math.isnan(pred):
                recommendations.setdefault(movie, []).append(pred * weight)

    return {movie: np.mean(preds) for movie, preds in recommendations.items()}

# Select a group of 3 users
group = [1, 2, 3]


def _top_ten(scores):
    """Return the ten highest-scoring (movie, score) entries as a dict, best first."""
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:10])


# Average aggregation: rank movies by the mean predicted rating
avg_method_recommendations = average_method(group, ratings)
top_10_avg_method = _top_ten(avg_method_recommendations)
print("Top 10 recommendations using the average method:", top_10_avg_method, sep="\n")

# Least-misery aggregation: rank movies by the lowest predicted rating
lm_method_recommendations = least_misery_method(group, ratings)
top_10_lm_method = _top_ten(lm_method_recommendations)
print("\nTop 10 recommendations using the least misery method:", top_10_lm_method, sep="\n")

# Disagreement-aware aggregation: rank movies by the mean weighted prediction
da_method_recommendations = disagreement_aware_method(group, ratings)
top_10_da_method = _top_ten(da_method_recommendations)
print("\nTop 10 recommendations using the disagreement-aware method:", top_10_da_method, sep="\n")

# Cell output: KeyboardInterrupt (the run was interrupted before it finished)