In [1]:
import pandas as pd
import numpy as np
import warnings

# Ignore warnings
warnings.filterwarnings("ignore")

# Load the dataset
data = pd.read_csv('ratings.csv')

# Displaying the first few rows of the dataset
print(data.head())

# Return Similar users
def get_similar_users(target_user, data, similarity_func, top_n=10):
    # Check if the target user exists in the dataset
    if target_user not in data['userId'].unique():
        raise ValueError(f"User {target_user} not found in the dataset.")

    user_ids = data['userId'].unique()

    similarities = [(user, similarity_func(target_user, user, data)) for user in user_ids if user != target_user]

    sorted_similar_users = sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]

    # Return only the user IDs of the top N most similar users
    return [user[0] for user in sorted_similar_users]

def pearson_correlation(user1, user2, data):
    ratings1 = data[data['userId'] == user1]
    ratings2 = data[data['userId'] == user2]

    common_movies = pd.merge(ratings1, ratings2, on='movieId', how='inner')

    if len(common_movies) == 0:
        return 0

    rating1_diff = common_movies['rating_x'] - common_movies['rating_y']
    sim = np.dot(rating1_diff, rating1_diff) / (np.linalg.norm(rating1_diff) ** 2)

    return 1 / (1 + sim)

#  Return indivisual user movie recommendations list
def get_user_recommendations(user, data, similarity_func, top_n=10):

    similar_users = get_similar_users(user, data, similarity_func, top_n)

    # Get the movie ratings for the user
    user_movie_ratings = data[data['userId'] == user]

    # Collect all movies rated by similar users, but not by the target user
    recommendations = {}
    for similar_user in similar_users:
        similar_user_ratings = data[data['userId'] == similar_user]
        for index, row in similar_user_ratings.iterrows():
            movie_id = row['movieId']
            if movie_id not in user_movie_ratings['movieId'].values:
                if movie_id not in recommendations:
                    recommendations[movie_id] = []
                recommendations[movie_id].append(row['rating'])

    # Calculate average ratings for each recommended movie
    average_recommendations = {movie: np.mean(ratings) for movie, ratings in recommendations.items()}

    # Sort the movies by average rating in descending order and get the top N
    sorted_recommendations = sorted(average_recommendations.items(), key=lambda x: x[1], reverse=True)

    # Return the top N recommended movie IDs
    return dict(sorted_recommendations[:top_n])

# Function to get final group recommendations movie list
def get_group_recommendations(group, data, similarity_func, aggregation_func):
    all_recommendations = {}
    for user in group:
        user_recs = get_user_recommendations(user, data, similarity_func)
        for movie, rating in user_recs.items():
            if movie not in all_recommendations:
                all_recommendations[movie] = []
            all_recommendations[movie].append(rating)

    aggregated_recommendations = aggregation_func(all_recommendations)

    # Sort the movies by the aggregated score
    sorted_movies = sorted(aggregated_recommendations.items(), key=lambda item: item[1], reverse=True)
    return sorted_movies[:10]

# Part (a) Question 1
def average_aggregation(recommendations):
    aggregated_ratings = {}
    for movie, ratings in recommendations.items():
        average_rating = np.mean(ratings)
        aggregated_ratings[movie] = average_rating
    return aggregated_ratings


# Part (a) Question 2
def least_misery_aggregation(recommendations):
    minimum_ratings = {}
    for movie, ratings in recommendations.items():
        minimum_rating = np.min(ratings)
        minimum_ratings[movie] = minimum_rating
    return minimum_ratings

group = [6, 5, 4]

avg_recos = get_group_recommendations(group, data, pearson_correlation, average_aggregation)
print("Top-10 recommendations for the group using the average method:", avg_recos)

least_misery_recos = get_group_recommendations(group, data, pearson_correlation, least_misery_aggregation)
print("Top-10 recommendations for the group using the least misery method:", least_misery_recos)


# Part (b) Question 1
# Counting disagreements for every movie of users everytime
def count_disagreements(recommendations, threshold=1):
    disagreement_counts = {}

    for movie, ratings in recommendations.items():
        for i in range(len(ratings)):
            for j in range(i + 1, len(ratings)):
                rating_diff = abs(ratings[i] - ratings[j])
                if rating_diff > threshold:
                    if movie not in disagreement_counts:
                        disagreement_counts[movie] = 0
                    disagreement_counts[movie] += 1

    return disagreement_counts


# Part (b) Question 2
# Aggregation function but with disagreements
def adjusted_aggregation_with_disagreements(recommendations, disagreement_counts, alpha=0.5):

    average_scores = average_aggregation(recommendations)

    adjusted_scores = {}
    for movie, avg_score in average_scores.items():
        if movie in disagreement_counts:
            # Subtract a fraction of the disagreement count from the average score
            adjusted_scores[movie] = avg_score - alpha * disagreement_counts[movie]
        else:
            adjusted_scores[movie] = avg_score

    return adjusted_scores

# Returning the final grouo recommendations as disagreements taking into account
def get_group_recommendations_with_disagreements(group, data, similarity_func, aggregation_func, threshold=1, alpha=0.5):
    all_recommendations = {}
    for user in group:
        user_recs = get_user_recommendations(user, data, similarity_func)
        for movie, rating in user_recs.items():
            if movie not in all_recommendations:
                all_recommendations[movie] = []
            all_recommendations[movie].append(rating)

    disagreement_counts = count_disagreements(all_recommendations, threshold)
    adjusted_recos_with_disagreements = adjusted_aggregation_with_disagreements(all_recommendations, disagreement_counts, alpha)
    # Sort the movies by the adjusted score
    sorted_movies = sorted(adjusted_recos_with_disagreements.items(), key=lambda item: item[1], reverse=True)
    return sorted_movies[:10]



#   Setting a rating difference threshold and counting disagreements is crucial
#   producing group recommendations. It enables the system to account for diverse
#   preferences within a group, identifying significant differences in individual ratings,
#   especially in heterogeneous groups with varied tastes. The threshold parameter provides
#   a means to customize the sensitivity of the system to rating differences, allowing for
#   fine-tuning based on the desired level of discernment. By considering disagreements, the
#   recommendation system can offer more nuanced and context-aware suggestions, improving the
#   overall quality of group recommendations. Additionally, this approach helps mitigate
#   the impact of outliers and extreme ratings, promotes higher consensus among group members,
#   and avoids the challenge of generating unanimous recommendations, providing a more realistic
#   representation of group dynamics.


# Example usage
# Assume a threshold of 1 for rating differences to consider as disagreements
# Assume an alpha value of 0.5 for adjusting the aggregation
group_recos_with_disagreements = get_group_recommendations_with_disagreements(group, data, pearson_correlation, adjusted_aggregation_with_disagreements, threshold=1, alpha=0.5)
print("Top-10 recommendations for the group considering disagreements:", group_recos_with_disagreements)

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
Top-10 recommendations for the group using the average method: [(80906.0, 5.0), (131724.0, 5.0), (2150.0, 5.0), (3156.0, 5.0), (3510.0, 5.0), (4720.0, 5.0), (33493.0, 5.0), (84152.0, 5.0), (152077.0, 5.0), (166528.0, 5.0)]
Top-10 recommendations for the group using the least misery method: [(80906.0, 5.0), (131724.0, 5.0), (2150.0, 5.0), (3156.0, 5.0), (3510.0, 5.0), (4720.0, 5.0), (33493.0, 5.0), (84152.0, 5.0), (152077.0, 5.0), (166528.0, 5.0)]
Top-10 recommendations for the group considering disagreements: [(80906.0, 5.0), (131724.0, 5.0), (2150.0, 5.0), (3156.0, 5.0), (3510.0, 5.0), (4720.0, 5.0), (33493.0, 5.0), (84152.0, 5.0), (152077.0, 5.0), (166528.0, 5.0)]
