In [2]:
import pandas as pd
import numpy as np
import warnings
import random

# Ignore warnings
warnings.filterwarnings("ignore")

# Load the datasets
data = pd.read_csv('ratings.csv')
movies_data = pd.read_csv('movies.csv')

# Pearson correlation function
def pearson_correlation(user1, user2, data):
    ratings1 = data[data['userId'] == user1]
    ratings2 = data[data['userId'] == user2]
    common_movies = pd.merge(ratings1, ratings2, on='movieId', how='inner')
    if len(common_movies) == 0:
        return 0
    rating1_diff = common_movies['rating_x'] - common_movies['rating_y']
    sim = np.dot(rating1_diff, rating1_diff) / (np.linalg.norm(rating1_diff) ** 2)
    return 1 / (1 + sim)

# Find similar users
def get_similar_users(target_user, data, similarity_func, top_n=10):
    user_ids = data['userId'].unique()
    similarities = [(user, similarity_func(target_user, user, data)) for user in user_ids if user != target_user]
    return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n][0]

# Get user recommendations
def get_user_recommendations(user, data, similarity_func, top_n=10):
    similar_users = get_similar_users(user, data, similarity_func, top_n)
    user_movie_ratings = data[data['userId'] == user]
    recommendations = {}
    for similar_user in similar_users:
        similar_user_ratings = data[data['userId'] == similar_user]
        for _, row in similar_user_ratings.iterrows():
            movie_id = row['movieId']
            if movie_id not in user_movie_ratings['movieId'].values:
                recommendations.setdefault(movie_id, []).append(row['rating'])
    return dict(sorted(recommendations.items(), key=lambda x: np.mean(x[1]), reverse=True)[:top_n])

# Get group recommendations
def get_group_recommendations(group, data, similarity_func, top_n=10):
    all_recommendations = {}
    for user in group:
        user_recs = get_user_recommendations(user, data, similarity_func, top_n)
        for movie, rating in user_recs.items():
            all_recommendations.setdefault(movie, []).extend(rating)
    return dict(sorted(all_recommendations.items(), key=lambda x: np.mean(x[1]), reverse=True)[:top_n])

# Why-not explanations
def why_not_atomic(movie_id, group_recommendations):
    if movie_id in group_recommendations:
        return f"Movie ID {movie_id} is in the top-10 recommendations."
    else:
        return f"Movie ID {movie_id} is not in the top-10 recommendations possibly due to lower average ratings from similar users or it was not rated by the similar users."

def extract_genres(movies_data):
    genre_dict = {}
    for _, row in movies_data.iterrows():
        movie_id = row['movieId']
        genres = row['genres'].split('|')
        genre_dict[movie_id] = genres
    return genre_dict

def is_genre_in_top_recommendations(genre, top_recommendations, genre_dict):
    for movie_id in top_recommendations.keys():
        if genre in genre_dict.get(movie_id, []):
            return True
    return False

def why_not_group_updated(genre, group_recommendations, genre_dict):
    if is_genre_in_top_recommendations(genre, group_recommendations, genre_dict):
        return f"'{genre}' movies are present in the top-10 recommendations."
    else:
        return f"'{genre}' movies are not in the top-10 recommendations."

def why_not_position_absenteeism(movie_id, group_recommendations):
    if movie_id not in group_recommendations:
        return f"Movie ID {movie_id} is not in the top-10 recommendations."
    else:
        first_movie_id = list(group_recommendations.keys())[0]
        if movie_id == first_movie_id:
            return f"Movie ID {movie_id} is already ranked first."
        else:
            return f"Movie ID {movie_id} is not ranked first due to its lower average rating."

# Example usage
group = [6, 5, 4]
group_recommendations = get_group_recommendations(group, data, pearson_correlation, top_n=10)
genre_dict = extract_genres(movies_data)

# Select a random movie ID for the atomic case
random_movie_id_atomic = random.choice(data['movieId'].unique())
# Select a random movie ID from the top-10 recommendations for the position absenteeism case
top_10_movie_ids = list(group_recommendations.keys())
random_movie_id_position = random.choice(top_10_movie_ids[1:]) if len(top_10_movie_ids) > 1 else top_10_movie_ids[0]

# Dynamic atomic case example
atomic_explanation_dynamic = why_not_atomic(random_movie_id_atomic, group_recommendations)
# Group case example with a specific genre ("Action")
group_explanation_dynamic = why_not_group_updated("Action", group_recommendations, genre_dict)
# Dynamic position absenteeism case example
position_explanation_dynamic = why_not_position_absenteeism(random_movie_id_position, group_recommendations)

# Print the explanations
print("Random Movie ID for Atomic Case:", random_movie_id_atomic)
print("Atomic Case Explanation:", atomic_explanation_dynamic)
print("Group Case Explanation for 'Action' Genre:", group_explanation_dynamic)
print("Random Movie ID for Position Absenteeism Case:", random_movie_id_position)
print("Position Absenteeism Case Explanation:", position_explanation_dynamic)



Random Movie ID for Atomic Case: 140541
Atomic Case Explanation: Movie ID 140541 is not in the top-10 recommendations possibly due to lower average ratings from similar users or it was not rated by the similar users.
Group Case Explanation for 'Action' Genre: 'Action' movies are present in the top-10 recommendations.
Random Movie ID for Position Absenteeism Case: 131724.0
Position Absenteeism Case Explanation: Movie ID 131724.0 is not ranked first due to its lower average rating.
