## Importing Libraries

In [2]:
import torch
import pandas as pd
import numpy as np
from collections import Counter

## Preprocessing and model loading

In [3]:
def genres_counts(data):
    flat_genres = [genre for sublist in data['Genres'].apply(
        lambda x: x.split('|')).values.flatten() for genre in sublist]
    genre_counts = dict(Counter(flat_genres))
    return genre_counts

In [4]:
def load_and_preprocess_data():
    # Load data
    movie_data = pd.read_csv('../data/movies_grouplens/movies.dat',
                             delimiter='::', header=None, engine='python', encoding='latin1')
    rating_data = pd.read_csv('../data/movies_grouplens/ratings.dat',
                              delimiter='::', header=None, engine='python', encoding='latin1')
    user_data = pd.read_csv('../data/movies_grouplens/users.dat',
                            delimiter='::', header=None, engine='python', encoding='latin1')

    # Set column names
    movie_data.columns = ['MovieID', 'Title', 'Genres']
    rating_data.columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']
    user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

    # Merge datasets
    df = pd.merge(rating_data, movie_data, on='MovieID', how='left')
    df = pd.merge(df, user_data, on='UserID', how='left')

    # Convert categorical variables
    df['Gender'] = df['Gender'].map({'M': 0, 'F': 1})

    # Create index mappings
    users = df['UserID'].unique()
    movies = df['MovieID'].unique()

    user_to_idx = {user: idx for idx, user in enumerate(users)}
    movie_to_idx = {movie: idx for idx, movie in enumerate(movies)}

    # Add index columns
    df['User_idx'] = df['UserID'].map(user_to_idx)
    df['Movie_idx'] = df['MovieID'].map(movie_to_idx)

    # Normalize age and occupation values to start from 0
    df['Age'] = df['Age'] - df['Age'].min()
    df['Occupation'] = df['Occupation'] - df['Occupation'].min()

    return df, user_to_idx, movie_to_idx

In [5]:
device = torch.device(
    'mps' if torch.backends.mps.is_available() else 'cpu')
device = torch.device('cuda') if torch.cuda.is_available() else device

In [6]:
con_aware_NCF = torch.jit.load(
    '../models/NCF_CNN_Enchanced.pt', map_location=device)

In [7]:
ncf_cnn = torch.jit.load('../models/NCF_CNN.pt', map_location=device)

In [8]:
data_svd = pd.read_csv('../data/movies_grouplens/ratings.dat',
                       delimiter='::', header=None, engine='python', encoding='latin1')
data_svd .columns = ['user', 'event', 'rating', 'timestamp']

# Create a user-item matrix
ratings_matrix = data_svd .pivot(
    index='user', columns='event', values='rating').fillna(0)

In [9]:
data_ua, user_to_idx_ua, movie_to_idx_ua = load_and_preprocess_data()


user_data = pd.read_csv(
    '../data/movies_grouplens/users.dat', delimiter='::', header=None, engine='python', encoding='latin1'
)
user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

user_data['Gender'] = user_data['Gender'].map({'M': 0, 'F': 1})
user_data['Age'] = user_data['Age'] - user_data['Age'].min()
user_data['Occupation'] = user_data['Occupation'] - \
    user_data['Occupation'].min()

In [10]:
data_cnn_ncf = pd.read_csv('../data/movies_grouplens/ratings.dat',
                           delimiter='::', header=None, engine='python', encoding='latin1')
data_cnn_ncf.columns = ['user', 'event', 'rating', 'timestamp']

users = data_cnn_ncf['user'].unique()
events = data_cnn_ncf['event'].unique()

user_to_idx = {user: idx for idx, user in enumerate(users)}
event_to_idx = {event: idx for idx, event in enumerate(events)}
idx_to_user = {idx: user for user,
               idx in user_to_idx.items()}  # For reverse lookup
idx_to_event = {idx: event for event,
                idx in event_to_idx.items()}  # For reverse lookup

data_cnn_ncf['user_idx'] = data_cnn_ncf['user'].map(user_to_idx)
data_cnn_ncf['event_idx'] = data_cnn_ncf['event'].map(event_to_idx)

In [11]:
# Perform SVD
U, sigma, Vt = np.linalg.svd(ratings_matrix)

latent_dimensioanlity_k = 96
U = U[:, :latent_dimensioanlity_k]
sigma = np.diag(sigma[:latent_dimensioanlity_k])
Vt = Vt[:latent_dimensioanlity_k, :]

predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Convert to DataFrame
predicted_ratings_df = pd.DataFrame(
    predicted_ratings, columns=ratings_matrix.columns, index=ratings_matrix.index)

In [12]:
movie_data = pd.read_csv('../data/movies_grouplens/movies.dat',
                         delimiter='::', header=None, engine='python', encoding='latin1')
movie_data.columns = ['MovieID', 'Title', 'Genres']
movie_data['Genres'] = movie_data['Genres'].fillna('')

## SVD

In [13]:
def get_svd_recommendations(user_id, k=10):

    user_ratings = predicted_ratings_df.loc[user_id]
    top_recommendations = user_ratings.sort_values(ascending=False).head(k)
    return top_recommendations

## NCF with CNN

In [14]:
def get_cnn_sncf_recommendations(user_id, data, model, device, idx_to_event, idx_to_user, user_to_idx, k=10):
    model.eval()  # Set the model to evaluation mode

    # Map user_id to user_idx
    user_idx = user_to_idx[user_id]

    # Get all event indices
    all_events = np.arange(data['event_idx'].min(),
                           data['event_idx'].max() + 1)

    # Get watched events by the user
    watched_events = set(
        data[data['user_idx'] == user_idx]['event_idx'].unique())

    # Filter not-watched events
    not_watched = np.setdiff1d(all_events, list(watched_events))

    # Prepare tensors for batch prediction on GPU
    user_idx_tensor = torch.tensor(
        [user_idx] * len(not_watched), device=device, dtype=torch.long)
    event_idx_tensor = torch.tensor(
        not_watched, device=device, dtype=torch.long)

    # Perform batch prediction directly on GPU
    with torch.no_grad():
        # Keep ratings on GPU
        ratings = model(user_idx_tensor, event_idx_tensor)
    # Converts ratings from shape [3628, 1] to [3628]
    ratings = ratings.flatten()

    # Sort recommendations by rating in descending order
    sorted_indices = torch.argsort(ratings, descending=True)  # Sort on GPU
    top_k_indices = sorted_indices[:k]  # Get the top-k indices

    # Extract top-k events and ratings
    top_k_events = event_idx_tensor[top_k_indices]

    top_k_ratings = ratings[top_k_indices]
    # Prepare recommendations
    recommendations = [
        [user_id, idx_to_event[int(event)], float(rating)]
        for event, rating in zip(top_k_events, top_k_ratings)
    ]

    return recommendations

## User-Aware NCF with CNN

In [15]:
def get_ua_recommendations(user_id, model, data, user_to_idx, device, user_data):

    user_row = user_data[user_data['UserID'] == user_id].iloc[0]
    gender, age, occupation = user_row['Gender'], user_row['Age'], user_row['Occupation']

    user_idx = user_to_idx[user_id]
    all_movies = np.arange(data['Movie_idx'].min(),
                           data['Movie_idx'].max() + 1)

    watched_movies = set(
        data[data['User_idx'] == user_idx]['Movie_idx'].unique())

    not_watched = np.setdiff1d(all_movies, list(watched_movies))

    not_watched_data = data[data['Movie_idx'].isin(
        not_watched)][['Movie_idx', 'Title', 'Genres']].drop_duplicates()

    user_idx_tensor = torch.tensor(
        [user_idx] * len(not_watched), device=device)
    movie_idx_tensor = torch.tensor(
        not_watched_data['Movie_idx'].values, device=device)
    gender_tensor = torch.tensor([gender] * len(not_watched), device=device)
    age_tensor = torch.tensor([age] * len(not_watched), device=device)
    occupation_tensor = torch.tensor(
        [occupation] * len(not_watched), device=device)

    with torch.no_grad():
        ratings = model(user_idx_tensor, movie_idx_tensor,
                        gender_tensor, age_tensor, occupation_tensor).cpu().numpy()

    not_watched_data['UserID'] = user_id
    not_watched_data['Gender'] = gender
    not_watched_data['Age'] = age
    not_watched_data['Occupation'] = occupation
    not_watched_data['Rating'] = ratings

    column_order = ['UserID', 'Movie_idx', 'Title',
                    'Genres', 'Gender', 'Age', 'Occupation', 'Rating']
    result = not_watched_data[column_order].rename(
        columns={'Movie_idx': 'MovieID'})
    return result

## Recommendations

In [16]:
user_id = 400
k = 10

In [17]:
recommendations_svd = get_svd_recommendations(user_id).index.to_list()

In [18]:
recommendations_cnn_sncf = get_cnn_sncf_recommendations(
    user_id, data_cnn_ncf, ncf_cnn, device, idx_to_event, idx_to_user, user_to_idx)

In [19]:
recommendations_ua = get_ua_recommendations(user_id, con_aware_NCF, data_ua, user_to_idx_ua,
                                            device, user_data).sort_values(by='Rating', ascending=False).head(k)

## Results

In [20]:
recommendations_svd = movie_data[movie_data['MovieID'].isin(
    recommendations_svd)]
recommendations_svd

Unnamed: 0,MovieID,Title,Genres
108,110,Braveheart (1995),Action|Drama|War
523,527,Schindler's List (1993),Drama|War
589,593,"Silence of the Lambs, The (1991)",Drama|Thriller
1179,1197,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance
1180,1198,Raiders of the Lost Ark (1981),Action|Adventure
1250,1270,Back to the Future (1985),Comedy|Sci-Fi
2693,2762,"Sixth Sense, The (1999)",Thriller
2789,2858,American Beauty (1999),Comedy|Drama
2849,2918,Ferris Bueller's Day Off (1986),Comedy
3045,3114,Toy Story 2 (1999),Animation|Children's|Comedy


In [21]:
movie_ids = [rec[1] for rec in recommendations_cnn_sncf]
recommendations_cnn_sncf = movie_data[movie_data['MovieID'].isin(movie_ids)]
recommendations_cnn_sncf

Unnamed: 0,MovieID,Title,Genres
257,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi
315,318,"Shawshank Redemption, The (1994)",Drama
735,745,"Close Shave, A (1995)",Animation|Comedy|Thriller
847,858,"Godfather, The (1972)",Action|Crime|Drama
896,908,North by Northwest (1959),Drama|Thriller
910,922,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),Film-Noir
1162,1178,Paths of Glory (1957),Drama|War
1189,1207,To Kill a Mockingbird (1962),Drama
1230,1250,"Bridge on the River Kwai, The (1957)",Drama|War
1950,2019,Seven Samurai (The Magnificent Seven) (Shichin...,Action|Drama


In [22]:
recommendations_ua[['MovieID', 'Title', 'Genres']]

Unnamed: 0,MovieID,Title,Genres
29,29,"Close Shave, A (1995)",Animation|Comedy|Thriller
172,167,"Shawshank Redemption, The (1994)",Drama
44,44,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi
678,505,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama
998,669,"Godfather, The (1972)",Action|Crime|Drama
964,648,Roman Holiday (1953),Comedy|Romance
47,47,To Kill a Mockingbird (1962),Drama
21627,2617,Sanjuro (1962),Action|Adventure
932,629,Rear Window (1954),Mystery|Thriller
1371,839,"Great Escape, The (1963)",Adventure|War


## About User

In [23]:
about_user = data_ua[(data_ua['UserID'] == user_id) & (data_ua['Rating'] >= 4)
                     ].sort_values(by='Rating', ascending=False)[['UserID', 'MovieID', 'Title', 'Genres', 'Rating']].head(k)
about_user

Unnamed: 0,UserID,MovieID,Title,Genres,Rating
58894,400,593,"Silence of the Lambs, The (1991)",Drama|Thriller,5
58891,400,1196,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Drama|Sci-Fi|War,5
58900,400,3114,Toy Story 2 (1999),Animation|Children's|Comedy,5
58913,400,1036,Die Hard (1988),Action|Thriller,5
58910,400,2762,"Sixth Sense, The (1999)",Thriller,5
58908,400,2599,Election (1999),Comedy,5
58912,400,2918,Ferris Bueller's Day Off (1986),Comedy,5
58926,400,1242,Glory (1989),Action|Drama|War,5
58915,400,50,"Usual Suspects, The (1995)",Crime|Thriller,5
58914,400,2858,American Beauty (1999),Comedy|Drama,5


## Genre Counts

In [24]:
genres_counts(recommendations_ua)

{'Animation': 1,
 'Comedy': 3,
 'Thriller': 2,
 'Drama': 4,
 'Action': 3,
 'Adventure': 3,
 'Fantasy': 1,
 'Sci-Fi': 1,
 'Crime': 1,
 'Romance': 1,
 'Mystery': 1,
 'War': 1}

In [25]:
genres_counts(recommendations_cnn_sncf)

{'Action': 3,
 'Adventure': 1,
 'Fantasy': 1,
 'Sci-Fi': 1,
 'Drama': 7,
 'Animation': 1,
 'Comedy': 1,
 'Thriller': 2,
 'Crime': 1,
 'Film-Noir': 1,
 'War': 2}

In [26]:
genres_counts(recommendations_svd)

{'Action': 3,
 'Drama': 4,
 'War': 2,
 'Thriller': 2,
 'Adventure': 2,
 'Comedy': 5,
 'Romance': 1,
 'Sci-Fi': 1,
 'Animation': 1,
 "Children's": 1}

In [27]:
genres_counts(about_user)

{'Drama': 4,
 'Thriller': 4,
 'Action': 3,
 'Adventure': 1,
 'Sci-Fi': 1,
 'War': 2,
 'Animation': 1,
 "Children's": 1,
 'Comedy': 4,
 'Crime': 1}