## Importing Libraries

In [159]:
import torch
import pandas as pd
import numpy as np
from collections import Counter
from src.modules.CBF import CBF

## Preprocessing and model loading

In [160]:
cbf = CBF()

In [161]:
def get_recommendations(user_id, model, data, user_to_idx, idx_to_movie, device, path='data/movies_grouplens/users.dat'):
    user_data = pd.read_csv(path,
                            delimiter='::', header=None, engine='python', encoding='latin1')

    user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']
    user_idx = user_to_idx[user_id]

    user_data['Gender'] = user_data['Gender'].map({'M': 0, 'F': 1})
    user_data['Age'] = user_data['Age'] - user_data['Age'].min()
    user_data['Occupation'] = user_data['Occupation'] - \
        user_data['Occupation'].min()

    ud = user_data[(user_data['UserID'] == user_id)].values
    not_watched = []
    list_ratings = []

    movie_idxs = np.arange(data['Movie_idx'].min(),
                           data['Movie_idx'].max()+1, 1)

    for movie in movie_idxs:
        if (not len(data[(data['User_idx'] == user_idx) & (data['Movie_idx'] == movie)]['Movie_idx'].to_list())):
            not_watched.append(movie)
        # else:
        #     print(f'Already Wacthed {movie}')

    for movie in not_watched:
        movie_features = ['Title', 'Genres']
        title, genres = data.iloc[data[data['Movie_idx']
                                       == movie].index[0]][movie_features]
        gender, age, occupation = ud[0][1:4]
        # print(gender, age, occupation)
        rating = model(torch.tensor([user_idx]).to(device), torch.tensor([movie]).to(device), torch.tensor(
            [gender]).to(device), torch.tensor([age]).to(device), torch.tensor([occupation]).to(device))

        list_ratings.append([user_id, idx_to_movie
                             [movie], title,
                            genres, gender, age, occupation, rating.item()])


# Define the column names
    column_names = ['UserID', 'MovieID', 'Title',
                    'Genres', 'Gender', 'Age', 'Occupation', 'Rating']

    # Create a DataFrame
    df_list = pd.DataFrame(list_ratings, columns=column_names)
    return df_list

In [162]:
def genres_counts(data):
    flat_genres = [genre for sublist in data['Genres'].apply(
        lambda x: x.split('|')).values.flatten() for genre in sublist]
    genre_counts = dict(Counter(flat_genres))
    return genre_counts

In [163]:
def load_and_preprocess_data():
    # Load data
    movie_data = pd.read_csv('data/movies_grouplens/movies.dat',
                             delimiter='::', header=None, engine='python', encoding='latin1')
    rating_data = pd.read_csv('data/movies_grouplens/ratings.dat',
                              delimiter='::', header=None, engine='python', encoding='latin1')
    user_data = pd.read_csv('data/movies_grouplens/users.dat',
                            delimiter='::', header=None, engine='python', encoding='latin1')

    # Set column names
    movie_data.columns = ['MovieID', 'Title', 'Genres']
    rating_data.columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']
    user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

    # Merge datasets
    df = pd.merge(rating_data, movie_data, on='MovieID', how='left')
    df = pd.merge(df, user_data, on='UserID', how='left')

    # Convert categorical variables
    df['Gender'] = df['Gender'].map({'M': 0, 'F': 1})

    # Create index mappings
    users = df['UserID'].unique()
    movies = df['MovieID'].unique()

    user_to_idx = {user: idx for idx, user in enumerate(users)}
    movie_to_idx = {movie: idx for idx, movie in enumerate(movies)}

    # Add index columns
    df['User_idx'] = df['UserID'].map(user_to_idx)
    df['Movie_idx'] = df['MovieID'].map(movie_to_idx)

    # Normalize age and occupation values to start from 0
    df['Age'] = df['Age'] - df['Age'].min()
    df['Occupation'] = df['Occupation'] - df['Occupation'].min()

    return df, user_to_idx, movie_to_idx

In [164]:
device = torch.device(
    'mps' if torch.backends.mps.is_available() else 'cpu')
device = torch.device('cuda') if torch.cuda.is_available() else device

In [165]:
con_aware_NCF = torch.jit.load(
    'models/NCF_CNN_Enchanced.pt', map_location=device)

In [166]:
ncf_cnn = torch.jit.load('models/NCF_CNN.pt', map_location=device)

In [167]:
data_ua, user_to_idx_ua, movie_to_idx_ua = load_and_preprocess_data()


user_data = pd.read_csv(
    'data/movies_grouplens/users.dat', delimiter='::', header=None, engine='python', encoding='latin1'
)
user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

user_data['Gender'] = user_data['Gender'].map({'M': 0, 'F': 1})
user_data['Age'] = user_data['Age'] - user_data['Age'].min()
user_data['Occupation'] = user_data['Occupation'] - \
    user_data['Occupation'].min()

In [168]:
data_cnn_ncf = pd.read_csv('data/movies_grouplens/ratings.dat',
                           delimiter='::', header=None, engine='python', encoding='latin1')
data_cnn_ncf.columns = ['user', 'event', 'rating', 'timestamp']

users = data_cnn_ncf['user'].unique()
events = data_cnn_ncf['event'].unique()

user_to_idx = {user: idx for idx, user in enumerate(users)}
event_to_idx = {event: idx for idx, event in enumerate(events)}
idx_to_user = {idx: user for user,
               idx in user_to_idx.items()}  # For reverse lookup
idx_to_event = {idx: event for event,
                idx in event_to_idx.items()}  # For reverse lookup

data_cnn_ncf['user_idx'] = data_cnn_ncf['user'].map(user_to_idx)
data_cnn_ncf['event_idx'] = data_cnn_ncf['event'].map(event_to_idx)

In [169]:
movie_data = pd.read_csv('data/movies_grouplens/movies.dat',
                         delimiter='::', header=None, engine='python', encoding='latin1')
movie_data.columns = ['MovieID', 'Title', 'Genres']
movie_data['Genres'] = movie_data['Genres'].fillna('')

## NCF with CNN

In [170]:
def get_cnn_sncf_recommendations(user_id, data, model, device, idx_to_event, idx_to_user, user_to_idx, k=10):
    model.eval()
    user_idx = user_to_idx[user_id]

    all_events = np.arange(data['event_idx'].min(),
                           data['event_idx'].max() + 1)

    watched_events = set(
        data[data['user_idx'] == user_idx]['event_idx'].unique())

    not_watched = np.setdiff1d(all_events, list(watched_events))

    user_idx_tensor = torch.tensor(
        [user_idx] * len(not_watched), device=device, dtype=torch.long)
    event_idx_tensor = torch.tensor(
        not_watched, device=device, dtype=torch.long)

    with torch.no_grad():
        ratings = model(user_idx_tensor, event_idx_tensor)

    ratings = ratings.flatten()

    sorted_indices = torch.argsort(ratings, descending=True)
    top_k_indices = sorted_indices[:k]

    top_k_events = event_idx_tensor[top_k_indices]

    top_k_ratings = ratings[top_k_indices]

    recommendations = [
        [user_id, idx_to_event[int(event)], float(rating)]
        for event, rating in zip(top_k_events, top_k_ratings)
    ]

    return recommendations

## User-Aware NCF with CNN

In [171]:
def get_ua_recommendations(user_id, model, data, user_to_idx, movie_to_idx, device, user_data):
    idx_to_movie = {v: k for k, v in movie_to_idx.items()}

    user_row = user_data[user_data['UserID'] == user_id].iloc[0]
    gender, age, occupation = user_row['Gender'], user_row['Age'], user_row['Occupation']

    user_idx = user_to_idx[user_id]
    all_movies = np.arange(data['Movie_idx'].min(),
                           data['Movie_idx'].max() + 1)

    watched_movies = set(
        data[data['User_idx'] == user_idx]['Movie_idx'].unique())

    not_watched = np.setdiff1d(all_movies, list(watched_movies))

    not_watched_data = data[data['Movie_idx'].isin(
        not_watched)][['Movie_idx', 'Title', 'Genres']].drop_duplicates()

    user_idx_tensor = torch.tensor(
        [user_idx] * len(not_watched), device=device)
    movie_idx_tensor = torch.tensor(
        not_watched_data['Movie_idx'].values, device=device)
    gender_tensor = torch.tensor([gender] * len(not_watched), device=device)
    age_tensor = torch.tensor([age] * len(not_watched), device=device)
    occupation_tensor = torch.tensor(
        [occupation] * len(not_watched), device=device)

    with torch.no_grad():
        ratings = model(user_idx_tensor, movie_idx_tensor,
                        gender_tensor, age_tensor, occupation_tensor)

    not_watched_data['UserID'] = user_id
    not_watched_data['Gender'] = gender
    not_watched_data['Age'] = age
    not_watched_data['Occupation'] = occupation
    not_watched_data['Rating'] = ratings.cpu().numpy()

    column_order = ['UserID', 'Movie_idx', 'Title',
                    'Genres', 'Gender', 'Age', 'Occupation', 'Rating']
    result = not_watched_data[column_order].rename(
        columns={'Movie_idx': 'MovieID'})
    result['MovieID'] = result['MovieID'].apply(lambda x: idx_to_movie[x])
    return result

## CBF

In [172]:
def get_cbf_recommendations(user_id, k=10):
    return cbf.cbf_recommend_movies(user_id)

## Hybrid Recommendations

In [173]:
def get_hybrid_recommendations(alpha, ua_res, cbf_res):
    hybrid_recommendations = ua_res.merge(
        cbf_res, on=['MovieID', 'Title', 'Genres'])
    hybrid_recommendations['Ensemble_Rating'] = alpha * \
        hybrid_recommendations['Rating'] + \
        (1-alpha)*hybrid_recommendations['Score']
    return hybrid_recommendations.sort_values(by='Ensemble_Rating', ascending=False)[
        ['MovieID', 'Title', 'Genres', 'Ensemble_Rating']]

## Recommendations

In [174]:
user_id = 582
k = 10
alpha = 0.5

In [175]:
recommendations_cnn_sncf = get_cnn_sncf_recommendations(
    user_id, data_cnn_ncf, ncf_cnn, device, idx_to_event, idx_to_user, user_to_idx)

In [176]:
recommendations_ua = get_ua_recommendations(user_id, con_aware_NCF, data_ua, user_to_idx_ua, movie_to_idx_ua,
                                            device, user_data).sort_values(by='Rating', ascending=False)

In [177]:
recommendations_ua

Unnamed: 0,UserID,MovieID,Title,Genres,Gender,Age,Occupation,Rating
135,582,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,0,17,4,4.820176
1202,582,216,Billy Madison (1995),Comedy,0,17,4,4.758090
93,582,110,Braveheart (1995),Action|Drama|War,0,17,4,4.756071
172,582,318,"Shawshank Redemption, The (1994)",Drama,0,17,4,4.752721
177,582,356,Forrest Gump (1994),Comedy|Romance|War,0,17,4,4.718955
...,...,...,...,...,...,...,...,...
896452,582,1843,Slappy and the Stinkers (1998),Children's|Comedy,0,17,4,2.960094
243842,582,2821,Male and Female (1919),Adventure|Drama,0,17,4,2.959841
263752,582,3748,"Match, The (1999)",Comedy|Romance,0,17,4,2.959471
983062,582,2909,"Five Wives, Three Secretaries and Me (1998)",Documentary,0,17,4,2.957237


In [178]:
recommendations_cbf = get_cbf_recommendations(user_id)

In [179]:
recommendations_cbf

Unnamed: 0,MovieID,Title,Genres,Score
0,1648,"House of Yes, The (1997)",Comedy|Drama|Thriller,4.112984
1,3227,"Not Love, Just Frenzy (Más que amor, frenesí) ...",Comedy|Drama|Thriller,4.112984
2,2699,Arachnophobia (1990),Action|Comedy|Sci-Fi|Thriller,3.797093
3,1876,Deep Impact (1998),Action|Drama|Sci-Fi|Thriller,3.568429
4,1861,Junk Mail (1997),Comedy|Thriller,3.513449
...,...,...,...,...
3878,77,Nico Icon (1995),Documentary,0.000000
3879,3880,"Ballad of Ramblin' Jack, The (2000)",Documentary,0.000000
3880,602,"Great Day in Harlem, A (1994)",Documentary,0.000000
3881,604,Criminals (1996),Documentary,0.000000


In [180]:
recommendations_hybrid = get_hybrid_recommendations(
    alpha, recommendations_ua, recommendations_cbf)

## Results

In [181]:
movie_ids = [rec[1] for rec in recommendations_cnn_sncf]
recommendations_cnn_sncf = movie_data[movie_data['MovieID'].isin(movie_ids)]
recommendations_cnn_sncf

Unnamed: 0,MovieID,Title,Genres
49,50,"Usual Suspects, The (1995)",Crime|Thriller
315,318,"Shawshank Redemption, The (1994)",Drama
523,527,Schindler's List (1993),Drama|War
589,593,"Silence of the Lambs, The (1991)",Drama|Thriller
847,858,"Godfather, The (1972)",Action|Crime|Drama
896,908,North by Northwest (1959),Drama|Thriller
910,922,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),Film-Noir
1162,1178,Paths of Glory (1957),Drama|War
1180,1198,Raiders of the Lost Ark (1981),Action|Adventure
1189,1207,To Kill a Mockingbird (1962),Drama


In [182]:
recommendations_ua = recommendations_ua[['MovieID', 'Title', 'Genres']].head(k)

In [183]:
recommendations_cbf = recommendations_cbf[[
    'MovieID', 'Title', 'Genres']].head(k)

In [184]:
recommendations_hybrid.head(k)

Unnamed: 0,MovieID,Title,Genres,Ensemble_Rating
10,2324,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama,4.073208
30,1968,"Breakfast Club, The (1985)",Comedy|Drama,4.0166
0,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,3.959005
111,2431,Patch Adams (1998),Comedy|Drama,3.942214
136,2861,For Love of the Game (1999),Comedy|Drama,3.928423
249,1876,Deep Impact (1998),Action|Drama|Sci-Fi|Thriller,3.917119
170,1060,Swingers (1996),Comedy|Drama,3.909674
6,3147,"Green Mile, The (1999)",Drama|Thriller,3.907688
195,1784,As Good As It Gets (1997),Comedy|Drama,3.902006
203,2447,Varsity Blues (1999),Comedy|Drama,3.900584


## About User

In [185]:
about_user = data_ua[(data_ua['UserID'] == user_id) & (data_ua['Rating'] >= 4)
                     ].sort_values(by='Rating', ascending=False)[['UserID', 'MovieID', 'Title', 'Genres', 'Rating']].head(k)
about_user

Unnamed: 0,UserID,MovieID,Title,Genres,Rating
88175,582,2987,Who Framed Roger Rabbit? (1988),Adventure|Animation|Film-Noir,5
88177,582,3793,X-Men (2000),Action|Sci-Fi,5
88179,582,3005,"Bone Collector, The (1999)",Thriller,5
88184,582,3409,Final Destination (2000),Drama|Thriller,5
88181,582,2093,Return to Oz (1985),Adventure|Children's|Fantasy|Sci-Fi,5
88193,582,2676,Instinct (1999),Drama|Thriller,5
88191,582,3471,Close Encounters of the Third Kind (1977),Drama|Sci-Fi,5
88186,582,2478,Three Amigos! (1986),Comedy|Western,5
88190,582,3617,Road Trip (2000),Comedy,5
88199,582,2841,Stir of Echoes (1999),Thriller,5


## Genre Counts

In [186]:
genres_counts(recommendations_ua)

{'Action': 5,
 'Sci-Fi': 2,
 'Thriller': 3,
 'Comedy': 2,
 'Drama': 4,
 'War': 2,
 'Romance': 1,
 'Adventure': 3,
 'Fantasy': 1}

In [187]:
genres_counts(recommendations_cnn_sncf)

{'Crime': 2,
 'Thriller': 3,
 'Drama': 7,
 'War': 2,
 'Action': 2,
 'Film-Noir': 1,
 'Adventure': 1}

In [188]:
genres_counts(recommendations_cbf)

{'Comedy': 9, 'Drama': 3, 'Thriller': 10, 'Action': 2, 'Sci-Fi': 2}

In [191]:
genres_counts(recommendations_hybrid.head(k))

{'Comedy': 7, 'Drama': 9, 'Action': 2, 'Sci-Fi': 2, 'Thriller': 3}

In [190]:
genres_counts(about_user)

{'Adventure': 2,
 'Animation': 1,
 'Film-Noir': 1,
 'Action': 1,
 'Sci-Fi': 3,
 'Thriller': 4,
 'Drama': 3,
 "Children's": 1,
 'Fantasy': 1,
 'Comedy': 2,
 'Western': 1}