In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [30]:
# Raw UserID of the user to generate recommendations for; reused by the CF call,
# the CBF call and the genre-count cells further down.
# NOTE(review): the rendered hybrid table below lists UserID 399, so that saved
# output may be stale relative to this value — re-run the notebook to confirm.
user_id = 582

In [3]:
def load_and_preprocess_data(data_dir='data/movies_grouplens'):
    """Load the movie, rating and user ``.dat`` files and build one merged frame.

    Reads ``movies.dat``, ``ratings.dat`` and ``users.dat`` from ``data_dir``
    (``::``-separated, latin1-encoded, no header row), left-joins the movie and
    user attributes onto every rating row, and adds model-ready columns.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory containing the three ``.dat`` files. Defaults to the location
        the notebook has always used, so existing zero-argument calls are
        unaffected.

    Returns
    -------
    df : pandas.DataFrame
        One row per rating with the columns of all three files plus
        ``User_idx`` and ``Movie_idx``. ``Gender`` is encoded as M -> 0, F -> 1;
        ``Age`` and ``Occupation`` are shifted so their minimum is 0.
    user_to_idx : dict
        Raw ``UserID`` -> consecutive index, in order of first appearance in
        the ratings.
    movie_to_idx : dict
        Raw ``MovieID`` -> consecutive index, in order of first appearance in
        the ratings.
    """
    def read_dat(filename, columns):
        # engine='python' is needed because '::' is a multi-character separator.
        return pd.read_csv(f'{data_dir}/{filename}', sep='::', header=None,
                           names=columns, engine='python', encoding='latin1')

    movie_data = read_dat('movies.dat', ['MovieID', 'Title', 'Genres'])
    rating_data = read_dat('ratings.dat',
                           ['UserID', 'MovieID', 'Rating', 'Timestamp'])
    user_data = read_dat('users.dat',
                         ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'])

    # Ratings drive the row set; movie and user attributes are attached to them.
    df = pd.merge(rating_data, movie_data, on='MovieID', how='left')
    df = pd.merge(df, user_data, on='UserID', how='left')

    # Convert categorical variables
    df['Gender'] = df['Gender'].map({'M': 0, 'F': 1})

    # Consecutive indices in order of first appearance (unique() keeps that order).
    user_to_idx = {user: idx for idx, user in enumerate(df['UserID'].unique())}
    movie_to_idx = {movie: idx
                    for idx, movie in enumerate(df['MovieID'].unique())}

    df['User_idx'] = df['UserID'].map(user_to_idx)
    df['Movie_idx'] = df['MovieID'].map(movie_to_idx)

    # Normalize age and occupation values to start from 0
    df['Age'] = df['Age'] - df['Age'].min()
    df['Occupation'] = df['Occupation'] - df['Occupation'].min()

    return df, user_to_idx, movie_to_idx

In [4]:
# Pick the best available backend: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [5]:
# Load the serialized TorchScript model onto the selected device and switch it to
# evaluation mode: the notebook only runs inference, so any train-time-only
# behaviour (e.g. dropout, batch-norm statistics updates) must be off for the
# predicted ratings to be deterministic.
ncf_cnn_model = torch.jit.load(
    'models/NCF_CNN_Enchanced.pt', map_location=device).eval()

In [6]:
# Build the merged ratings frame and the raw-ID -> index lookups, then invert
# both lookups so indices can be translated back to raw IDs.
data, user_to_idx, movie_to_idx = load_and_preprocess_data()
idx_to_user = dict(zip(user_to_idx.values(), user_to_idx.keys()))
idx_to_movie = dict(zip(movie_to_idx.values(), movie_to_idx.keys()))

In [31]:
def get_recommendations(user_id, model, data, user_to_idx, idx_to_movie, device, path='data/movies_grouplens/users.dat'):
    """Predict a rating for every movie in ``data`` the user has not rated.

    Parameters
    ----------
    user_id : int
        Raw ``UserID``; must be a key of ``user_to_idx`` and present in the
        users file.
    model : callable
        Called as ``model(user_idx, movie_idx, gender, age, occupation)`` with
        five one-element tensors on ``device``; must return a one-element
        tensor (``.item()`` is taken as the predicted rating).
    data : pandas.DataFrame
        Ratings frame with at least ``User_idx``, ``Movie_idx``, ``Title`` and
        ``Genres`` columns.
    user_to_idx : dict
        Raw ``UserID`` -> user index.
    idx_to_movie : dict
        Movie index -> raw ``MovieID``.
    device : torch.device
        Device the input tensors are moved to.
    path : str, optional
        Location of the ``::``-separated users file.

    Returns
    -------
    pandas.DataFrame
        One row per unrated movie, ordered by ascending movie index, with
        columns ``UserID, MovieID, Title, Genres, Gender, Age, Occupation,
        Rating`` (``Rating`` is the model's prediction).

    Raises
    ------
    KeyError
        If ``user_id`` is not in ``user_to_idx``.
    IndexError
        If ``user_id`` has no row in the users file.
    """
    user_data = pd.read_csv(path,
                            delimiter='::', header=None, engine='python', encoding='latin1')
    user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']
    user_idx = user_to_idx[user_id]

    # Same feature encoding as load_and_preprocess_data: M/F -> 0/1 and
    # age / occupation shifted so the minimum is 0.
    user_data['Gender'] = user_data['Gender'].map({'M': 0, 'F': 1})
    user_data['Age'] = user_data['Age'] - user_data['Age'].min()
    user_data['Occupation'] = user_data['Occupation'] - \
        user_data['Occupation'].min()

    # Columns 1..3 of the user's row are Gender, Age, Occupation.
    user_rows = user_data[user_data['UserID'] == user_id].values
    gender, age, occupation = user_rows[0][1:4]

    # Title/genres of the first row seen for each movie index, looked up by
    # label so the result does not depend on `data` having a default RangeIndex.
    movie_info = (
        data.drop_duplicates(subset='Movie_idx')
            .set_index('Movie_idx')[['Title', 'Genres']]
            .sort_index()
    )

    # One pass over the user's ratings instead of re-filtering the whole frame
    # once per movie.
    watched = set(data.loc[data['User_idx'] == user_idx, 'Movie_idx'])

    # The user-side inputs are identical for every candidate movie: build once.
    user_t = torch.tensor([user_idx]).to(device)
    gender_t = torch.tensor([gender]).to(device)
    age_t = torch.tensor([age]).to(device)
    occupation_t = torch.tensor([occupation]).to(device)

    list_ratings = []
    # Pure inference: no autograd bookkeeping needed.
    with torch.no_grad():
        for movie, title, genres in movie_info.itertuples(index=True, name=None):
            if movie in watched:
                continue
            rating = model(user_t, torch.tensor([movie]).to(device),
                           gender_t, age_t, occupation_t)
            list_ratings.append([user_id, idx_to_movie[movie], title,
                                 genres, gender, age, occupation, rating.item()])

    # Define the column names
    column_names = ['UserID', 'MovieID', 'Title',
                    'Genres', 'Gender', 'Age', 'Occupation', 'Rating']

    # Create a DataFrame
    return pd.DataFrame(list_ratings, columns=column_names)

In [32]:
# Score every movie the selected user has not rated with the NCF-CNN model.
cf_recommendations = get_recommendations(
    user_id=user_id,
    model=ncf_cnn_model,
    data=data,
    user_to_idx=user_to_idx,
    idx_to_movie=idx_to_movie,
    device=device,
)

## CBF


In [33]:
# CBF is a project-local class from src.modules.CBF, constructed here with no
# arguments.
# NOTE(review): presumably a content-based-filtering recommender, going by the
# section heading — confirm against the module.
from src.modules.CBF import CBF
cbf = CBF()

In [34]:
# CBF recommendations for the same user. The hybrid cells below join this result
# on MovieID/Title/Genres and read a 'Score' column from it, so the returned
# frame has to provide those four columns.
cbf_recommendations = cbf.cbf_recommend_movies(user_id)

## Hybrid

In [11]:
# Inner-join the two recommendation tables: only movies scored by both survive.
hybrid_recommendations = pd.merge(
    cf_recommendations,
    cbf_recommendations,
    on=['MovieID', 'Title', 'Genres'],
)

In [21]:
# Equal-weight blend of the model's predicted rating and the CBF score.
hybrid_recommendations['Ensemble_Rating'] = (
    hybrid_recommendations['Rating'].mul(0.5)
    .add(hybrid_recommendations['Score'].mul(0.5))
)

In [22]:
# Show the blended recommendations, best first (returns a sorted copy).
hybrid_recommendations.sort_values('Ensemble_Rating', ascending=False)

Unnamed: 0,UserID,MovieID,Title,Genres,Gender,Age,Occupation,Rating,Score,Ensemble_Rating
479,399,2324,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama,1,17,3,4.176987,3.670873,3.923930
6,399,2804,"Christmas Story, A (1983)",Comedy|Drama,1,17,3,4.048002,3.670873,3.859438
566,399,1276,Cool Hand Luke (1967),Comedy|Drama,1,17,3,3.890202,3.670873,3.780537
798,399,3897,Almost Famous (2000),Comedy|Drama,1,17,3,3.859765,3.670873,3.765319
139,399,3451,Guess Who's Coming to Dinner (1967),Comedy|Drama,1,17,3,3.744151,3.670873,3.707512
...,...,...,...,...,...,...,...,...,...,...
3012,399,1039,Synthetic Pleasures (1995),Documentary,1,17,3,2.341964,0.000000,1.170982
3464,399,977,Moonlight Murder (1936),Mystery,1,17,3,2.340078,0.000000,1.170039
2819,399,3149,Diamonds (1999),Mystery,1,17,3,2.338234,0.000000,1.169117
1318,399,416,Bad Girls (1994),Western,1,17,3,2.322509,0.000000,1.161254


In [40]:
# Genre strings ('|'-joined) of every movie this user rated strictly above 4.
genres_list = data.loc[
    (data['UserID'] == user_id) & (data['Rating'] > 4), 'Genres'
].tolist()

In [41]:
# Flatten the '|'-joined genre strings into one list with a single genre per item.
genres = [genre for entry in genres_list for genre in entry.split('|')]

In [42]:
from collections import Counter

In [43]:
# Tally how often each genre occurs among the user's top-rated movies.
counts = Counter()
counts.update(genres)

In [44]:
# Display the genre -> occurrence tally as the cell output.
counts

Counter({'Drama': 17,
         'Comedy': 12,
         'Thriller': 11,
         'Action': 9,
         'Sci-Fi': 8,
         'Adventure': 4,
         'Animation': 2,
         "Children's": 2,
         'Horror': 2,
         'Romance': 2,
         'Crime': 2,
         'War': 2,
         'Film-Noir': 1,
         'Fantasy': 1,
         'Western': 1,
         'Musical': 1})