## CF

In [19]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [18]:
data = pd.read_csv('data/movies_grouplens/ratings.dat',
                   delimiter='::', header=None, engine='python', encoding='latin1')
data.columns = ['user', 'event', 'rating', 'timestamp']
data.head()

Unnamed: 0,user,event,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [20]:
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
device = torch.device('cuda') if torch.cuda.is_available() else device

In [21]:
ncf_cnn_model = torch.jit.load('models/NCF_CNN.pt', map_location=device)

In [22]:
users = data['user'].unique()
events = data['event'].unique()

In [23]:
user_to_idx = {user: idx for idx, user in enumerate(users)}
event_to_idx = {event: idx for idx, event in enumerate(events)}
idx_to_user = {idx: user for user,
               idx in user_to_idx.items()}  # For reverse lookup
idx_to_event = {idx: event for event,
                idx in event_to_idx.items()}  # For reverse lookup

for key, value in user_to_idx.items():
    if value == 0:
        print(f'User: {key}, assigned index: {value}')

for key, value in event_to_idx.items():
    if value == 0:
        print(f'Event: {key}, assigned index: {value}')

User: 1, assigned index: 0
Event: 1193, assigned index: 0


In [46]:
event_to_idx[2080]

608

In [28]:
data['user_idx'] = data['user'].map(user_to_idx)
data['event_idx'] = data['event'].map(event_to_idx)

In [41]:
import torch
import pandas as pd
import numpy as np


def cf_get_recommendations(user_id, data, model, idx_to_event, idx_to_user, user_to_idx, movie_data, k=10):
    model.eval()
    list_ratings = []

    # Get user index from user_to_idx
    user_idx = user_to_idx[user_id]
    print(f"User Index for {user_id}: {user_idx}")

    # Create a list of all event/movie indices
    event_idx = np.arange(
        np.min(data['event_idx']), np.max(data['event_idx'])+1, 1)

    # Iterate over all events (movies)
    for event in event_idx:
        # For each event, predict the rating for this user
        rating = model(torch.tensor([user_idx]).to(device),
                       torch.tensor([event]).to(device))

        # Append the user_id, event/movie, and the predicted rating
        list_ratings.append(
            [user_id, idx_to_event[int(event)], event, rating.item()])

    # Sort the list of ratings in descending order by predicted rating
    recommendations = sorted(list_ratings, key=lambda x: x[3], reverse=True)

    # Convert the recommendations list to a pandas DataFrame
    recommendations_df = pd.DataFrame(recommendations, columns=[
                                      'UserID', 'MovieTitle', 'MovieID', 'PredictedRating'])

    # Merge the recommendations with the movie details (from movie_data)
    recommendations_df = recommendations_df.merge(
        movie_data[['MovieID', 'Title', 'Genres']], on='MovieID', how='left')

    # Return top k recommendations as a DataFrame
    return recommendations_df[['UserID', 'MovieID', 'Title', 'Genres', 'PredictedRating']].head(k)

In [52]:
top_recommendations_df = cf_get_recommendations(
    600, data, ncf_cnn_model, idx_to_event, idx_to_user, user_to_idx, movie_data, k=10)

# Display the top recommendations DataFrame
print(top_recommendations_df)

User Index for 600: 599
   UserID  MovieID                                              Title  \
0     600      259                               Kiss of Death (1995)   
1     600      321  Strawberry and Chocolate (Fresa y chocolate) (...   
2     600       23                                   Assassins (1995)   
3     600      167                               Feast of July (1995)   
4     600      127  Silence of the Palace, The (Saimt el Qusur) (1...   
5     600       47                               Seven (Se7en) (1995)   
6     600      669                                   Aparajito (1956)   
7     600      128                              Jupiter's Wife (1994)   
8     600     1449                         Waiting for Guffman (1996)   
9     600     2231                                    Rounders (1998)   

                 Genres  PredictedRating  
0  Crime|Drama|Thriller         4.557754  
1                 Drama         4.548601  
2              Thriller         4.539856  


## CBF

In [12]:
import pickle
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

with open('models/CBF/tfidf_matrix.pkl', 'rb') as f:
    tfidf_matrix = pickle.load(f)

with open('models/CBF/tfidf_vectorizer.pkl', 'rb') as f:
    tfidf = pickle.load(f)

with open('models/CBF/user_profiles.pkl', 'rb') as f:
    user_profiles = pickle.load(f)

In [13]:
movie_data = pd.read_csv('data/movies_grouplens/movies.dat',
                         delimiter='::', header=None, engine='python', encoding='latin1')
rating_data = pd.read_csv('data/movies_grouplens/ratings.dat',
                          delimiter='::', header=None, engine='python', encoding='latin1')
user_data = pd.read_csv('data/movies_grouplens/users.dat',
                        delimiter='::', header=None, engine='python', encoding='latin1')

In [14]:
movie_data.columns = ['MovieID', 'Title', 'Genres']
rating_data.columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']
user_data.columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

In [15]:
user_movie_data = rating_data.merge(movie_data, on='MovieID')

In [36]:
def cbf_recommend_movies(user_id, top_n=5):

    if user_id not in user_profiles:
        print(f"No profile found for UserID {user_id}.")
        return None

    user_profile = user_profiles[user_id]

    similarity_scores = cosine_similarity(
        [user_profile], tfidf_matrix).flatten()

    scaled_scores = similarity_scores * 5

    movie_scores = pd.DataFrame({
        'MovieID': movie_data['MovieID'],
        'Score': scaled_scores
    })

    watched_movie_ids = []
    movie_scores = movie_scores[~movie_scores['MovieID'].isin(
        watched_movie_ids)]

    top_recommendations = movie_scores.sort_values(
        by='Score', ascending=False).head(top_n)

    top_recommendations = top_recommendations.merge(
        movie_data, on='MovieID', how='left')

    return top_recommendations[['MovieID', 'Title', 'Genres', 'Score']]

In [48]:
cbf_recommend_movies(582, 10)

Unnamed: 0,MovieID,Title,Genres,Score
0,1648,"House of Yes, The (1997)",Comedy|Drama|Thriller,4.112984
1,3227,"Not Love, Just Frenzy (Más que amor, frenesí) ...",Comedy|Drama|Thriller,4.112984
2,2699,Arachnophobia (1990),Action|Comedy|Sci-Fi|Thriller,3.797093
3,1876,Deep Impact (1998),Action|Drama|Sci-Fi|Thriller,3.568429
4,1861,Junk Mail (1997),Comedy|Thriller,3.513449
5,1764,Tainted (1998),Comedy|Thriller,3.513449
6,2806,Teaching Mrs. Tingle (1999),Comedy|Thriller,3.513449
7,3893,Nurse Betty (2000),Comedy|Thriller,3.513449
8,2349,Mona Lisa (1986),Comedy|Thriller,3.513449
9,1824,Homegrown (1998),Comedy|Thriller,3.513449


In [49]:
user_movie_data[user_movie_data['UserID'] == 582]

Unnamed: 0,UserID,MovieID,Rating,Timestamp,Title,Genres
88175,582,2987,5,981996257,Who Framed Roger Rabbit? (1988),Adventure|Animation|Film-Noir
88176,582,2050,3,975958919,Herbie Goes Bananas (1980),Adventure|Children's|Comedy
88177,582,3793,5,981996257,X-Men (2000),Action|Sci-Fi
88178,582,3798,4,981996233,What Lies Beneath (2000),Thriller
88179,582,3005,5,981995843,"Bone Collector, The (1999)",Thriller
...,...,...,...,...,...,...
88248,582,2959,5,976891686,Fight Club (1999),Drama
88249,582,86,5,981996257,White Squall (1996),Adventure|Drama
88250,582,2976,1,981996331,Bringing Out the Dead (1999),Drama|Horror
88251,582,1093,5,981995909,"Doors, The (1991)",Drama|Musical
