In [1]:
import os

import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.sparse import hstack
import faiss

import pickle

# Lift every pandas display limit so frames are rendered in full
# (all rows, all columns, unlimited width and cell text length).
for display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(display_option, None)

In [2]:
# Pickled feature matrix built from movie metadata.
with open('data/metadata_vector_matrix_all.pkl', 'rb') as metadata_file:
    metadata_matrix_all = pickle.load(metadata_file)

# Pickled matrix of overview embeddings.
with open('data/overview_embedding_matrix.pkl', 'rb') as overview_file:
    overview_matrix = pickle.load(overview_file)

# Pickled model object; its .predict(...) is what scores candidates later on.
with open('data/models/best_SVD_model.pkl', 'rb') as model_file:
    svd = pickle.load(model_file)

# Tabular inputs. Only the first 100 rating rows are read.
df_movies = pd.read_csv('data/prepared_movies.csv')
df_ratings = pd.read_csv('data/prepared_ratings.csv', nrows=100)

1. Check if indices from metadata and overview are aligned

In [3]:
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load these index files from a trusted source.
with open('data/metadata_indices.pkl', 'rb') as f:
    metadata_indices = pickle.load(f)

with open('data/overview_indices.pkl', 'rb') as f:
    overview_indices = pickle.load(f)

# Compare the two objects using the pandas equals() method.
if not metadata_indices.equals(overview_indices):
    # Fail fast: if we only printed here, `indices` would stay undefined (or
    # keep a stale value from an earlier kernel run) and the recommendation
    # cells would break later with an unrelated-looking error.
    raise ValueError("The contents of metadata_indices.pkl and overview_indices.pkl are different.")

print("The contents of metadata_indices.pkl and overview_indices.pkl are the same.")
# Single lookup used by the rest of the notebook to resolve a title to a row position.
indices = overview_indices

The contents of metadata_indices.pkl and overview_indices.pkl are the same.


2. Stacking matrices

In [4]:
# Cast both feature blocks to float32 before placing them side by side.
metadata_float32 = metadata_matrix_all.astype(np.float32)
overview_float32 = np.array(overview_matrix).astype(np.float32)

# Column-wise concatenation, then a dense copy of the result.
combined_matrix_all = hstack([metadata_float32, overview_float32])
combined_matrix_dense = combined_matrix_all.toarray()

print("Shape of metadata matrix:", metadata_matrix_all.shape)
print("Shape of overview matrix:", overview_matrix.shape)
print("Shape of combined matrix:", combined_matrix_dense.shape)

Shape of metadata matrix: (16960, 42151)
Shape of overview matrix: (16960, 768)
Shape of combined matrix: (16960, 42919)


3. Building FAISS Index

In [5]:
# Width of one combined feature row; the index is created with this dimension.
embedding_dimension = combined_matrix_dense.shape[-1]

# normalize_L2 rewrites combined_matrix_dense in place (its return value is not used),
# so the inner-product index below is fed L2-normalised rows.
faiss.normalize_L2(combined_matrix_dense)

faiss_index = faiss.IndexFlatIP(embedding_dimension)
faiss_index.add(combined_matrix_dense)

print(f"Faiss index created with {faiss_index.ntotal} vectors.")

Faiss index created with 16960 vectors.


In [6]:
def find_movie_index(title, indices_map):
    """Look up the index stored for `title` in `indices_map`.

    Args:
        title: Key to look up.
        indices_map: Container supporting `in` and `[]` lookups by title
            (e.g. a pandas Series indexed by title, or a dict).

    Returns:
        The stored value for `title`. When the lookup yields a pandas Series
        (several entries share the title), the first entry is returned.
        `None` when `title` is not present.
    """
    if title not in indices_map:
        return None

    match = indices_map[title]
    if isinstance(match, pd.Series):
        # Duplicate titles: keep only the first occurrence.
        return match.iloc[0]
    return match

In [7]:
def get_hybrid_recommendations(user_id, title, n=10, c=50):
    """Recommend movies similar to `title`, ranked by predicted rating for `user_id`.

    The function first retrieves content-based candidates from the module-level
    `faiss_index` (nearest neighbours of the query movie's combined feature
    vector), then re-ranks them by `svd.predict(user_id, movie_id).est`.

    Args:
        user_id: User identifier passed to `svd.predict`.
        title: Title of the seed movie; resolved through the module-level `indices`.
        n: Maximum number of recommendations to return.
        c: Number of nearest-neighbour candidates to retrieve before re-ranking.

    Returns:
        A DataFrame with columns `title` and `est_rating`, sorted by
        `est_rating` in descending order, with at most `n` rows.

    Raises:
        ValueError: If `title` cannot be found in `indices`.
    """
    # Find the index for the input title
    idx = find_movie_index(title, indices)
    if idx is None:
        # Without this guard, indexing the matrix with None acts like np.newaxis,
        # the reshape flattens the whole matrix into one row, and the failure
        # surfaces as an opaque dimension error inside the Faiss search.
        raise ValueError(f"Title {title!r} was not found in the movie index.")
    idx = int(idx)

    # Get the combined (metadata + overview) vector for the query movie
    query_vector = combined_matrix_dense[idx].reshape(1, -1)

    # Search the Faiss index for the nearest neighbors
    # We search for c+1 because the query movie itself is expected among the results
    distances, movie_indices = faiss_index.search(query_vector, c + 1)

    # Filter out invalid indices (-1 can be returned by Faiss) and ensure bounds
    valid_mask = (movie_indices[0] != -1) & (movie_indices[0] < len(df_movies))
    movie_indices = movie_indices[0][valid_mask]

    # Exclude self-match if present
    self_mask = (movie_indices != idx)
    movie_indices = movie_indices[self_mask]

    # Use 'id' for SVD predictions
    candidates = df_movies.iloc[movie_indices][['title', 'id']].copy()
    candidates['est_rating'] = candidates['id'].apply(lambda x: svd.predict(user_id, x).est)

    return candidates.sort_values('est_rating', ascending=False)[['title', 'est_rating']].head(n)

4. Sample Recommendations

In [9]:
# Use the user id from the first loaded rating row as the demo user.
id_example = df_ratings['userId'].iat[0]

print('\n--- Top Picks For You (based on "Toy Story") ---')
toy_story_picks = get_hybrid_recommendations(id_example, "Toy Story")
display(toy_story_picks)


--- Top Picks For You (based on "Toy Story") ---


Unnamed: 0,title,est_rating
6529,Toy Story 3,3.732684
1656,Toy Story 2,3.593566
2293,"Monsters, Inc.",3.568384
1348,A Bug's Life,3.240117
14420,Finding Dory,3.199661
9141,Toy Story of Terror!,3.187554
15212,Lorenzo,3.130228
9506,Dug's Special Mission,3.103729
10366,The Madagascar Penguins in a Christmas Caper,3.083525
10177,Small Fry,3.061372
