In [2]:
import numpy as np
import pandas as pd
import pickle

In [3]:
# Load preprocessed data
content_based_data = pd.read_csv('../data/processed/content_based_data.csv')
collaborative_filtering_data = pd.read_csv('../data/processed/collaborative_filtering_data.csv')

In [6]:
# Load the collaborative filtering model
with open('../models/collaborative_filtering_model.pkl', 'rb') as f:
    collaborative_filtering_model = pickle.load(f)

# Load precomputed cosine similarity matrix
with open('../models/cosine_similarity_matrix.pkl', 'rb') as f:
    cosine_sim = pickle.load(f)

In [7]:
# Hybrid recommendation function
def hybrid_recommendation(user_id, n=10, weight_content=0.5, weight_collab=0.5):
    # Get movies the user has already rated
    user_ratings = collaborative_filtering_data[collaborative_filtering_data['user_id'] == user_id]['item_id'].unique()
    all_movies = collaborative_filtering_data['item_id'].unique()

    # Get unseen movies
    unrated_movies = [movie for movie in all_movies if movie not in user_ratings]
    
    # Predict ratings using collaborative filtering
    collab_predictions = [collaborative_filtering_model.predict(user_id, movie_id).est for movie_id in unrated_movies]

    # Get content-based recommendations using precomputed cosine similarity
    content_predictions = [np.mean([cosine_sim[movie_idx][content_based_data[content_based_data['movie_id'] == movie].index[0]] 
                                   for movie_idx in user_ratings]) for movie in unrated_movies]
    
    # Hybrid prediction: weighted average of both predictions
    hybrid_predictions = [(weight_content * content_pred + weight_collab * collab_pred) 
                          for content_pred, collab_pred in zip(content_predictions, collab_predictions)]
    
    # Get top N recommended movies
    top_n_idx = np.argsort(hybrid_predictions)[-n:][::-1]
    top_n_movies = [unrated_movies[idx] for idx in top_n_idx]
    
    return top_n_movies

In [8]:
recommendations = hybrid_recommendation(user_id=196, n=10)
recommendations

[1189, 1639, 357, 134, 64, 1449, 1512, 1594, 178, 427]

In [10]:
# Get movie titles from recommendations
def get_movie_titles(movie_ids):
    return content_based_data[content_based_data['movie_id'].isin(movie_ids)]['movie_title'].tolist()

# Test the hybrid model for a user
recommendations = hybrid_recommendation(user_id=196, n=10)

# Get the recommended movie titles
recommended_movie_titles = get_movie_titles(recommendations)

# Print the recommended movies
for movie_title in recommended_movie_titles:
    print(movie_title)

Shawshank Redemption, The (1994)
Citizen Kane (1941)
12 Angry Men (1957)
One Flew Over the Cuckoo's Nest (1975)
To Kill a Mockingbird (1962)
Prefontaine (1997)
Pather Panchali (1955)
World of Apu, The (Apur Sansar) (1959)
Everest (1998)
Bitter Sugar (Azucar Amargo) (1996)
