In [1]:
import pandas as pd
import pickle
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

train_df = pd.read_csv("../data/processed/train_ratings.csv")
movies = pd.read_csv("../data/raw/movies.csv")

with open("../models/svd_model.pkl", "rb") as f:
    svd = pickle.load(f)

print("Hybrid setup ready ")


Hybrid setup ready 


In [2]:
movies["genres_clean"] = movies["genres"].str.lower().str.replace("|", " ")

tfidf = TfidfVectorizer()
genre_matrix = tfidf.fit_transform(movies["genres_clean"])

genre_sim = cosine_similarity(genre_matrix)


In [3]:
def get_unseen_movies(user_id, train_df, movies_df):
    seen_movies = train_df[train_df["userId"] == user_id]["movieId"].unique()
    all_movies = movies_df["movieId"].unique()
    return list(set(all_movies) - set(seen_movies))


In [4]:
def hybrid_recommend(user_id, k=10, alpha=0.7):
    unseen_movies = get_unseen_movies(user_id, train_df, movies)
    scores = []

    for movie_id in unseen_movies:
        svd_score = svd.predict(user_id, movie_id).est
        
        movie_idx = movies[movies["movieId"] == movie_id].index[0]
        genre_score = genre_sim[movie_idx].mean()
        
        final_score = alpha * svd_score + (1 - alpha) * genre_score
        scores.append((movie_id, final_score))
    
    scores.sort(key=lambda x: x[1], reverse=True)
    return scores[:k]


In [5]:
user_id = 10
hybrid_top10 = hybrid_recommend(user_id, k=5)

for movie_id, score in hybrid_top10:
    title = movies[movies["movieId"] == movie_id]["title"].values[0]
    print(title, round(score, 4))


Boot, Das (Boat, The) (1981) 3.2276
Spotlight (2015) 2.9942
Jaws (1975) 2.9804
Blood Simple (1984) 2.9544
Dallas Buyers Club (2013) 2.9483


Hybrid Recommendation Strategy:
A hybrid recommender was implemented by combining collaborative filtering scores from SVD with content-based genre similarity. This approach balances personalisation with diversity and reduces popularity bias.