In [1]:
import pandas as pd
import pickle

# Ratings table; the cells below read its "userId", "movieId" and "rating" columns.
train_df = pd.read_csv("../data/processed/train_ratings.csv")
# Movie catalogue; the cells below read its "movieId", "title" and "genres" columns.
movies = pd.read_csv("../data/raw/movies.csv")

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so only
# load model files that were produced by this project / a trusted source.
# The loaded object is used below via svd.predict(user_id, movie_id).est
# (presumably a scikit-surprise SVD model -- TODO confirm against the training notebook).
with open("../models/svd_model.pkl", "rb") as f:
    svd = pickle.load(f)

print("Data and model loaded ")


Data and model loaded 


In [2]:
def get_user_liked_movies(user_id, train_df, min_rating=4.0):
    """Return the ids of the movies a user rated at or above a threshold.

    Parameters
    ----------
    user_id : value compared against the "userId" column.
    train_df : DataFrame with "userId", "movieId" and "rating" columns.
    min_rating : inclusive lower bound on "rating" for a movie to count as liked.

    Returns
    -------
    list
        The "movieId" values of the matching rows, in row order
        (empty when nothing matches).
    """
    is_this_user = train_df["userId"] == user_id
    is_liked = train_df["rating"] >= min_rating
    return train_df.loc[is_this_user & is_liked, "movieId"].tolist()


In [3]:
def get_movie_genres(movie_id, movies_df):
    """Return the lower-cased genre names of a movie.

    Parameters
    ----------
    movie_id : value compared against the "movieId" column.
    movies_df : DataFrame with "movieId" and a pipe-separated "genres" column.

    Returns
    -------
    list of str
        Lower-cased genres of the first row matching `movie_id`. An empty
        list is returned when the movie is unknown, when its genres cell is
        missing (NaN), or when it carries no real genre.
    """
    matches = movies_df.loc[movies_df["movieId"] == movie_id, "genres"].values
    if len(matches) == 0:
        return []

    raw_genres = matches[0]
    # A blank CSV cell is read by pandas as NaN (a float); calling .lower()
    # on it would raise AttributeError, so treat it as "no genres".
    if not isinstance(raw_genres, str):
        return []

    # MovieLens marks genre-less movies with this literal placeholder. Left in,
    # two unrelated genre-less movies would appear to "share" a genre.
    no_genre_placeholder = "(no genres listed)"
    genres = (part.strip() for part in raw_genres.lower().split("|"))
    return [g for g in genres if g and g != no_genre_placeholder]


In [4]:
def generate_explanation(user_id, recommended_movie_id, max_reasons=2):
    """Build a one-sentence, genre-based explanation for a recommendation.

    The explanation cites liked movies (as returned by
    `get_user_liked_movies` on the global `train_df`) that share at least one
    genre with the recommended movie, looked up in the global `movies` frame.

    Parameters
    ----------
    user_id : user the recommendation was produced for.
    recommended_movie_id : id of the recommended movie.
    max_reasons : maximum number of liked movies to cite (default 2, the
        previously hard-coded value).

    Returns
    -------
    str
        "Recommended because you liked ..." listing up to `max_reasons`
        titles with their shared genres, or a generic fallback sentence when
        no liked movie shares a genre with the recommendation.
    """
    fallback = "Recommended based on your overall viewing preferences."

    rec_genres = set(get_movie_genres(recommended_movie_id, movies))
    # Without genres on the recommended movie nothing can overlap, so skip
    # the scan over the user's liked movies entirely.
    if not rec_genres or max_reasons <= 0:
        return fallback

    reasons = []
    for movie_id in get_user_liked_movies(user_id, train_df):
        common = rec_genres & set(get_movie_genres(movie_id, movies))
        if not common:
            continue

        title = movies.loc[movies["movieId"] == movie_id, "title"].values[0]
        # Sort the shared genres: set iteration order for strings changes
        # between interpreter runs, which made the text non-reproducible.
        reasons.append((title, sorted(common)))

        # Only the first `max_reasons` matches are ever shown, so stop
        # scanning as soon as we have them.
        if len(reasons) >= max_reasons:
            break

    if not reasons:
        return fallback

    parts = [
        f"'{title}' (shared genres: {', '.join(genres)})"
        for title, genres in reasons
    ]
    return "Recommended because you liked " + " and ".join(parts)


In [5]:
def get_unseen_movies(user_id, train_df, movies_df):
    """Return the catalogue movie ids that a user has no rating for.

    Parameters
    ----------
    user_id : value compared against the "userId" column of `train_df`.
    train_df : DataFrame with "userId" and "movieId" columns.
    movies_df : DataFrame with a "movieId" column (the full catalogue).

    Returns
    -------
    list
        Ids present in `movies_df` but absent from the user's rows in
        `train_df`. The order is whatever the underlying set yields.
    """
    user_rows = train_df["userId"] == user_id
    rated_ids = set(train_df.loc[user_rows, "movieId"].unique())
    catalogue_ids = set(movies_df["movieId"].unique())
    return list(catalogue_ids - rated_ids)


def recommend_top_k(user_id, k=10):
    """Return the k unseen movies with the highest predicted rating.

    Every movie returned by `get_unseen_movies` for this user (using the
    global `train_df` and `movies` frames) is scored with the global `svd`
    model, and the results are ranked by that estimate.

    Parameters
    ----------
    user_id : user to recommend for.
    k : number of recommendations to keep (applied as a slice bound).

    Returns
    -------
    list of (movie_id, estimate) tuples, highest estimate first.
    """
    candidates = get_unseen_movies(user_id, train_df, movies)

    # `.est` is the predicted rating on the object returned by svd.predict.
    scored = [
        (movie_id, svd.predict(user_id, movie_id).est)
        for movie_id in candidates
    ]

    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:k]


In [6]:
# Demo: print the top recommendations for one user together with their
# genre-based explanations.
user_id = 10
# NOTE(review): the variable is called top_10 but k=5 is requested, so it
# holds at most five (movie_id, predicted_rating) pairs.
top_10 = recommend_top_k(user_id, k=5)

for movie_id, score in top_10:
    # Resolve the display title from the catalogue; .values[0] takes the first
    # matching row.
    title = movies[movies["movieId"] == movie_id]["title"].values[0]
    explanation = generate_explanation(user_id, movie_id)
    
    print(f" {title}")
    # round() drops trailing zeros, e.g. 4.5 is printed rather than 4.50.
    print(f" Predicted Rating: {round(score,2)}")
    print(f" {explanation}")
    print("-" * 60)


 Boot, Das (Boat, The) (1981)
 Predicted Rating: 4.54
 Recommended because you liked 'Intouchables (2011)' (shared genres: drama) and 'Love Actually (2003)' (shared genres: drama)
------------------------------------------------------------
 Spotlight (2015)
 Predicted Rating: 4.23
 Recommended because you liked 'Casino Royale (2006)' (shared genres: thriller) and 'Skyfall (2012)' (shared genres: thriller)
------------------------------------------------------------
 Jaws (1975)
 Predicted Rating: 4.21
 Recommended because you liked 'Batman Begins (2005)' (shared genres: action) and 'Dark Knight, The (2008)' (shared genres: action)
------------------------------------------------------------
 Blood Simple (1984)
 Predicted Rating: 4.18
 Recommended because you liked 'Intouchables (2011)' (shared genres: drama) and 'Love Actually (2003)' (shared genres: drama)
------------------------------------------------------------
 Crumb (1994)
 Predicted Rating: 4.17
 Recommended based on your overall viewing preferences.
------------------------------------------------------------

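Explainability Strategy:
For each recommended movie, an explanation is generated by finding movies the user previously rated highly (4.0 or above) that share at least one genre with the recommendation; up to two such movies are cited together with the genres they share. When no liked movie shares a genre with the recommendation, a generic fallback message is shown instead. This provides transparent, user-understandable reasoning behind recommendations, improving trust and interpretability. Note that the explanation is derived from genre overlap after the fact: it does not expose the factors the SVD model itself used to produce the predicted rating.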