In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import sys

# Add the project root (the parent of the current working directory) to the
# import path so the `src` package imported below can be resolved.
PROJECT_ROOT = os.path.abspath("..")
# Guard the append: notebook cells get re-run, and an unconditional append
# would add a duplicate sys.path entry on every execution of this cell.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# CF engines
from src.cf_engines import predict_ratings_user_cf, predict_ratings_item_cf
from sklearn.metrics.pairwise import cosine_similarity

# Paths: every input artefact lives in the processed-data directory, so the
# three file paths are built from one list of file names.
PROCESSED_DIR = "../data/processed"
ratings_file, movies_file, sparse_file = (
    os.path.join(PROCESSED_DIR, file_name)
    for file_name in (
        "ratings_processed.csv",
        "movies_processed.csv",
        "user_item_matrix.pkl",
    )
)


In [2]:
# Read the processed ratings and movies CSV files into DataFrames.
ratings = pd.read_csv(ratings_file)
movies = pd.read_csv(movies_file)

# Restore the pickled user-item matrix.
# SECURITY: unpickling can execute arbitrary code -- only load a file that was
# produced by this project's own preprocessing step.
with open(sparse_file, mode="rb") as pkl_handle:
    user_item_matrix = pickle.load(pkl_handle)

# Rows are users and columns are items (matrix is expected to be 2-D).
(n_users, n_items) = user_item_matrix.shape

# Pairwise cosine similarity between the rows of the matrix, used by User CF.
user_sim_matrix = cosine_similarity(user_item_matrix)


In [3]:
# Movie popularity: number of ratings per movie_idx, most-rated first, joined
# with the movie metadata. After reset_index() the "rating" column holds the
# rating *count*, not a rating value.
popularity_df = pd.merge(
    ratings.groupby("movie_idx")["rating"]
    .count()
    .sort_values(ascending=False)
    .reset_index(),
    movies,
    on="movie_idx",
    how="left",
)


In [6]:
def recommend_movies(user_idx, top_n=10, k=5, min_rated_user=5, min_rated_item=2):
    """
    Returns top-N movie recommendations for a user.

    The strategy is chosen from the number of stored entries in the user's
    row of the global ``user_item_matrix``:

    - If the user has enough ratings (>= min_rated_user), use User CF
    - Else if the user has at least min_rated_item ratings, use Item CF
    - Else, use Popularity (first ``top_n`` rows of ``popularity_df``)

    Note that the Item CF branch is only reachable when
    ``min_rated_item < min_rated_user``.

    Parameters
    ----------
    user_idx : int
        Row index of the user in ``user_item_matrix``; must satisfy
        ``0 <= user_idx < user_item_matrix.shape[0]``.
    top_n : int, default 10
        Number of recommendations requested.
    k : int, default 5
        Forwarded unchanged to the CF engine (presumably the neighbourhood
        size -- confirm in ``src.cf_engines``).
    min_rated_user : int, default 5
        Minimum number of rated movies required to use User CF.
    min_rated_item : int, default 2
        Minimum number of rated movies required to use Item CF.

    Returns
    -------
    The object returned by the selected CF engine, or, for the popularity
    fallback, a DataFrame with the columns ``movie_idx`` and ``title``.

    Raises
    ------
    IndexError
        If ``user_idx`` is outside the valid row range. Negative indices are
        rejected explicitly because they would otherwise wrap around and
        silently return recommendations for a different user.
    """
    n_rows = user_item_matrix.shape[0]
    if not 0 <= user_idx < n_rows:
        raise IndexError(
            f"user_idx {user_idx} is out of range for {n_rows} users"
        )

    # Count how many movies the user has rated (stored entries in their row)
    user_rated_count = user_item_matrix[user_idx].getnnz()

    if user_rated_count >= min_rated_user:
        # Use User CF
        print(f"Using User CF for user {user_idx}")
        recommendations = predict_ratings_user_cf(
            user_idx=user_idx,
            user_item_matrix=user_item_matrix,
            user_sim_matrix=user_sim_matrix,
            movies_df=movies,
            top_n=top_n,
            k=k
        )
    elif user_rated_count >= min_rated_item:
        # Use Item CF
        print(f"Using Item CF for user {user_idx}")
        # The item-item similarity depends only on the matrix, not on the
        # user, so compute it once and reuse it across calls. The cache is
        # keyed on the matrix object itself: rebinding ``user_item_matrix``
        # to a new object triggers a recompute (in-place edits do not).
        cached = getattr(recommend_movies, "_item_sim_cache", None)
        if cached is None or cached[0] is not user_item_matrix:
            cached = (user_item_matrix, cosine_similarity(user_item_matrix.T))
            recommend_movies._item_sim_cache = cached
        item_sim_matrix = cached[1]
        recommendations = predict_ratings_item_cf(
            user_idx=user_idx,
            user_item_matrix=user_item_matrix,
            item_sim_matrix=item_sim_matrix,
            movies_df=movies,
            top_n=top_n,
            k=k
        )
    else:
        # Use Popularity
        # NOTE(review): this fallback does not filter out movies the user has
        # already rated -- confirm whether that is intended.
        print(f"Using Popularity-based recommendations for user {user_idx}")
        recommendations = popularity_df.head(top_n)[["movie_idx", "title"]]

    return recommendations



In [7]:
# Example: run the recommender for a handful of user indices and render each
# result under its own header line.
sample_users = [0, 5, 50, 150, 200]
for user in sample_users:
    header = f"\nTop 10 recommendations for User {user}:"
    print(header)
    user_recs = recommend_movies(user_idx=user, top_n=10, k=5)
    display(user_recs)




Top 10 recommendations for User 0:
Using User CF for user 0


Unnamed: 0,movie_idx,title
49,357,Star Wars (1977)
55,216,Pulp Fiction (1994)
99,49,Fargo (1996)
167,367,Monty Python and the Holy Grail (1974)
193,34,"Sting, The (1973)"
203,247,Back to the Future (1985)
209,347,Indiana Jones and the Last Crusade (1989)
215,166,When Harry Met Sally... (1989)
434,622,Butch Cassidy and the Sundance Kid (1969)
731,366,Dave (1993)



Top 10 recommendations for User 5:
Using User CF for user 5


Unnamed: 0,movie_idx,title
63,239,"Shawshank Redemption, The (1994)"
116,140,"Rock, The (1996)"
190,254,Amadeus (1984)
215,166,When Harry Met Sally... (1989)
233,29,Jaws (1975)
514,58,"Boot, Das (1981)"
519,230,"Great Escape, The (1963)"
565,476,Clear and Present Danger (1994)
658,488,Arsenic and Old Lace (1944)
662,550,Being There (1979)



Top 10 recommendations for User 50:
Using User CF for user 50


Unnamed: 0,movie_idx,title
285,289,"English Patient, The (1996)"
293,95,Liar Liar (1997)
314,325,Apt Pupil (1998)
322,144,Dante's Peak (1997)
324,331,Crash (1996)
327,98,Conspiracy Theory (1997)
330,608,"Edge, The (1997)"
346,184,Wag the Dog (1997)
688,890,"Jackal, The (1997)"
1293,1220,Ayn Rand: A Sense of Life (1997)



Top 10 recommendations for User 150:
Using User CF for user 150


Unnamed: 0,movie_idx,title
55,216,Pulp Fiction (1994)
123,873,Lone Star (1996)
136,148,Big Night (1996)
179,571,Apocalypse Now (1979)
237,329,Raising Arizona (1987)
356,240,One Flew Over the Cuckoo's Nest (1975)
473,5,Dr. Strangelove or: How I Learned to Stop Worr...
477,186,"Philadelphia Story, The (1940)"
495,99,It's a Wonderful Life (1946)
653,496,Chinatown (1974)



Top 10 recommendations for User 200:
Using User CF for user 200


Unnamed: 0,movie_idx,title
116,140,"Rock, The (1996)"
221,12,Star Trek: First Contact (1996)
251,350,"Lost World: Jurassic Park, The (1997)"
257,157,Contact (1997)
299,652,Air Force One (1997)
404,136,Mission: Impossible (1996)
470,120,Courage Under Fire (1996)
865,445,Michael (1996)
1011,644,Private Parts (1997)
1046,336,Multiplicity (1996)


In [8]:
# Interactive input
# NOTE: input() blocks until answered, so this cell needs a live user and
# will stall an unattended "Restart & Run All".
user_input = int(input("Enter user ID: "))
top_n = int(input("Enter number of recommendations: "))

# Validate before recommending: the value is used directly as a row index of
# user_item_matrix, where a negative number would wrap around to another user
# instead of failing.
if not 0 <= user_input < user_item_matrix.shape[0]:
    raise ValueError(
        f"User ID must be between 0 and {user_item_matrix.shape[0] - 1}, got {user_input}"
    )
if top_n < 1:
    raise ValueError(f"Number of recommendations must be at least 1, got {top_n}")

display(recommend_movies(user_idx=user_input, top_n=top_n, k=5))


Enter user ID:  30
Enter number of recommendations:  4


Using User CF for user 30


Unnamed: 0,movie_idx,title
69,423,Four Weddings and a Funeral (1994)
124,552,Phenomenon (1996)
237,329,Raising Arizona (1987)
327,98,Conspiracy Theory (1997)
