## LightFM fits in as a hybrid/matrix factorization model

In [21]:
import pandas as pd
import pickle
from scipy.sparse import csr_matrix
import os

PROCESSED_DIR = "../data/processed"
movies_file = os.path.join(PROCESSED_DIR, "movies_processed.csv")
sparse_file = os.path.join(PROCESSED_DIR, "user_item_matrix.pkl")

# Load movies
movies = pd.read_csv(movies_file)

# Load user-item sparse matrix
with open(sparse_file, "rb") as f:
    user_item_matrix = pickle.load(f)

# Optionally, compute user similarity for User CF
from sklearn.metrics.pairwise import cosine_similarity
user_sim_matrix = cosine_similarity(user_item_matrix)


In [22]:
import sys
PROJECT_ROOT = os.path.abspath("..")  # notebook is in notebooks/
sys.path.append(PROJECT_ROOT)

from src.cf_engines import predict_ratings_user_cf, predict_ratings_item_cf


In [23]:
ratings = pd.read_csv(ratings_file)
movies = pd.read_csv(movies_file)

print("Ratings shape:", ratings.shape)
print("Movies shape:", movies.shape)


Ratings shape: (100000, 7)
Movies shape: (1682, 5)


In [24]:
# Initialize LightFM Dataset
lfm_dataset = LightFMDataset()
lfm_dataset.fit(
    users=ratings['user_idx'].unique(),
    items=ratings['movie_idx'].unique(),
    user_features=None,     # Optional: 'age', 'occupation' etc.
    item_features=None      # Optional: 'genre' etc.
)

# Build interaction matrix
(interactions, weights) = lfm_dataset.build_interactions(
    [(row.user_idx, row.movie_idx, row.rating) for row in ratings.itertuples()]
)

print("Interactions shape:", interactions.shape)


Interactions shape: (943, 1682)


In [25]:
# Use WARP loss for ranking
model = LightFM(loss='warp', random_state=42)
model.fit(interactions, epochs=30, num_threads=4)  # adjust epochs if needed


<lightfm.lightfm.LightFM at 0x16bc19610>

In [26]:
# Precision and Recall at top 10
precisions = precision_at_k(model, interactions, k=10).mean()
recalls = recall_at_k(model, interactions, k=10).mean()

print(f"LightFM Precision@10: {precisions:.4f}")
print(f"LightFM Recall@10: {recalls:.4f}")


LightFM Precision@10: 0.7716
LightFM Recall@10: 0.1302


In [27]:
def recommend_lightfm(user_idx, model, dataset, movies_df, top_n=10):
    # Get number of items
    n_items = dataset.interactions_shape()[1]
    
    # Predict scores for all items for the given user
    scores = model.predict(user_idx, np.arange(n_items))
    
    # Get top-N item indices
    top_items = np.argsort(-scores)[:top_n]
    
    # Map back to movie IDs
    recommendations = movies_df[movies_df["movie_idx"].isin(top_items)]
    
    return recommendations[["movie_idx", "title"]]



In [28]:
# Example: user 5
recommend_lightfm(5, model, lfm_dataset, movies, top_n=10)

Unnamed: 0,movie_idx,title
0,24,Toy Story (1995)
49,357,Star Wars (1977)
68,191,Forrest Gump (1994)
97,31,"Silence of the Lambs, The (1991)"
171,161,"Empire Strikes Back, The (1980)"
173,101,Raiders of the Lost Ark (1981)
180,52,Return of the Jedi (1983)
203,247,Back to the Future (1985)
422,57,E.T. the Extra-Terrestrial (1982)
495,99,It's a Wonderful Life (1946)


In [29]:
def recommend_movies_hybrid(user_idx, top_n=10, k=5, min_rated_user=5, min_rated_item=2):
    user_rated_count = user_item_matrix[user_idx].getnnz()
    
    if user_rated_count >= min_rated_user:
        print(f"User CF for user {user_idx}")
        return predict_ratings_user_cf(user_idx, user_item_matrix, user_sim_matrix, movies, top_n, k)
    elif user_rated_count >= min_rated_item:
        print(f"Item CF for user {user_idx}")
        item_sim_matrix = cosine_similarity(user_item_matrix.T)
        return predict_ratings_item_cf(user_idx, user_item_matrix, item_sim_matrix, movies, top_n, k)
    else:
        print(f"LightFM for cold-start user {user_idx}")
        return recommend_lightfm(user_idx, model, lfm_dataset, movies, top_n)


In [30]:
for user in [0, 5, 50, 150, 200]:
    print(f"\nTop {10} recommendations for User {user}:")
    display(recommend_movies_hybrid(user, top_n=10, k=5))



Top 10 recommendations for User 0:
User CF for user 0


Unnamed: 0,movie_idx,title
49,357,Star Wars (1977)
55,216,Pulp Fiction (1994)
99,49,Fargo (1996)
167,367,Monty Python and the Holy Grail (1974)
193,34,"Sting, The (1973)"
203,247,Back to the Future (1985)
209,347,Indiana Jones and the Last Crusade (1989)
215,166,When Harry Met Sally... (1989)
434,622,Butch Cassidy and the Sundance Kid (1969)
731,366,Dave (1993)



Top 10 recommendations for User 5:
User CF for user 5


Unnamed: 0,movie_idx,title
63,239,"Shawshank Redemption, The (1994)"
116,140,"Rock, The (1996)"
190,254,Amadeus (1984)
215,166,When Harry Met Sally... (1989)
233,29,Jaws (1975)
514,58,"Boot, Das (1981)"
519,230,"Great Escape, The (1963)"
565,476,Clear and Present Danger (1994)
658,488,Arsenic and Old Lace (1944)
662,550,Being There (1979)



Top 10 recommendations for User 50:
User CF for user 50


Unnamed: 0,movie_idx,title
285,289,"English Patient, The (1996)"
293,95,Liar Liar (1997)
314,325,Apt Pupil (1998)
322,144,Dante's Peak (1997)
324,331,Crash (1996)
327,98,Conspiracy Theory (1997)
330,608,"Edge, The (1997)"
346,184,Wag the Dog (1997)
688,890,"Jackal, The (1997)"
1293,1220,Ayn Rand: A Sense of Life (1997)



Top 10 recommendations for User 150:
User CF for user 150


Unnamed: 0,movie_idx,title
55,216,Pulp Fiction (1994)
123,873,Lone Star (1996)
136,148,Big Night (1996)
179,571,Apocalypse Now (1979)
237,329,Raising Arizona (1987)
356,240,One Flew Over the Cuckoo's Nest (1975)
473,5,Dr. Strangelove or: How I Learned to Stop Worr...
477,186,"Philadelphia Story, The (1940)"
495,99,It's a Wonderful Life (1946)
653,496,Chinatown (1974)



Top 10 recommendations for User 200:
User CF for user 200


Unnamed: 0,movie_idx,title
116,140,"Rock, The (1996)"
221,12,Star Trek: First Contact (1996)
251,350,"Lost World: Jurassic Park, The (1997)"
257,157,Contact (1997)
299,652,Air Force One (1997)
404,136,Mission: Impossible (1996)
470,120,Courage Under Fire (1996)
865,445,Michael (1996)
1011,644,Private Parts (1997)
1046,336,Multiplicity (1996)
