# 04 Hybrid Model: Collaborative Filtering + NLP
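Combine SVD rating predictions (collaborative filtering) with TF-IDF cosine similarity between product descriptions (content-based) into a single weighted score per product.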

In [None]:
import pandas as pd
from src.nlp_utils import compute_tfidf_matrix
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD

# Load the user-product interactions and the product catalogue.
# Both paths are relative, so they resolve against the current working directory.
inter = pd.read_csv('../data/interactions.csv')
prod = pd.read_csv('../data/products.csv')

# Train SVD on the (user_id, product_id, rating) triples.
# rating_scale=(1, 5) declares the bounds of the 'rating' column to Surprise.
# build_full_trainset() uses every row for training, so nothing is held out
# for evaluation in this cell.
# NOTE(review): SVD() is created without a random_state, so repeated runs may
# yield different factors and recommendations - confirm whether that matters.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(inter[['user_id', 'product_id', 'rating']], reader)
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

# NLP content similarity: TF-IDF over the product descriptions, then pairwise
# cosine similarity between all rows of the resulting matrix.
# NOTE(review): the code below treats content_sim[i, j] as the similarity of
# rows i and j of `prod`; this assumes compute_tfidf_matrix preserves the
# input row order - TODO confirm in src.nlp_utils.
tfidf_matrix, vectorizer = compute_tfidf_matrix(prod['description'])
content_sim = cosine_similarity(tfidf_matrix)

# Hybrid recommendation for a user
def hybrid_recommend(user_id, top_n=5, *, svd_weight=0.7, content_weight=0.3,
                     exclude_seen=False):
    """Return the ``top_n`` product ids with the highest hybrid score.

    Every product id listed in ``prod`` is scored as
    ``svd_weight * est + content_weight * sim`` where ``est`` is
    ``algo.predict(user_id, pid).est`` and ``sim`` is the largest
    ``content_sim`` entry between that product and any product in the
    user's interaction history. ``sim`` is 0 when none of the user's
    history items appear in ``prod`` (including when the user has no
    interactions at all).

    Reads the module-level ``inter``, ``prod``, ``algo`` and ``content_sim``.

    Args:
        user_id: Value matched against ``inter['user_id']`` and passed to
            ``algo.predict``.
        top_n: Maximum number of product ids to return.
        svd_weight: Multiplier applied to the SVD estimate.
        content_weight: Multiplier applied to the content similarity.
        exclude_seen: When true, products already present in the user's
            interaction history are not scored or returned. The default
            (false) keeps them, which matches the historical behaviour.

    Returns:
        A list of product ids ordered by descending hybrid score. Products
        with equal scores keep the order in which they appear in ``prod``.

    NOTE(review): with ``exclude_seen=False`` a product from the user's own
    history is compared with itself, so its content score is the diagonal
    entry of ``content_sim`` (expected to be the maximum for cosine
    similarity). Already-seen items are therefore favoured - confirm this
    is intended.
    NOTE(review): the SVD estimate lives on the rating scale while cosine
    similarity does not exceed 1, so with the default weights the SVD term
    dominates the sum - confirm whether the scores should be normalised.
    """
    # Row position of every product id, built once instead of filtering the
    # DataFrame for each lookup. setdefault keeps the first row when an id
    # is duplicated in the catalogue.
    # NOTE(review): assumes row i of content_sim describes row i of `prod`.
    row_of = {}
    for row, pid in enumerate(prod['product_id']):
        row_of.setdefault(pid, row)

    # Distinct products the user interacted with; duplicates cannot change
    # a maximum, so they are dropped up front.
    history = inter.loc[inter['user_id'] == user_id, 'product_id'].unique().tolist()
    seen = set(history)
    # History ids that are missing from the catalogue have no similarity
    # row; skip them instead of failing on the lookup.
    hist_rows = [row_of[pid] for pid in history if pid in row_of]

    product_scores = {}
    for pid, row in row_of.items():
        if exclude_seen and pid in seen:
            continue
        # SVD score
        svd_score = algo.predict(user_id, pid).est
        # Content-based: max similarity to the user's history
        sim_score = content_sim[row, hist_rows].max() if hist_rows else 0
        # Hybrid score (weighted sum)
        product_scores[pid] = svd_weight * svd_score + content_weight * sim_score

    # Top-N recommendations (sorted() is stable, so ties keep catalogue order)
    recs = sorted(product_scores.items(), key=lambda x: x[1], reverse=True)[:top_n]
    return [pid for pid, _ in recs]

# Demo: show the default-length recommendation list for the user with id 1.
print('Hybrid recommendations for user 1:', hybrid_recommend(user_id=1))