# Collaborative Filtering Recommender

This notebook implements user-based and item-based collaborative filtering to produce top-N item recommendations from a user-item rating matrix.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Notebook-wide presentation defaults
sns.set_style('whitegrid')  # seaborn theme applied to subsequent plots
pd.set_option('display.max_columns', None)  # None: show every DataFrame column

## Load Preprocessed Data

In [None]:
# Load user-item matrix
# user_item_matrix = pd.read_csv('../results/user_item_matrix.csv', index_col=0)
# print(f"User-Item Matrix shape: {user_item_matrix.shape}")
# user_item_matrix.head()

## User-Based Collaborative Filtering

In [None]:
# Calculate user similarity matrix
# user_similarity = cosine_similarity(user_item_matrix)
# user_similarity_df = pd.DataFrame(
#     user_similarity,
#     index=user_item_matrix.index,
#     columns=user_item_matrix.index
# )

# print(f"User similarity matrix shape: {user_similarity_df.shape}")
# user_similarity_df.head()

In [None]:
def get_user_recommendations(user_id, user_item_matrix, user_similarity, top_n=10,
                             n_neighbors=50):
    """
    Get top-N recommendations for a user using user-based collaborative filtering.

    Each item the target user has not rated (rating == 0) is scored with the
    similarity-weighted mean of the ratings given to it by the most similar
    users. Items that none of those neighbours rated, or whose similarity
    weights do not sum to a positive value, receive no score and are never
    recommended.

    Args:
        user_id: Target user ID (a label in ``user_item_matrix.index``)
        user_item_matrix: User-item rating DataFrame (users as rows, items as
            columns); a rating of 0 means "not rated"
        user_similarity: Square user-user similarity matrix (ndarray or
            DataFrame) whose rows/columns are in the same order as the rows
            of ``user_item_matrix``
        top_n: Number of recommendations
        n_neighbors: Number of most similar users to consider
            (``None`` uses every other user)

    Returns:
        List of recommended item IDs, best first; ties keep column order

    Raises:
        KeyError: If ``user_id`` is not in ``user_item_matrix.index``
        ValueError: If the index is not unique or the similarity matrix does
            not have shape (n_users, n_users)
    """
    similarity = np.asarray(user_similarity, dtype=float)
    n_users = user_item_matrix.shape[0]
    if similarity.shape != (n_users, n_users):
        raise ValueError(
            f"user_similarity must have shape {(n_users, n_users)}, "
            f"got {similarity.shape}"
        )

    # The similarity matrix is positional, so translate the user label into
    # its row position instead of indexing the array with the label itself.
    user_pos = user_item_matrix.index.get_loc(user_id)
    if not isinstance(user_pos, (int, np.integer)):
        raise ValueError("user_item_matrix index must be unique")

    ratings = user_item_matrix.to_numpy(dtype=float)
    unrated_pos = np.flatnonzero(ratings[user_pos] == 0)

    # Rank the other users by similarity. The target user is removed
    # explicitly rather than assumed to sort first, which is not guaranteed
    # when similarities tie.
    user_sims = similarity[user_pos]
    order = np.argsort(-user_sims, kind="stable")
    neighbours = order[order != user_pos][:n_neighbors]
    neighbour_sims = user_sims[neighbours]

    # Only neighbours who actually rated an item contribute to its score.
    neighbour_ratings = ratings[np.ix_(neighbours, unrated_pos)]
    has_rated = neighbour_ratings > 0
    weighted_sums = neighbour_sims @ np.where(has_rated, neighbour_ratings, 0.0)
    similarity_sums = neighbour_sims @ has_rated.astype(float)

    scorable = similarity_sums > 0
    scores = weighted_sums[scorable] / similarity_sums[scorable]
    candidates = user_item_matrix.columns[unrated_pos[scorable]]

    # Stable sort on the negated scores: descending, ties in column order.
    ranked = np.argsort(-scores, kind="stable")[:top_n]
    return candidates[ranked].tolist()

# Example usage
# user_id = user_item_matrix.index[0]
# recommendations = get_user_recommendations(user_id, user_item_matrix, user_similarity)
# print(f"Recommendations for user {user_id}: {recommendations}")

## Item-Based Collaborative Filtering

In [None]:
# Calculate item similarity matrix
# item_similarity = cosine_similarity(user_item_matrix.T)
# item_similarity_df = pd.DataFrame(
#     item_similarity,
#     index=user_item_matrix.columns,
#     columns=user_item_matrix.columns
# )

# print(f"Item similarity matrix shape: {item_similarity_df.shape}")
# item_similarity_df.head()

In [None]:
def get_item_recommendations(user_id, user_item_matrix, item_similarity, top_n=10):
    """
    Get top-N recommendations for a user using item-based collaborative filtering.

    Each item the user has not rated (rating == 0) is scored as the
    similarity-weighted mean of the user's own ratings, where the weights are
    the similarities between the candidate item and each rated item. The
    weighted sum is normalised by the sum of absolute similarities; items
    for which that sum is zero receive no score and are never recommended.

    Args:
        user_id: Target user ID (a label in ``user_item_matrix.index``)
        user_item_matrix: User-item rating DataFrame (users as rows, items as
            columns); a rating of 0 means "not rated"
        item_similarity: Square item-item similarity matrix (ndarray or
            DataFrame) whose rows/columns are in the same order as the
            columns of ``user_item_matrix``
        top_n: Number of recommendations

    Returns:
        List of recommended item IDs, best first; ties keep column order

    Raises:
        KeyError: If ``user_id`` is not in ``user_item_matrix.index``
        ValueError: If the index is not unique or the similarity matrix does
            not have shape (n_items, n_items)
    """
    similarity = np.asarray(item_similarity, dtype=float)
    n_items = user_item_matrix.shape[1]
    if similarity.shape != (n_items, n_items):
        raise ValueError(
            f"item_similarity must have shape {(n_items, n_items)}, "
            f"got {similarity.shape}"
        )

    user_pos = user_item_matrix.index.get_loc(user_id)
    if not isinstance(user_pos, (int, np.integer)):
        raise ValueError("user_item_matrix index must be unique")
    user_ratings = user_item_matrix.iloc[user_pos].to_numpy(dtype=float)

    # Work with column positions: the similarity matrix is positional, so it
    # must not be indexed with item labels (which may be strings).
    rated_pos = np.flatnonzero(user_ratings > 0)
    unrated_pos = np.flatnonzero(user_ratings == 0)

    # Rows: candidate (unrated) items; columns: items the user has rated.
    sims = similarity[np.ix_(unrated_pos, rated_pos)]
    weighted_sums = sims @ user_ratings[rated_pos]
    similarity_sums = np.abs(sims).sum(axis=1)

    scorable = similarity_sums > 0
    scores = weighted_sums[scorable] / similarity_sums[scorable]
    candidates = user_item_matrix.columns[unrated_pos[scorable]]

    # Stable sort on the negated scores: descending, ties in column order.
    ranked = np.argsort(-scores, kind="stable")[:top_n]
    return candidates[ranked].tolist()

# Example usage
# recommendations = get_item_recommendations(user_id, user_item_matrix, item_similarity)
# print(f"Item-based recommendations for user {user_id}: {recommendations}")

## Evaluate Recommendations

In [None]:
# Evaluate recommendation quality
# This is a placeholder for evaluation metrics
# You can implement metrics like Precision@K, Recall@K, NDCG, etc.

## Visualize Results

In [None]:
# Visualize similarity matrices
# fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# sns.heatmap(user_similarity_df.iloc[:20, :20], cmap='coolwarm', ax=axes[0])
# axes[0].set_title('User Similarity Matrix (Sample)')

# sns.heatmap(item_similarity_df.iloc[:20, :20], cmap='coolwarm', ax=axes[1])
# axes[1].set_title('Item Similarity Matrix (Sample)')

# plt.tight_layout()
# plt.show()

## Save Results

In [None]:
# Save similarity matrices
# user_similarity_df.to_csv('../results/user_similarity.csv')
# item_similarity_df.to_csv('../results/item_similarity.csv')
# print("Saved similarity matrices")