In [2]:
# This notebook is used essentially to create a lab work that looks into explaining the concepts of user based and item based collaborative filtering
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
# Define a small dataset with user-item ratings
data = {
    'userId': [1, 1, 1, 2, 2, 3, 3, 4],
    'movieId': [101, 102, 103, 101, 102, 103, 104, 104],
    'rating': [5, 3, 2, 4, 5, 1, 4, 3]
}

In [3]:
# Create DataFrame
ratings_df = pd.DataFrame(data)

# Create the User-Item matrix
user_item_matrix = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Display the matrix
print("User-Item Matrix:")
print(user_item_matrix)

User-Item Matrix:
movieId  101  102  103  104
userId                     
1        5.0  3.0  2.0  0.0
2        4.0  5.0  0.0  0.0
3        0.0  0.0  1.0  4.0
4        0.0  0.0  0.0  3.0


In [4]:
# Compute cosine similarity between users
user_similarity = cosine_similarity(user_item_matrix)

# Convert to DataFrame for readability
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)


In [5]:
# Function to recommend items for a given user
def recommend_user_based(user_id, num_recommendations=2):
    # Find the most similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:]
    
    # Average the ratings of the similar users for unseen movies
    similar_users_ratings = user_item_matrix.loc[similar_users].mean().sort_values(ascending=False)
    
    # Filter out movies the user has already rated
    user_rated_movies = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] > 0].index
    recommendations = similar_users_ratings.drop(user_rated_movies)
    
    return recommendations.head(num_recommendations)

In [13]:
# Recommend 2 movies for user with user_id = 3
user_based_recommendations = recommend_user_based(3, 2)
print("\nUser-Based Recommendations for User 3:")
print(user_based_recommendations)


User-Based Recommendations for User 3:
movieId
101    3.000000
102    2.666667
dtype: float64


**ITEM BASED COLLABORATIVE FILTERING**
Calculate similarity between items using user ratings. Recommend items similar to those the user has liked.

**ADVANTAGES OF ITEM BASED**
1. More stable and computationally efficient compared to UBCF because the number of items is often smaller than the number of users.
2. More robust to the cold-start problem because it does not require as many ratings from a specific user to generate recommendations.


In [7]:
# Compute cosine similarity between items (movies)
item_similarity = cosine_similarity(user_item_matrix.T)

In [8]:
# Convert to DataFrame for readability
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)


In [9]:
# Function to recommend items based on items the user has already rated (item-based filtering)
def recommend_item_based(user_id, num_recommendations=5):
    # Get the movies the user has rated
    user_ratings = user_item_matrix.loc[user_id]
    
    # For each rated movie, find similar movies
    similar_items = []
    for movie in user_ratings[user_ratings > 0].index:
        similar_items.append(item_similarity_df[movie])
    
    # Concatenate the list of Series into a single Series
    similar_items = pd.concat(similar_items)
    
    # Average the similarity scores and drop movies the user has already rated
    similar_items = similar_items.groupby(similar_items.index).mean().sort_values(ascending=False)
    recommendations = similar_items.drop(user_ratings[user_ratings > 0].index)
    
    return recommendations.head(num_recommendations)


In [15]:
# Recommend 2 movies for user with user_id = 1
item_based_recommendations = recommend_item_based(3, 5)
print("\nItem-Based Recommendations for User 1:")
print(item_based_recommendations)


Item-Based Recommendations for User 1:
movieId
101    0.349215
102    0.230089
dtype: float64


**Matrix Factorization for Recommendations**
Matrix Factorization is a technique used to discover latent factors in user-item interactions. It breaks down the user-item matrix into two smaller matrices, representing users and items in a reduced-dimensional latent space.

Decompose the user-item matrix  into matrices  (user matrix) and  (item matrix).



In [11]:
import numpy as np
from sklearn.decomposition import TruncatedSVD #SVD means singular value decomposition
# Create DataFrame and User-Item Matrix
ratings_df = pd.DataFrame(data)
user_item_matrix = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Perform Matrix Factorization using SVD
svd = TruncatedSVD(n_components=2)
user_features = svd.fit_transform(user_item_matrix)
item_features = svd.components_

# Reconstructed matrix
reconstructed_matrix = np.dot(user_features, item_features)
reconstructed_df = pd.DataFrame(reconstructed_matrix, index=user_item_matrix.index, columns=user_item_matrix.columns)
print(reconstructed_df)

movieId       101       102       103       104
userId                                         
1        4.388569  3.927760  0.995440  0.198161
2        4.583866  4.111559  0.969534 -0.191094
3        0.078199 -0.021528  0.730837  4.046982
4       -0.028789 -0.091625  0.506652  2.908573
