In [2]:
import torch
import torch.nn.functional as F

# User Based Collaborative Filtering

User-based collaborative filtering is a technique used to recommend items based on how similar users have rated them in the past. The basic idea is that if two users have rated items similarly in the past, they are likely to have similar preferences in the future.

Understanding the Process of Finding Top-K Similar Users
User Ratings Matrix: The ratings matrix (let's assume it looks something like this for illustration) represents ratings by each user for various items, where 0 indicates an unrated item:

User-Item Ratings Matrix:
```python
tensor([[4., 3., 0., 5., 0.],  # User 0
        [5., 0., 4., 4., 3.],  # User 1
        [0., 2., 0., 4., 5.],  # User 2
        [3., 0., 4., 0., 2.]]) # User 3
```

User Similarity Matrix: The user similarity matrix gives the cosine similarity between each pair of users, computed from their mean-centred ratings:

User Similarity Matrix:
```python
tensor([[ 1.0000,  0.0000,  0.6547,  0.0000],  # User 0
        [ 0.0000,  1.0000, -0.4364,  0.5000],  # User 1
        [ 0.6547, -0.4364,  1.0000, -0.4364],  # User 2
        [ 0.0000,  0.5000, -0.4364,  1.0000]]) # User 3
```

Finding Similar Users
When calculating the top-K similar users for User 0, the following happens:

Identifying Similar Users:
The similarity scores for User 0 are [1.0000, 0.0000, 0.6547, 0.0000].
Here, 1.0000 is the self-similarity (User 0 to User 0), which is not considered for prediction.
The highest non-zero similarity score is 0.6547 (User 0 with User 2).
Top Similar Users: For User 0:

The next best similarity score after itself is 0.6547, indicating that User 2 is the most similar user.
Since the scores for User 1 and User 3 are 0.0000, they are not similar to User 0.
Handling Zeros in the Ratings
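The presence of 0s in the ratings does not distort the similarity scores: unrated entries are left out of each user's mean and stay 0 after mean-centring, so they add nothing to the cosine similarity. Instead, the 0s determine which items need a prediction: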

Unrated Items: The 0 values simply mean that User 0 has not rated those specific items. When making predictions, the algorithm focuses on how similar users (like User 2) rated those items.
Example of Prediction for User 0
If User 0 is being evaluated, the prediction process might work like this:

Top-K Similar Users: Identify the top-K similar users. For User 0, this might just be User 2 (if k=1 or k=2, depending on how many are available).

Weighted Ratings Calculation:

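Look at the ratings of User 2 for items that User 0 has not rated. In the sample data used in the code below, User 0 has not rated Items 2 and 4. User 2 rated Item 4 as 5, so that rating is used for the prediction; User 2 has not rated Item 2 either, so User 2 provides no information about it.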
Final Prediction: The predicted rating for an unrated item by User 0 is calculated based on the ratings from the similar users (like User 2), weighted by their similarity scores.

In [3]:
# Sample data: rows are users, columns are items, and values are ratings (0 means no rating)
# Shape is (4 users, 5 items); stored as float32 so the mean/similarity maths below works on floats.
ratings_matrix = torch.tensor(
    [
        [4.0, 3.0, 0.0, 5.0, 0.0],  # user 0: items 2 and 4 unrated
        [5.0, 0.0, 4.0, 4.0, 3.0],  # user 1: item 1 unrated
        [0.0, 2.0, 0.0, 4.0, 5.0],  # user 2: items 0 and 2 unrated
        [3.0, 0.0, 4.0, 0.0, 2.0],  # user 3: items 1 and 3 unrated
    ],
    dtype=torch.float32,
)

In [8]:
# Step 1: Normalize ratings to handle different user rating scales
def normalize_ratings(ratings):
    """Mean-centre each user's ratings, leaving unrated entries at 0.

    Args:
        ratings: 2-D tensor with one row per user and one column per item.
            Entries ``> 0`` are treated as ratings; all other entries are
            treated as "not rated".

    Returns:
        A tuple ``(normalized_ratings, mean_ratings)``:

        * ``normalized_ratings`` has the same shape as ``ratings``; each rated
          entry holds ``rating - user_mean`` and each unrated entry holds 0.
        * ``mean_ratings`` is a column tensor of shape ``(n_users, 1)`` with
          each user's mean over their rated items (0 for a user who has not
          rated anything).
    """
    rated = ratings > 0

    # Clamp the count to at least 1 so a user without any ratings gets a mean
    # of 0 instead of 0 / 0 = NaN.
    rated_count = rated.sum(dim=1).clamp(min=1).float()
    mean_ratings = (ratings.sum(dim=1) / rated_count).view(-1, 1)

    # Subtract the per-user mean only where a rating exists; zeros_like keeps
    # the fill value on the same dtype/device as the centred ratings.
    centered = ratings - mean_ratings
    normalized_ratings = torch.where(rated, centered, torch.zeros_like(centered))
    return normalized_ratings, mean_ratings


# Mean-centre the sample ratings matrix.
# Every rated entry becomes its deviation from that user's mean rating; unrated entries stay 0.
# user_means keeps the per-user means as a column, one row per user.
normalized_ratings, user_means = normalize_ratings(ratings_matrix)
print("Normalized ratings:")
print(normalized_ratings)

Normalized ratings:
tensor([[ 0.0000, -1.0000,  0.0000,  1.0000,  0.0000],
        [ 1.0000,  0.0000,  0.0000,  0.0000, -1.0000],
        [ 0.0000, -1.6667,  0.0000,  0.3333,  1.3333],
        [ 0.0000,  0.0000,  1.0000,  0.0000, -1.0000]])


In [9]:
# Step 2: Calculate cosine similarity between users
def user_similarity_matrix(normalized_ratings):
    """Return the pairwise cosine similarity between user rows.

    Args:
        normalized_ratings: 2-D floating-point tensor with one row per user
            (here: the mean-centred ratings).

    Returns:
        A square tensor of shape ``(n_users, n_users)`` whose ``[i, j]`` entry
        is the cosine similarity between user ``i`` and user ``j``. A user
        whose row is all zeros has similarity 0 with everyone, including
        themselves.
    """
    # Scale every row to unit length first; rows with (near-)zero norm are
    # divided by eps instead, so they stay zero rather than becoming NaN.
    unit_rows = F.normalize(normalized_ratings, p=2, dim=1, eps=1e-8)

    # A single matrix product yields all pairwise dot products of the unit
    # rows, avoiding the (n_users, n_users, n_items) broadcast intermediate.
    similarity = unit_rows @ unit_rows.T

    # Guard against rounding pushing values marginally outside [-1, 1].
    return similarity.clamp(-1.0, 1.0)


# In the matrix printed below, user 0 is most similar to user 2 (0.6547)
# and has zero similarity with users 1 and 3.
user_similarity = user_similarity_matrix(normalized_ratings)
print("User similarity:")
print(user_similarity)

User similarity:
tensor([[ 1.0000,  0.0000,  0.6547,  0.0000],
        [ 0.0000,  1.0000, -0.4364,  0.5000],
        [ 0.6547, -0.4364,  1.0000, -0.4364],
        [ 0.0000,  0.5000, -0.4364,  1.0000]])


In [12]:
# Step 3: Make predictions based on similar users
def predict_ratings(ratings, similarity, k=2):
    """Fill in each user's unrated items from their top-K most similar users.

    For every user, the ``k`` most similar *other* users are selected. The
    prediction for an unrated item is the similarity-weighted average of the
    ratings given to that item by those neighbours, counting only neighbours
    with positive similarity who actually rated the item. If no such
    neighbour exists, the entry stays 0 (no prediction available).

    Args:
        ratings: 2-D tensor ``(n_users, n_items)``; an entry equal to 0 means
            "not rated".
        similarity: square tensor ``(n_users, n_users)`` of user-user
            similarity scores. It is not modified.
        k: number of neighbours to consider per user. Values larger than the
            number of other users are reduced to that number.

    Returns:
        Tensor shaped like ``ratings``: existing ratings are kept as they are
        and unrated entries are replaced by the predicted rating.
    """
    n_users = ratings.size(0)

    # Work on a copy so the caller's similarity matrix keeps its diagonal.
    # -inf (rather than 0) guarantees a user never ranks among their own
    # neighbours, even when every other similarity is negative.
    neighbour_scores = similarity.clone()
    neighbour_scores.fill_diagonal_(float("-inf"))

    # topk raises if k exceeds the row length; there are at most n_users - 1
    # other users to choose from.
    k = max(0, min(k, n_users - 1))

    pred_ratings = ratings.clone()
    if k == 0:
        return pred_ratings

    has_rating = ratings != 0
    for user in range(n_users):
        top_k = torch.topk(neighbour_scores[user], k=k)
        neighbours = top_k.indices

        # Non-positive similarities carry no usable signal for a weighted
        # average of raw ratings, so they get weight 0.
        weights = top_k.values.clamp(min=0).view(-1, 1)

        # Unrated neighbour entries are 0 and therefore drop out of the
        # numerator; the mask removes their weight from the denominator too,
        # so a missing rating is not averaged in as a rating of 0.
        neighbour_rated = has_rating[neighbours].to(weights.dtype)
        weighted_sum = (weights * ratings[neighbours]).sum(dim=0)
        weight_total = (weights * neighbour_rated).sum(dim=0)

        estimate = torch.where(
            weight_total > 0,
            weighted_sum / weight_total.clamp(min=1e-8),
            torch.zeros_like(weighted_sum),
        )

        # Only predict ratings where the user has not rated
        pred_ratings[user] = torch.where(has_rating[user], ratings[user], estimate)

    return pred_ratings


# Step 4: Make predictions for all users
# Each user's unrated items are filled in from their k=2 most similar users;
# ratings the user has already given are carried over unchanged.
predicted_ratings = predict_ratings(ratings_matrix, user_similarity, k=2)
print("Predicted ratings:")
print(predicted_ratings)

Predicted ratings:
tensor([[4., 3., 0., 5., 5.],
        [5., 0., 4., 4., 3.],
        [4., 2., 0., 4., 5.],
        [3., 0., 4., 4., 2.]])


In [13]:
# Step 5: Recommend items to a specific user
def recommend_items(pred_ratings, user_id, num_recommendations=2, ratings=None):
    """Return the indices of the items with the highest predicted rating for a user.

    Args:
        pred_ratings: 2-D tensor ``(n_users, n_items)`` of predicted ratings
            (assumed floating point when ``ratings`` is given, because rated
            items are masked with ``-inf``).
        user_id: row index of the user to recommend for.
        num_recommendations: maximum number of item indices to return. Fewer
            are returned when there are not enough candidate items.
        ratings: optional tensor shaped like ``pred_ratings`` holding the
            original ratings (0 = not rated). When given, items the user has
            already rated are excluded. When omitted, all items are ranked,
            so already-rated items can be returned.

    Returns:
        1-D integer tensor of item indices, ordered from highest to lowest
        predicted rating.
    """
    user_scores = pred_ratings[user_id]

    if ratings is None:
        n_candidates = user_scores.numel()
    else:
        already_rated = ratings[user_id] != 0
        # -inf pushes rated items below every real score so they are never picked.
        user_scores = user_scores.masked_fill(already_rated, float("-inf"))
        n_candidates = int((~already_rated).sum())

    # topk raises when asked for more entries than exist; cap the request.
    top_n = max(0, min(num_recommendations, n_candidates))
    recommended_items = torch.topk(user_scores, top_n).indices
    return recommended_items


# Example: Get top-2 recommendations for user 0
# NOTE: this call ranks every item by predicted rating, so an item user 0 has
# already rated (e.g. item 3, rated 5.0) can appear in the result.
user_id = 0
recommendations = recommend_items(predicted_ratings, user_id, num_recommendations=2)
print(f"Recommended items for user {user_id}: {recommendations.tolist()}")

Recommended items for user 0: [4, 3]
