In [None]:
# 1. Basic Setup and Imports

In [1]:
# 1. Import common libraries
import numpy as np
import pandas as pd

# 2. Import PyTorch
import torch
import torch.nn as nn # Neural network module
import torch.optim as optim # Adam or SGD.
from torch.utils.data import DataLoader, TensorDataset # Dataset wrapper and batching

# Report the installed PyTorch build, then select the compute device:
# CUDA when a usable GPU is present, otherwise the CPU.
print(f"PyTorch version: {torch.__version__}")
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

PyTorch version: 2.5.1
Using device: cpu


# 2. Creating a Toy Dataset for Recommendations

In [2]:
# Size of the toy problem: how many users, items, and observed interactions
num_users, num_items, num_interactions = 5, 6, 20

# Fix NumPy's global seed, then draw the columns in a fixed order
# (users, items, ratings) so the generated table is always the same.
np.random.seed(42)
user_ids = np.random.randint(low=0, high=num_users, size=num_interactions)
item_ids = np.random.randint(low=0, high=num_items, size=num_interactions)

# Synthetic integer ratings in 1..5 (the upper bound 6 is exclusive)
ratings = np.random.randint(low=1, high=6, size=num_interactions)

# One row per interaction, columns in (user_id, item_id, rating) order
df = pd.DataFrame(dict(user_id=user_ids, item_id=item_ids, rating=ratings))

df.head(10)

Unnamed: 0,user_id,item_id,rating
0,3,1,3
1,4,5,5
2,2,4,1
3,4,3,2
4,4,0,4
5,1,0,1
6,2,2,4
7,2,2,2
8,2,1,2
9,4,3,1


# 3. Simple Matrix Factorization Model (PyTorch)
## 3.1 Model Architecture
- User Embedding: Maps each user to a latent vector (e.g., 8 dimensions).
- Item Embedding: Maps each item to a latent vector (8 dimensions).
- We predict the rating by taking the dot product of these two vectors.

In [3]:
class MatrixFactorization(nn.Module):
    """Matrix-factorization recommender.

    Each user and each item is mapped to a latent vector; the predicted
    rating for a (user, item) pair is the dot product of the two vectors.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Length of every latent vector (default 8).
    """

    def __init__(self, num_users, num_items, embedding_dim=8):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, embedding_dim)
        self.item_emb = nn.Embedding(num_items, embedding_dim)

        # Small-variance init (std=0.01): initial dot products start near zero
        nn.init.normal_(self.user_emb.weight, std=0.01)
        nn.init.normal_(self.item_emb.weight, std=0.01)

    def forward(self, user_ids, item_ids):
        """Predict a score for every (user, item) pair.

        Args:
            user_ids: Integer index tensor of user ids.
            item_ids: Integer index tensor of item ids. Its shape must match
                or broadcast against ``user_ids`` (e.g. both ``[batch]``, or
                a scalar user id against a vector of item ids).

        Returns:
            Tensor of predicted scores with the broadcast shape of the two
            id tensors (the embedding axis is reduced away).
        """
        user_vectors = self.user_emb(user_ids)
        item_vectors = self.item_emb(item_ids)
        # The embedding axis is always the LAST one, whatever the shape of the
        # id tensors; reducing over dim=-1 keeps the dot product correct for
        # [batch], [batch, k] and scalar-vs-vector inputs alike.
        return (user_vectors * item_vectors).sum(dim=-1)

# 4. Preparing Data for PyTorch
We need tensors for users, items, and ratings. Then we’ll create a minimal DataLoader for batch processing.

## 4.1 Convert DataFrame to Tensors

In [4]:
# Convert DataFrame columns to tensors.
# IDs are torch.long because they are used as embedding indices;
# ratings are torch.float so they can be compared with the model's float
# predictions in the loss.
user_ids_tensor = torch.tensor(df['user_id'].values, dtype=torch.long)
item_ids_tensor = torch.tensor(df['item_id'].values, dtype=torch.long)
ratings_tensor  = torch.tensor(df['rating'].values, dtype=torch.float)

# Combine into a TensorDataset: each sample is a (user_id, item_id, rating) triple.
# (TensorDataset / DataLoader are imported in the notebook's import cell.)
dataset = TensorDataset(user_ids_tensor, item_ids_tensor, ratings_tensor)

# Create a DataLoader for batching. A dedicated, seeded generator drives the
# shuffle so the batch order is identical on every Restart & Run All.
batch_size = 4
shuffle_generator = torch.Generator().manual_seed(42)
loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    generator=shuffle_generator,
)

# 5. Training Loop
## 5.1 Initialize the Model and Optimizer

In [5]:
# Seed PyTorch's global RNG before building the model so the random embedding
# initialization is the same on every fresh kernel run.
torch.manual_seed(42)

model = MatrixFactorization(num_users=num_users, num_items=num_items, embedding_dim=8)
model = model.to(device)

# Mean squared error between predicted and observed ratings
criterion = nn.MSELoss()
# Adam over every trainable parameter of the model
optimizer = optim.Adam(model.parameters(), lr=0.01)


## 5.2. The Training Process

In [6]:
epochs = 5

for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0   # running sum of per-sample squared error
    num_samples = 0    # samples seen so far in this epoch

    # Batch variables carry a `batch_` prefix so the loop does not overwrite
    # the notebook-level `ratings` array created with the toy dataset.
    for batch_users, batch_items, batch_ratings in loader:
        # Move data to GPU if available
        batch_users = batch_users.to(device)
        batch_items = batch_items.to(device)
        batch_ratings = batch_ratings.to(device)

        # 1. Zero the gradients
        optimizer.zero_grad()

        # 2. Forward pass
        preds = model(batch_users, batch_items)

        # 3. Compute the loss
        loss = criterion(preds, batch_ratings)

        # 4. Backward pass
        loss.backward()

        # 5. Update parameters
        optimizer.step()

        # `loss` is the mean over this batch; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        current_batch_size = batch_ratings.size(0)
        epoch_loss += loss.item() * current_batch_size
        num_samples += current_batch_size

    avg_loss = epoch_loss / num_samples
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")


Epoch [1/5], Loss: 10.6472
Epoch [2/5], Loss: 10.6096
Epoch [3/5], Loss: 10.5049
Epoch [4/5], Loss: 10.3094
Epoch [5/5], Loss: 9.9749


### Step-by-step:

- Zero gradients to avoid accumulation from previous batches.
- Forward pass: Model predicts ratings from user/item embeddings.
- Calculate loss: MSELoss between predicted and actual ratings.
- Backward pass: Compute gradients using loss.backward().
- Step optimizer: Update parameters according to the gradients.

# 6. Quick Evaluation

In [7]:
# Score every (user, item) pair in the dataset with the model in eval mode
# and gradient tracking switched off, then report the mean squared error.
model.eval()
eval_inputs = (user_ids_tensor.to(device), item_ids_tensor.to(device))
eval_targets = ratings_tensor.to(device)

with torch.no_grad():
    preds = model(*eval_inputs)
    mse = criterion(preds, eval_targets).item()

print(f"Final MSE on entire dataset: {mse:.4f}")


Final MSE on entire dataset: 9.6940


## What’s Next?
### Proper Train/Test Split

Evaluating on the entire dataset (the same one you trained on) does not give a true measure of generalization.
You need to split your data into training and test sets (and possibly a validation set):
```python
# Example split (using scikit-learn):
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Then create separate train_loader and test_loader
```
Train on the train set, then evaluate on the test set to see how well your model generalizes to unseen data.
### Tune Hyperparameters

- Increase embedding dimension: Instead of embedding_dim=8, try 16, 32, etc.
- Adjust learning rate: Start with 1e-3 or 1e-4.
- Change the optimizer: e.g., Adam vs. SGD.
- Increase epochs and watch for overfitting or underfitting.
### Use a More Meaningful Dataset

If you’re using real user interaction data (with many more users and items), you may see different MSE behaviors.
Real recommendation tasks often also measure ranking metrics (Precision@k, Recall@k, NDCG).
### Implement a Ranking Evaluation

For many recommendation systems, the ranking of items (rather than absolute rating predictions) is the key.
Consider implementing metrics like Precision@k, Recall@k, or NDCG.
This usually involves generating top-N item predictions per user and comparing them to the user’s actual next interactions.
### Regularization

If MSE is high or if the model is overfitting, you may want to add weight decay or embedding regularization.
E.g., in PyTorch’s Adam optimizer:
```python
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
```
### Try a More Complex Model

A simple matrix factorization is a good starting point.
Next steps could include a neural network (MLP) that uses user and item embeddings plus additional features (like text embeddings from product descriptions or user demographic info).

In a rating prediction context (like the toy example we used), the model’s “result” is a predicted rating for each (user, item) pair. However, in a recommendation system, you often use those predicted ratings to rank items and provide the Top-N recommended items for each user.

In other words, once your matrix factorization model is trained:

You can input a user ID and one (or many) item IDs.
The model outputs a single rating score or relevance score for each user–item combination.
From there, you typically:

Generate predictions for all items (or a subset) for a specific user.
Sort the items by the predicted score (descending).
Select the top-N as the “best recommended” items.
Example: Getting Top-3 Items for a Given User
Below is a quick illustration of how you might produce a “top-3” recommendation list once your model is trained. Suppose we want recommendations for a user with ID u. We’ll:

Create a list of all item IDs.
Predict the scores for (u, item_i).
Sort the items by the prediction.
Return the top 3