# Matrix Factorization in PyTorch

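This notebook demonstrates matrix factorization in PyTorch. It builds a synthetic 1,000 × 1,000 user–item rating matrix in which 1% of the entries hold an integer rating from 1 to 4, defines a matrix factorization model whose user and item latent factors are stored in embedding layers, and trains that model with a single pass of stochastic gradient descent that visits the observed ratings one at a time.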

In [1]:
import numpy as np
from scipy.sparse import rand as sprand
import torch

# Create synthetic ratings data
n_users = 1000
n_items = 1000

# Draw everything from one explicitly seeded generator so the synthetic data
# is identical on every Restart & Run All; change SEED for a different draw.
SEED = 0
rng = np.random.default_rng(SEED)

# Create a sparse random matrix with 1% density. Only its sparsity pattern
# matters here: the sampled values are overwritten on the next line.
ratings = sprand(n_users, n_items, density=0.01, format="csr", random_state=rng)
# Replace the non-zero entries with random integer ratings from 1 to 4
# (the upper bound passed to `integers` is exclusive)
ratings.data = rng.integers(1, 5, size=ratings.nnz).astype(np.float64)
# Densify: every (user, item) pair without a rating becomes 0.0
ratings = ratings.toarray()

print("Ratings shape:", ratings.shape)

Ratings shape: (1000, 1000)


In [2]:
import torch.nn as nn

class MatrixFactorization(nn.Module):
    """Score a (user, item) pair as the dot product of two latent vectors.

    Each user and each item owns one learnable vector of length ``n_factors``,
    stored as a row of an embedding table.

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        n_factors: Length of every latent vector (default 20).
    """

    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # User and item embeddings represent the latent factors.
        # sparse=True makes the weight gradients sparse tensors, so the
        # optimizer used with this model must support sparse gradients.
        self.user_factors = nn.Embedding(n_users, n_factors, sparse=True)
        self.item_factors = nn.Embedding(n_items, n_factors, sparse=True)

    def forward(self, user, item):
        """Return the predicted rating for each (user, item) index pair.

        Args:
            user: Integer tensor of user indices, any shape.
            item: Integer tensor of item indices, same shape as ``user``.

        Returns:
            Tensor with the same shape as the index tensors, holding the dot
            product of the corresponding user and item latent vectors.
        """
        # The embedding lookup appends the factor axis last, so reducing over
        # dim=-1 is the dot product for scalar, 1-D and N-D index tensors alike.
        return (self.user_factors(user) * self.item_factors(item)).sum(dim=-1)

# Seed PyTorch's global RNG so the randomly initialised embedding weights are
# the same on every fresh run of the notebook.
torch.manual_seed(0)

# Instantiate the model
model = MatrixFactorization(n_users, n_items, n_factors=20)
print(model)

MatrixFactorization(
  (user_factors): Embedding(1000, 20, sparse=True)
  (item_factors): Embedding(1000, 20, sparse=True)
)


In [3]:
# Define the loss function and optimizer.
# The model's embeddings are created with sparse=True, so the optimizer has to
# accept sparse gradients; plain SGD does.
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)

# Get the indices of the non-zero ratings
rows, cols = ratings.nonzero()
# Shuffle the non-zero indices with a seeded generator so the visiting order
# (and therefore the trained weights) is repeatable across runs
p = np.random.default_rng(0).permutation(len(rows))
rows, cols = rows[p], cols[p]

print(f"Total training samples: {len(rows)}")

# Convert indices and targets to tensors once, instead of building three new
# tensors from Python lists on every iteration. The added trailing axis means
# that iterating yields shape-(1,) tensors, i.e. a batch of one sample.
users = torch.as_tensor(rows, dtype=torch.long).unsqueeze(1)
items = torch.as_tensor(cols, dtype=torch.long).unsqueeze(1)
targets = torch.as_tensor(ratings[rows, cols], dtype=torch.float32).unsqueeze(1)

# Training loop: one SGD step per non-zero rating (a single pass over the data)
for user, item, rating in zip(users, items, targets):
    optimizer.zero_grad()

    # Forward pass: predict the rating
    prediction = model(user, item)

    # Compute the loss
    loss = loss_func(prediction, rating)

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()

print("Training complete.")

  from .autonotebook import tqdm as notebook_tqdm


Total training samples: 10000
Training complete.
