In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import rand as sprand
import torch

In [2]:
# Make up some random explicit feedback ratings
# and convert to a numpy array
n_users = 1000
n_items = 1000
ratings = sprand(n_users, n_items, density=0.01, format="csr")
ratings.data = np.random.randint(0, 2, size=ratings.nnz).astype(np.float64)
ratings = ratings.toarray()

In [3]:
responses = [-1, 0, 1]
p = np.array([1, 5, 1])
m = 1000
n = 1000

# A binary response matrix.
ratings = np.random.choice(responses, size=m*n, p=p / p.sum()).reshape((m, n))
print(ratings)

[[ 0  0  0 ...  0  0  0]
 [ 1  0  0 ... -1  1  0]
 [ 0  0  0 ...  0  0  0]
 ...
 [ 0  0 -1 ...  0  1  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]]


In [4]:
class MatrixFactorization(torch.nn.Module):
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users, n_factors, sparse=True)
        self.item_factors = torch.nn.Embedding(n_items, n_factors, sparse=True)

    def forward(self, user, item):
        return (self.user_factors(user) * self.item_factors(item)).sum(1)

In [5]:
model = MatrixFactorization(n_users, n_items, n_factors=20)

In [19]:
loss_func = torch.nn.CrossValidationLoss()

AttributeError: module 'torch.nn' has no attribute 'CrossValidationLoss'

In [7]:
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)  # learning rate

In [17]:
# Sort our data
rows, cols = ratings.nonzero()
p = np.random.permutation(len(rows))
rows, cols = rows[p][:10], cols[p][:10]

for row, col in zip(*(rows, cols)):
    # Set gradients to zero
    optimizer.zero_grad()
    
    # Turn data into tensors
    rating = torch.FloatTensor([ratings[row, col]])
    row = torch.LongTensor([row])
    col = torch.LongTensor([col])

    # Predict and calculate loss
    prediction = model(row, col)
#     print(torch.sign(prediction))
#     print(rating)
    loss = loss_func(torch.sign(prediction), rating)

    # Backpropagate
    loss.backward()

    # Update the parameters
    optimizer.step()

tensor([-1.], grad_fn=<SignBackward>)
tensor([1.], grad_fn=<SignBackward>)
tensor([-1.], grad_fn=<SignBackward>)
tensor([1.], grad_fn=<SignBackward>)
tensor([-1.], grad_fn=<SignBackward>)
tensor([1.], grad_fn=<SignBackward>)
tensor([-1.], grad_fn=<SignBackward>)
tensor([1.], grad_fn=<SignBackward>)
tensor([-1.], grad_fn=<SignBackward>)
tensor([1.], grad_fn=<SignBackward>)


In [141]:
model

MatrixFactorization(
  (user_factors): Embedding(1000, 20, sparse=True)
  (item_factors): Embedding(1000, 20, sparse=True)
)

In [17]:
class BiasedMatrixFactorization(torch.nn.Module):
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users, n_factors, sparse=True)
        self.item_factors = torch.nn.Embedding(n_items, n_factors, sparse=True)
        self.user_biases = torch.nn.Embedding(n_users, 1, sparse=True)
        self.item_biases = torch.nn.Embedding(n_items, 1, sparse=True)

    def forward(self, user, item):
        pred = self.user_biases(user) + self.item_biases(item)
        pred += (
            (self.user_factors(user) * self.item_factors(item))
            .sum(dim=1, keepdim=True)
        )
        return pred.squeeze()

In [75]:
reg_loss_func = torch.optim.SGD(model.parameters(), lr=1e-6, weight_decay=1e-5)

In [76]:
adagrad_loss = torch.optim.Adagrad(model.parameters(), lr=1e-6)

In [78]:
adagrad_loss

Adagrad (
Parameter Group 0
    eps: 1e-10
    initial_accumulator_value: 0
    lr: 1e-06
    lr_decay: 0
    weight_decay: 0
)

In [94]:
rows, cols = ratings.nonzero()
i = 0
for row, col in zip(*(rows, cols)):
    if (i < 1):
        print(row)
        print(col)
        
        # Turn data into tensors
        rating = torch.FloatTensor([ratings[row, col]])
        row = torch.LongTensor([row])
        col = torch.LongTensor([col])
        
        

        # Predict and calculate loss
        prediction = model(row, col)
        
    i += 1


0
0
tensor([-1.])
tensor([0])
tensor([0])


In [100]:
row = 0
col = 15


row = torch.LongTensor([row])
col = torch.LongTensor([col])
rating = torch.FloatTensor([ratings[row, col]])

prediction = model(row, col)

print(rating)
print(prediction)

tensor([-1.])
tensor([3.7548], grad_fn=<SumBackward1>)


In [88]:
ratings.nonzero()

(array([  0,   0,   0, ..., 999, 999, 999], dtype=int64),
 array([  0,  11,  15, ..., 990, 995, 996], dtype=int64))

In [92]:
ratings[999,996]

1

In [149]:
loss = torch.nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
output.backward()

In [150]:
input

tensor([[ 0.9043,  0.2982,  0.4213,  0.5203,  2.3286],
        [-0.1868, -0.0185,  0.5554, -0.4583,  0.9682],
        [-1.3282,  0.4659, -0.8215, -0.2858,  0.4146]], requires_grad=True)

In [151]:
target

tensor([1, 3, 4])

In [156]:
torch.randn(3, 5, requires_grad=True)

tensor([[ 0.1274, -2.1894,  1.8774,  0.3813,  0.7972],
        [-0.3498,  0.0927, -1.1307,  0.7418,  0.4693],
        [-0.7773,  0.0807,  0.4787,  1.3048,  0.0729]], requires_grad=True)

In [157]:
torch.randn(3, 5).softmax(dim=1)

tensor([[0.3202, 0.2247, 0.1018, 0.2388, 0.1145],
        [0.5049, 0.0781, 0.0201, 0.2026, 0.1943],
        [0.0821, 0.1649, 0.0278, 0.4249, 0.3004]])

In [18]:
criterion = torch.nn.CrossEntropyLoss()

output = torch.randn(10, 120).float()
target = torch.FloatTensor(10).uniform_(0, 120).long()

loss = criterion(output, target)

In [15]:
output

tensor([[-0.7036, -0.4430, -1.2327,  ...,  1.3873, -0.0058,  0.7460],
        [ 1.3635,  0.8821, -0.0163,  ...,  0.8061, -0.1501,  0.7539],
        [-1.7296, -0.2423,  0.0781,  ...,  0.1397, -0.6892,  0.6489],
        ...,
        [ 1.2435, -0.8681,  0.9451,  ...,  0.3858, -0.2553,  1.9310],
        [-0.8657, -1.4592, -1.9062,  ...,  0.5828,  1.4260, -1.1177],
        [-1.1548, -0.0904, -1.6029,  ..., -1.9163,  0.0331,  1.1120]])

In [16]:
target

tensor([ 78,  48,  46,  26,  65,   1, 106,  59,  79,  96])

In [24]:
class MatrixFactorization:
  def __init__(self, R, k, lr=.0003, l2=.04, seed=777):
    self.R = tf.convert_to_tensor(R, dtype=tf.float32)
    self.mask = tf.not_equal(self.R, 0)
    self.m, self.n = R.shape
    self.k = k
    self.lr = lr
    self.l2 = l2
    self.tol = .001
    # Initialize trainable weights.
    self.weight_init = tf.random_normal_initializer(seed=seed)
    self.P = tf.Variable(self.weight_init((self.m, self.k)))
    self.Q = tf.Variable(self.weight_init((self.n, self.k)))

  def loss(self):
    raise NotImplementedError

  def grad_update(self):
    with tf.GradientTape() as t:
      t.watch([self.P, self.Q])
      self.current_loss = self.loss()
    gP, gQ = t.gradient(self.current_loss, [self.P, self.Q])
    self.P.assign_sub(self.lr * gP)
    self.Q.assign_sub(self.lr * gQ)

  def train(self, n_epoch=5000):
    for epoch in range(n_epoch):
      self.grad_update()
      if self.current_loss < self.tol:
        break

In [20]:
# Make missing more prevail.
responses = [-1, 0, 1]
p = np.array([1, 5, 1])
m = 5
n = 10

# A binary response matrix.
b_ratings = np.random.choice(responses, size=m*n, p=p / p.sum()).reshape((m, n))
print(b_ratings)

[[ 0  0  0  0  0  0  0  0  0  0]
 [-1  0  0  0  0  0  0  0 -1  0]
 [ 0 -1  0  0 -1  0  1 -1  0 -1]
 [ 0  1  1  0  0  1  0  0 -1  0]
 [ 0  0  0  0  1  0  0  0 -1  0]]


In [25]:
class BinaryMF(MatrixFactorization):
  def train(self, n_epoch=5000):
    # Cast 1/-1 as binary encoding of 0/1.
    self.labels = torch.cast(torch.not_equal(torch.boolean_mask(self.R, self.mask), -1), dtype=torch.float32)
    for epoch in range(n_epoch):
      self.grad_update()

  # The implementation is far from optimized since we don't need the product of entire P'Q.
  # We only need scores for non-missing entries.
  # The code is hence for educational purpose only.
  def loss(self):
    """Cross entropy loss."""
    logits = torch.boolean_mask(tf.matmul(self.P, self.Q, transpose_b=True), self.mask)
    logloss = torch.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=logits)
    mlogloss = torch.reduce_mean(logloss)
    l2_norm = torch.reduce_sum(self.P**2) + torch.reduce_sum(self.Q**2)
    return mlogloss + self.l2 * l2_norm

In [26]:
# We increase the learning a bit since logloss has a very different scale than squared error.
# For the same reason we decrease the L2 coefficient.
bmf_model = BinaryMF(b_ratings, k=3, lr=.03, l2=.0001)
bmf_model.train()

b_predictions = tf.sigmoid(tf.matmul(bmf_model.P, bmf_model.Q, transpose_b=True)).numpy()

b_mask = np.zeros_like(b_ratings)
b_mask[b_ratings.nonzero()] = 1

print(np.round(b_predictions * b_mask, 2)) # Check prediction on training entries.

NameError: name 'tf' is not defined