# Recommender Systems 2018/19

### Practice session on MF PyTorch



In [1]:
from urllib.request import urlretrieve
import zipfile

# skip the download
#urlretrieve ("http://files.grouplens.org/datasets/movielens/ml-10m.zip", "data/Movielens_10M/movielens_10m.zip")
# Extract the ratings file from the local archive. The context manager closes
# the zip handle as soon as the member has been written to disk.
with zipfile.ZipFile("data/Movielens_10M/movielens_10m.zip") as dataFile:
    URM_path = dataFile.extract("ml-10M100K/ratings.dat", path = "data/Movielens_10M")

# The text handle stays open here because the parsing code further down reads from it.
URM_file = open(URM_path, 'r')


def rowSplit(rowString):
    """
    Parse one "::"-separated line of the ratings file.

    :param rowString: text line in the form "user::item::rating::timestamp"
    :return: tuple whose first four fields are converted to
             (int, int, float, int); any further fields are kept as strings
    """

    fields = rowString.split("::")

    # The fourth field carries the line terminator, strip it before parsing
    fields[3] = fields[3].replace("\n", "")

    # Converter applied to each of the first four positions, in order
    converters = (int, int, float, int)

    for position, convert in enumerate(converters):
        fields[position] = convert(fields[position])

    return tuple(fields)


# Parse every line of the ratings file. The `with` block guarantees the file
# handle is released once all rows have been read (it was previously left open).
with URM_file:
    URM_file.seek(0)
    URM_tuples = [rowSplit(line) for line in URM_file]

# Transpose the list of (user, item, rating, timestamp) rows into four columns
userList, itemList, ratingList, timestampList = zip(*URM_tuples)

userList = list(userList)
itemList = list(itemList)
ratingList = list(ratingList)
timestampList = list(timestampList)

import scipy.sparse as sps

# Build the URM in coordinate form (value, (row = user, col = item))
# and convert it straight to CSR.
URM_all = sps.coo_matrix((ratingList, (userList, itemList))).tocsr()



from Notebooks_utils.data_splitter import train_test_holdout


# Split the interactions into a train and a test matrix. train_perc = 0.8 presumably
# keeps about 80% of them for training; the helper lives in Notebooks_utils, verify there.
URM_train, URM_test = train_test_holdout(URM_all, train_perc = 0.8)

### MF models rely upon latent factors for users and items which are called 'embeddings'

In [2]:
# Length of every latent-factor vector (embedding)
num_factors = 10

# URM rows are indexed by user id, columns by item id
n_users = URM_train.shape[0]
n_items = URM_train.shape[1]

In [3]:
import torch

# One learnable vector of size num_factors for every user row and every item column
user_factors = torch.nn.Embedding(n_users, num_factors)
item_factors = torch.nn.Embedding(n_items, num_factors)

In [4]:
# Bare expression as the last line of the cell: shows the user embedding layer's repr
user_factors

Embedding(71568, 10)

In [5]:
# Bare expression as the last line of the cell: shows the item embedding layer's repr
item_factors

Embedding(65134, 10)

### To compute the prediction we have to multiply the user factors by the item factors, which is a linear operation.

### We define a single layer and an activation function, which takes the layer's output and transforms it into the final prediction. The activation function can be used to restrict the range of the predicted values (e.g., a sigmoid is between 0 and 1).

In [7]:
# Single linear layer that maps a num_factors-sized vector to one scalar score
layer_1 = torch.nn.Linear(num_factors, 1)

layer_1

Linear(in_features=10, out_features=1, bias=True)

In [9]:
# ReLU maps every negative input to zero and leaves positive inputs untouched
activation_function = torch.nn.ReLU(inplace = False)

activation_function

ReLU()

## In order to compute the prediction you have to:
* Define a list of user and item indices
* Create a tensor from it
* Create a variable from the tensor (only required before PyTorch 0.4; since then tensors support autograd directly)
* Get the user and item embedding
* Compute the element-wise product of the embeddings
* Pass the element-wise product to the single layer network
* Pass the output of the single layer network to the activation function

In [23]:
from torch.autograd import Variable


# Indices of the (user, item) pair whose interaction we want to score
item_index = [15]
user_index = [42]

# Embedding lookups need integer (int64) indices. Build them directly as long
# tensors instead of creating a float tensor and casting it afterwards.
# No Variable wrapper is needed: since PyTorch 0.4 tensors handle autograd themselves.
user_index = torch.tensor(user_index, dtype = torch.long)
item_index = torch.tensor(item_index, dtype = torch.long)

# Look up the latent vectors of the selected user and item
current_user_factors = user_factors(user_index)
current_item_factors = item_factors(item_index)

# Element-wise product of the two embeddings, shape (1, num_factors)
element_wise_product = torch.mul(current_user_factors, current_item_factors)
element_wise_product

tensor([[ 0.0934,  0.3721, -0.0781, -1.2313,  0.8013, -0.5451, -0.7487,
         -1.1881,  0.0999,  0.8536]])

### To take the result of the prediction and transform it into a traditional numpy array you have to first call .detach() and then .numpy()
### The result is an array of 1 cell

In [26]:

# Linear layer followed by the activation gives the final score
prediction = activation_function(layer_1(element_wise_product))

# detach() separates the value from the autograd graph so it can be exported to numpy
prediction_numpy = prediction.detach().numpy()

print("Prediction is {}".format(prediction_numpy))

Prediction is [[0.]]


# Train a MF MSE model with PyTorch

# Step 1 Create a Model python object

### The model should implement the forward function which computes the prediction as we did before

In [27]:

class MF_MSE_PyTorch_model(torch.nn.Module):
    """
    Matrix-factorization model built from PyTorch layers.

    A prediction is obtained by multiplying element-wise the embedding of a user
    with the embedding of an item, feeding the product to a single linear layer
    with one output and applying a ReLU to the result.
    """

    def __init__(self, n_users, n_items, n_factors):
        """
        :param n_users: number of rows of the user embedding table
        :param n_items: number of rows of the item embedding table
        :param n_factors: size of every embedding vector
        """
        super().__init__()

        self.n_users, self.n_items, self.n_factors = n_users, n_items, n_factors

        # Layers are created in the same order as before (user, item, linear)
        # so that a seeded random initialisation yields the same weights.
        self.user_factors = torch.nn.Embedding(self.n_users, self.n_factors)
        self.item_factors = torch.nn.Embedding(self.n_items, self.n_factors)
        self.layer_1 = torch.nn.Linear(self.n_factors, 1)
        self.activation_function = torch.nn.ReLU()


    def forward(self, user_coordinates, item_coordinates):
        """
        Score every (user, item) pair of the batch.

        :param user_coordinates: integer tensor of user indices
        :param item_coordinates: integer tensor of item indices, paired with the users
        :return: tensor of non-negative predictions with a trailing dimension of size 1
        """
        user_vectors = self.user_factors(user_coordinates)
        item_vectors = self.item_factors(item_coordinates)

        # Element-wise interaction between the two latent vectors
        interaction = user_vectors * item_vectors

        return self.activation_function(self.layer_1(interaction))


    def get_W(self):
        """Return the user embedding table as a numpy array of shape (n_users, n_factors)."""
        user_weights = self.user_factors.weight
        return user_weights.detach().cpu().numpy()


    def get_H(self):
        """Return the item embedding table as a numpy array of shape (n_items, n_factors)."""
        item_weights = self.item_factors.weight
        return item_weights.detach().cpu().numpy()





# Step 2 Setup PyTorch devices and Data Reader

In [29]:
# Flip to True to run on the GPU when one is available
use_cuda = False

# Fall back to the CPU unless CUDA was both requested and detected
if not (use_cuda and torch.cuda.is_available()):
    device = torch.device('cpu')
    print("MF_MSE_PyTorch: Using CPU")
else:
    device = torch.device('cuda')
    print("MF_MSE_PyTorch: Using CUDA")


MF_MSE_PyTorch: Using CPU


### Create an instance of the model and specify the device it should run on

In [30]:
# Build the model first, then move its parameters to the selected device
pyTorchModel = MF_MSE_PyTorch_model(n_users, n_items, num_factors)
pyTorchModel = pyTorchModel.to(device)

### Choose a loss function; there are quite a few to choose from

In [31]:
# Sum (rather than average) the squared errors over the batch.
# reduction = 'sum' is the current spelling of the deprecated size_average = False.
lossFunction = torch.nn.MSELoss(reduction = 'sum')

### Select the optimizer to be used for the model parameters: Adam, AdaGrad, RMSProp etc... 

In [59]:
# Step size handed to the optimizer
learning_rate = 1e-4

# Adagrad over every trainable parameter of the model
optimizer = torch.optim.Adagrad(params = pyTorchModel.parameters(), lr = learning_rate)

### Define the DatasetIterator, which will be used to iterate over the data

### A DatasetIterator subclasses the Dataset class and provides the `__getitem__(self, index)` method, which returns the data point stored at the given index.

### Since we need the data to be a tensor, we pre-initialize everything as a tensor. In practice we save the URM in coordinate format (user, item, rating)

In [60]:
from torch.utils.data import Dataset
import numpy as np

class DatasetIterator_URM(Dataset):
    """
    Dataset exposing the stored cells of a sparse URM as (coordinates, rating) samples.

    Everything is converted to tensors once, at construction time:
      * user_item_coordinates: int64 tensor of shape (nnz, 2) holding (row, col)
      * rating: float32 tensor of shape (nnz,) holding the stored values
    """

    def __init__(self, URM):
        """
        :param URM: scipy sparse matrix (any format providing .tocoo())
        """

        URM = URM.tocoo()

        self.n_data_points = URM.nnz

        # Keep the indices integer end to end: going through a float array would
        # silently corrupt indices larger than 2**24 once cast to float32.
        coordinates = np.column_stack((URM.row, URM.col)).astype(np.int64)
        self.user_item_coordinates = torch.from_numpy(coordinates)

        # np.float32 replaces the np.float alias, which was removed in NumPy 1.24.
        # The resulting tensor dtype (float32) is the same as before.
        self.rating = torch.from_numpy(URM.data.astype(np.float32))


    def __getitem__(self, index):
        """
        Format is ((row, col), data)
        :param index: position of the data point, 0 <= index < len(self)
        :return: tuple (int64 tensor with the [row, col] pair, float32 rating tensor)
        """

        return self.user_item_coordinates[index, :], self.rating[index]


    def __len__(self):
        """Number of stored interactions in the URM."""

        return self.n_data_points


### We pass the DatasetIterator to a DataLoader object which manages the use of batches and so on...

In [65]:
from torch.utils.data import DataLoader

# Number of (user, item, rating) samples per training batch
batch_size = 200

dataset_iterator = DatasetIterator_URM(URM_train)

# Shuffled mini-batches; data loading stays in the main process
# (no num_workers argument is passed).
train_data_loader = DataLoader(dataset_iterator, batch_size = batch_size, shuffle = True)

## And now we run the usual epoch steps
* Data point sampling
* Prediction computation
* Loss function computation
* Gradient computation
* Update

In [70]:

# Progress is printed every `log_every` batches; this demo run is cut short
# once `max_batches` parameter updates have been performed.
log_every = 100
max_batches = 2000

for num_batch, (input_data, label) in enumerate(train_data_loader):

    # Tensors take part in autograd directly, so no Variable wrapper is needed:
    # just move the batch to the device the model lives on.
    input_data_tensor = input_data.to(device)
    label_tensor = label.to(device)

    # Column 0 holds the user index, column 1 the item index
    user_coordinates = input_data_tensor[:,0]
    item_coordinates = input_data_tensor[:,1]

    # FORWARD pass
    prediction = pyTorchModel(user_coordinates, item_coordinates)

    # Pass prediction and label removing last empty dimension of prediction
    loss = lossFunction(prediction.view(-1), label_tensor)

    if num_batch % log_every == 0:
        print("Batch {} of {}, loss {:.4f}".format(num_batch, len(train_data_loader), loss.item()))

    # Checked independently of the logging condition, so the interruption also
    # works when max_batches is not a multiple of log_every.
    if num_batch == max_batches:
        print("Interrupting train")
        break

    # BACKWARD pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


Batch 0 of 24994, loss 1299.5726
Batch 100 of 24994, loss 1246.1948
Batch 200 of 24994, loss 1325.1472
Batch 300 of 24994, loss 1316.5590
Batch 400 of 24994, loss 1184.6915
Batch 500 of 24994, loss 1412.0217
Batch 600 of 24994, loss 1351.8953
Batch 700 of 24994, loss 1268.8069
Batch 800 of 24994, loss 1375.4374
Batch 900 of 24994, loss 1439.9543
Batch 1000 of 24994, loss 1506.6323
Batch 1100 of 24994, loss 1488.0021
Batch 1200 of 24994, loss 1183.2865
Batch 1300 of 24994, loss 1492.3884
Batch 1400 of 24994, loss 1337.6088
Batch 1500 of 24994, loss 1466.0822
Batch 1600 of 24994, loss 1326.6989
Batch 1700 of 24994, loss 1308.6276
Batch 1800 of 24994, loss 1500.3684
Batch 1900 of 24994, loss 1470.4662
Batch 2000 of 24994, loss 1407.8556
Interrupting train


## After training is complete (it may take a while and many epochs), we can get the matrices in the usual numpy format

In [52]:
# Export the learned user (W) and item (H) embedding tables as numpy arrays
W, H = pyTorchModel.get_W(), pyTorchModel.get_H()

In [53]:
# Show the user-factor matrix returned by get_W() (numpy abbreviates large arrays)
W

array([[-0.8693999 ,  0.05498629, -0.07996137, ...,  0.80227995,
         0.16469592,  1.5831672 ],
       [-1.7885368 , -1.3893236 , -0.19196971, ..., -1.0810672 ,
         0.70827496,  0.24173023],
       [-0.36013108, -1.4618611 , -0.53237206, ...,  0.7356418 ,
        -0.42919588, -0.02103774],
       ...,
       [ 0.17021644, -0.45737997, -1.2599324 , ...,  1.7705928 ,
         0.47563678, -0.2838048 ],
       [ 0.00311422, -0.40563554, -2.5473928 , ..., -1.3527074 ,
         1.0070924 , -0.5943767 ],
       [ 0.18628068, -0.26068607, -0.49782896, ..., -1.469555  ,
         0.2341343 ,  0.53745097]], dtype=float32)

In [54]:
# Show the item-factor matrix returned by get_H() (numpy abbreviates large arrays)
H

array([[ 1.5917609 ,  0.6629568 ,  0.8474942 , ..., -1.2384548 ,
         0.3644825 , -0.8415124 ],
       [ 2.1821175 , -2.0746005 ,  0.59765303, ..., -0.32873613,
        -0.94762105, -0.57247436],
       [-0.7156964 ,  1.0119928 ,  0.7251737 , ..., -0.19364427,
         1.2719904 ,  1.1096959 ],
       ...,
       [ 0.46778318, -0.18097553, -0.9623822 , ..., -0.26983652,
         0.12652689,  2.4998083 ],
       [ 0.4835092 ,  1.2725722 , -1.172361  , ...,  0.23162012,
        -1.3099946 , -2.0804546 ],
       [ 0.4059417 , -0.0283024 ,  1.3251573 , ..., -0.17705719,
        -0.75827426, -0.07519675]], dtype=float32)