# Recommender Systems 2018/19

### Practice session on MF PyTorch



In [1]:
from Notebooks_utils.data_splitter import train_test_holdout
from Data_manager.Movielens10M.Movielens10MReader import Movielens10MReader

# Instantiate the Movielens10M reader and load the dataset
data_reader = Movielens10MReader()
data_reader.load_data()

# URM containing all the interactions exposed by the reader
URM_all = data_reader.get_URM_all()

# Hold-out split of URM_all into a train and a test matrix, called with train_perc = 0.8
# NOTE(review): no random seed is set in this notebook, so the split is presumably
# not reproducible across runs — confirm in train_test_holdout
URM_train, URM_test = train_test_holdout(URM_all, train_perc = 0.8)

DataReader: Verifying data consistency...
DataReader: Verifying data consistency... Passed!
DataReader: current dataset is: <class 'Data_manager.Movielens10M.Movielens10MReader.Movielens10MReader'>
	Number of items: 10680
	Number of users: 69878
	Number of interactions in URM_all: 9973605
	Interaction density: 1.34E-02
	Interactions per user:
		 Min: 1.90E+01
		 Avg: 1.43E+02
		 Max: 7.36E+03
	Interactions per item:
		 Min: 0.00E+00
		 Avg: 9.34E+02
		 Max: 3.49E+04
	Gini Index: 0.57



### MF models rely upon latent factors for users and items which are called 'embeddings'

In [2]:
# Dimension of the latent space: used below as embedding_dim for users and items
num_factors = 10

# The URM shape is unpacked as (number of users, number of items)
n_users, n_items = URM_train.shape

In [3]:
import torch

# Lookup tables holding one learnable num_factors-dimensional vector per user and per item
user_factors = torch.nn.Embedding(num_embeddings = n_users, embedding_dim = num_factors)
item_factors = torch.nn.Embedding(num_embeddings = n_items, embedding_dim = num_factors)

In [4]:
# Display the module repr to check the (num_embeddings, embedding_dim) size
user_factors

Embedding(69878, 10)

In [5]:
# Display the module repr to check the (num_embeddings, embedding_dim) size
item_factors

Embedding(10680, 10)

### To compute the prediction we have to multiply the user factors by the item factors, which is a linear operation.

### We define a single layer and an activation function, which takes the result and transforms it into the final prediction. The activation function can be used to restrict the predicted values (e.g., sigmoid is between 0 and 1)

In [6]:
# Single linear layer mapping a num_factors-dimensional input to one scalar output
layer_1 = torch.nn.Linear(in_features = num_factors, out_features = 1)

layer_1

Linear(in_features=10, out_features=1, bias=True)

In [7]:
# ReLU maps negative inputs to zero, so the final prediction is never negative
activation_function = torch.nn.ReLU()

activation_function

ReLU()

## In order to compute the prediction you have to:
* Define a list of user and item indices
* Create a tensor from it
* Create a variable from the tensor
* Get the user and item embedding
* Compute the element-wise product of the embeddings
* Pass the element-wise product to the single layer network
* Pass the output of the single layer network to the activation function

In [8]:
from torch.autograd import Variable


# Indices of the (user, item) pair whose interaction we want to score
item_index = [15]
user_index = [42]

# Build the index tensors directly as int64. Going through torch.Tensor(...)
# first would create a float32 tensor, which cannot represent integers above
# 2**24 exactly and would silently corrupt large indices.
user_index = torch.tensor(user_index, dtype=torch.long)
item_index = torch.tensor(item_index, dtype=torch.long)

# Variable is deprecated since PyTorch 0.4: it simply returns the tensor.
# It is kept here only so that the cell follows the steps listed above.
user_index = Variable(user_index)
item_index = Variable(item_index)

# Look up the latent vectors of the selected user and item
current_user_factors = user_factors(user_index)
current_item_factors = item_factors(item_index)

# Element-wise product of the two embeddings, one value per latent factor
element_wise_product = torch.mul(current_user_factors, current_item_factors)
element_wise_product

tensor([[-1.1633,  0.7963,  1.9864, -0.8203,  0.1206, -1.0947,  0.4731, -0.3108,
          0.5729, -6.5700]], grad_fn=<MulBackward0>)

### To take the result of the prediction and transform it into a traditional numpy array you have to first call .detach() and then .numpy()
### The result is an array of 1 cell

In [9]:

# Linear layer followed by the activation gives the final prediction
prediction = activation_function(layer_1(element_wise_product))

# Detach from the autograd graph before converting to a numpy array
prediction_numpy = prediction.detach().numpy()
print("Prediction is {}".format(prediction_numpy))

Prediction is [[0.08442144]]


# Train a MF MSE model with PyTorch

# Step 1 Create a Model python object

### The model should implement the forward function which computes the prediction as we did before

In [10]:

class MF_MSE_PyTorch_model(torch.nn.Module):
    """
    Matrix-factorization model with a learned combination of the latent factors.

    Each user and each item owns an n_factors-dimensional embedding. The score
    of a (user, item) pair is the element-wise product of the two embeddings,
    passed through a linear layer with one output and then through a ReLU.

    :param n_users: number of rows of the user embedding table
    :param n_items: number of rows of the item embedding table
    :param n_factors: dimension of every embedding
    """

    def __init__(self, n_users, n_items, n_factors):
        super().__init__()

        self.n_users, self.n_items, self.n_factors = n_users, n_items, n_factors

        # Submodules are created in the order users -> items -> linear layer
        self.user_factors = torch.nn.Embedding(self.n_users, self.n_factors)
        self.item_factors = torch.nn.Embedding(self.n_items, self.n_factors)

        self.layer_1 = torch.nn.Linear(self.n_factors, 1)
        self.activation_function = torch.nn.ReLU()

    def forward(self, user_coordinates, item_coordinates):
        """
        Score every (user, item) pair of the batch.

        :param user_coordinates: tensor of user indices
        :param item_coordinates: tensor of item indices, paired with user_coordinates
        :return: tensor of non-negative predictions with a trailing dimension of size 1
        """
        user_vectors = self.user_factors(user_coordinates)
        item_vectors = self.item_factors(item_coordinates)

        # Element-wise product, then linear layer, then ReLU
        interaction = user_vectors * item_vectors
        return self.activation_function(self.layer_1(interaction))

    def get_W(self):
        """Return the user embedding table as a numpy array."""
        user_weights = self.user_factors.weight
        return user_weights.detach().cpu().numpy()

    def get_H(self):
        """Return the item embedding table as a numpy array."""
        item_weights = self.item_factors.weight
        return item_weights.detach().cpu().numpy()





# Step 2 Setup PyTorch devices and Data Reader

In [11]:
# Set to True to request the GPU; it is used only if CUDA is actually available
use_cuda = False

if not (use_cuda and torch.cuda.is_available()):
    # Either the GPU was not requested or no CUDA device is available
    device = torch.device('cpu')
    print("MF_MSE_PyTorch: Using CPU")
else:
    device = torch.device('cuda')
    print("MF_MSE_PyTorch: Using CUDA")


MF_MSE_PyTorch: Using CPU


### Create an instance of the model and specify the device it should run on

In [12]:
# Build the model and move its parameters to the selected device
pyTorchModel = MF_MSE_PyTorch_model(n_users, n_items, num_factors).to(device)

### Choose loss functions, there are quite a few to choose from

In [13]:
# Sum of squared errors over the batch. reduction='sum' is the supported
# spelling of the deprecated size_average=False argument.
lossFunction = torch.nn.MSELoss(reduction='sum')



### Select the optimizer to be used for the model parameters: Adam, AdaGrad, RMSProp etc... 

In [14]:
# Step size handed to the optimizer below
learning_rate = 1e-4

# Adagrad over every trainable parameter of the model
optimizer = torch.optim.Adagrad(pyTorchModel.parameters(), lr = learning_rate)

### Define the DatasetIterator, which will be used to iterate over the data

### A DatasetIterator will implement the Dataset class and provide the `__getitem__(self, index)` method, which returns the data point at the given index.

### Since we need the data to be a tensor, we pre-initialize everything as a tensor. In practice we save the URM in coordinate format (user, item, rating)

In [15]:
from torch.utils.data import Dataset
import numpy as np

class DatasetIterator_URM(Dataset):
    """
    Dataset exposing the non-zero cells of a sparse URM as tensors.

    The matrix is converted to coordinate format once, at construction time.
    Each data point is a pair (coordinates, rating), where coordinates is an
    int64 tensor [user, item] and rating is a float32 scalar tensor.

    :param URM: sparse matrix providing tocoo(); rows are users, columns are items
    """

    def __init__(self, URM):

        URM = URM.tocoo()

        self.n_data_points = URM.nnz

        # Keep the indices as int64 all the way. A float intermediate (and in
        # particular a float32 tensor) cannot represent integers above 2**24
        # exactly and would silently corrupt large user/item indices.
        coordinates = np.empty((self.n_data_points, 2), dtype=np.int64)
        coordinates[:, 0] = URM.row
        coordinates[:, 1] = URM.col

        # np.float was removed in NumPy 1.24; it was an alias of the builtin
        # float, i.e. float64. astype() copies, so URM.data is left untouched.
        ratings = URM.data.astype(np.float64)

        self.user_item_coordinates = torch.from_numpy(coordinates)
        self.rating = torch.tensor(ratings, dtype=torch.float32)

    def __getitem__(self, index):
        """
        Format is (row, col, data)

        :param index: position of the data point, or any index accepted by a tensor
        :return: tuple (tensor [user, item], rating tensor)
        """

        return self.user_item_coordinates[index, :], self.rating[index]

    def __len__(self):
        """Number of stored interactions (non-zero cells of the URM)."""

        return self.n_data_points


### We pass the DatasetIterator to a DataLoader object which manages the use of batches and so on...

In [16]:
from torch.utils.data import DataLoader

# Number of interactions per mini-batch
batch_size = 200

# Coordinate-format view of the training URM
dataset_iterator = DatasetIterator_URM(URM_train)

# Shuffled mini-batches, loaded in the main process (num_workers left at its default)
train_data_loader = DataLoader(dataset_iterator, batch_size=batch_size, shuffle=True)

## And now we run the usual epoch steps
* Data point sampling
* Prediction computation
* Loss function computation
* Gradient computation
* Update

In [17]:

# Running sum of the batch losses. It must be initialised once, before the
# loop: resetting it at every iteration would make it always 0.
cumulative_loss = 0.0

for num_batch, (input_data, label) in enumerate(train_data_loader):

    # Tensors can be moved to the device directly; the Variable wrapper is
    # deprecated since PyTorch 0.4 and no longer needed for autograd.
    input_data_tensor = input_data.to(device)
    label_tensor = label.to(device)

    # Column 0 holds the user index, column 1 the item index
    user_coordinates = input_data_tensor[:,0]
    item_coordinates = input_data_tensor[:,1]

    # FORWARD pass
    prediction = pyTorchModel(user_coordinates, item_coordinates)

    # Pass prediction and label removing last empty dimension of prediction
    loss = lossFunction(prediction.view(-1), label_tensor)

    batch_loss = loss.item()
    cumulative_loss += batch_loss

    if num_batch % 100 == 0:

        print("Batch {} of {}, loss {:.4f}".format(num_batch, len(train_data_loader), batch_loss))

        # Demo only: stop after 2000 batches instead of completing the epoch.
        # The break happens before the backward pass, so this last batch is
        # scored but not used to update the model.
        if num_batch == 2000:
            print("Interrupting train")
            break

    # BACKWARD pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


Batch 0 of 39902, loss 2693.2285
Batch 100 of 39902, loss 2569.2058
Batch 200 of 39902, loss 2712.8464
Batch 300 of 39902, loss 2510.9595
Batch 400 of 39902, loss 2756.1619
Batch 500 of 39902, loss 2772.4038
Batch 600 of 39902, loss 2699.5073
Batch 700 of 39902, loss 2679.5432
Batch 800 of 39902, loss 2543.4636
Batch 900 of 39902, loss 2559.9866
Batch 1000 of 39902, loss 2758.6155
Batch 1100 of 39902, loss 2591.0347
Batch 1200 of 39902, loss 2512.6665
Batch 1300 of 39902, loss 2594.8857
Batch 1400 of 39902, loss 2634.5295
Batch 1500 of 39902, loss 2652.8567
Batch 1600 of 39902, loss 2650.3032
Batch 1700 of 39902, loss 2702.7373
Batch 1800 of 39902, loss 2487.4812
Batch 1900 of 39902, loss 2503.0127
Batch 2000 of 39902, loss 2438.5083
Interrupting train


## After training is complete (it may take a while and many epochs), we can get the matrices in the usual numpy format

In [18]:
# Export the embedding tables as numpy arrays: W holds the user factors, H the item factors
W = pyTorchModel.get_W()
H = pyTorchModel.get_H()

In [19]:
# Display the user-factor matrix (numpy abbreviates large arrays with "...")
W

array([[ 1.3723019 ,  1.2319666 ,  0.01198455, ..., -0.65794045,
        -0.7925086 ,  0.5564781 ],
       [ 0.8585925 ,  0.16828233,  0.72623336, ...,  0.16024801,
        -0.56704813,  2.2603905 ],
       [ 0.26224038, -0.7109332 ,  0.0401555 , ...,  0.2451228 ,
         0.05146181,  0.5744626 ],
       ...,
       [-1.1616576 , -0.06856933, -1.2849674 , ..., -1.6088682 ,
         0.61678284,  0.8829945 ],
       [ 0.39393863,  0.9772253 ,  0.8669473 , ..., -1.1229942 ,
         1.3125156 , -0.3003385 ],
       [ 1.8895546 ,  0.20086485, -0.8687518 , ..., -1.1064979 ,
        -1.2153162 ,  0.52877593]], dtype=float32)

In [20]:
# Display the item-factor matrix (numpy abbreviates large arrays with "...")
H

array([[-1.2143894 , -0.58902395,  0.29043895, ...,  0.64703804,
         0.43375066,  0.07141441],
       [-0.22804576,  0.6213643 ,  3.2913234 , ..., -1.7908198 ,
         0.9197715 , -0.89540195],
       [-0.5257621 , -0.14976846, -0.17263614, ..., -0.46387058,
        -0.10895726, -0.780038  ],
       ...,
       [-0.21858367,  0.19782965,  0.29153433, ..., -0.02902405,
         1.1861417 ,  0.3214013 ],
       [-0.7174214 , -0.8532166 , -1.2287543 , ...,  0.17205659,
         0.9493459 ,  0.13268645],
       [-1.4975051 ,  2.3539457 , -0.12908785, ...,  0.17214388,
        -0.1299802 , -0.576439  ]], dtype=float32)