# MLP GitHub repository

Adapted for interactive (notebook) use.

Modified version of:
https://github.com/HarshdeepGupta/recommender_pytorch/blob/master/MLP.py

In [34]:
# PyTorch imports
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from torch import nn
from torch.utils.data import Dataset, DataLoader
torch.manual_seed(0)

# Python imports
import argparse
from time import time
import numpy as np
import pickle

# Workspace imports
from src.evaluate import evaluate_model
from src.Dataset import MovieLensDataset
from src.utils import train_one_epoch, test, plot_statistics

# Use the first CUDA device when PyTorch reports one, otherwise run on the CPU
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [29]:
# Default settings used by this notebook. Key order is kept stable because the
# dict is printed as-is further down.
default_args_dict = {
    # where the data lives
    "path": "data/",
    "dataset": "movielens",
    # training schedule
    "epochs": 30,
    "batch_size": 256,
    # layer sizes handed to the MLP constructor
    "layers": [16, 32, 16, 8],
    "weight_decay": 0.00001,
    # number of negative instances to pair with a positive instance
    # while training
    "num_neg_train": 4,
    "num_neg_test": 100,
    "lr": 0.001,
    "dropout": 0.,
    "learner": "adam",
    "verbose": 1,
    # save trained model or not
    "out": 1,
}

args = default_args_dict

# Unpack the settings into the module-level names used by the cells below.
path, dataset = args["path"], args["dataset"]
layers, weight_decay = args["layers"], args["weight_decay"]
num_negatives_train, num_negatives_test = (
    args["num_neg_train"],
    args["num_neg_test"],
)
dropout, learner = args["dropout"], args["learner"]
learning_rate, batch_size = args["lr"], args["batch_size"]
epochs, verbose = args["epochs"], args["verbose"]

In [27]:
# Passed to test() further down; presumably the top-K cut-off for HR/NDCG.
topK = 10
# Wrap the dict in a 1-tuple so it is always treated as the single %s operand.
print("MLP arguments: %s " % (args,))

MLP arguments: {'path': 'data/', 'dataset': 'movielens', 'epochs': 30, 'batch_size': 256, 'layers': [16, 32, 16, 8], 'weight_decay': 1e-05, 'num_neg_train': 4, 'num_neg_test': 100, 'lr': 0.001, 'dropout': 0.0, 'learner': 'adam', 'verbose': 1, 'out': 1} 


# TODO:

+ Create a `MovieLensDataset`-style object for other datasets


### Methods/attributes of MovieLensDataset used here:

+ `trainMatrix`, `testRatings`, `testNegatives`
+ The dataset object itself is passed as the first argument of `DataLoader`

In [30]:
# Build the dataset object, pull out the pieces used later, and report timing.

t1 = time()
full_dataset = MovieLensDataset(
    path + dataset,
    num_negatives_train=num_negatives_train,
    num_negatives_test=num_negatives_test,
)
train = full_dataset.trainMatrix
testRatings = full_dataset.testRatings
testNegatives = full_dataset.testNegatives
# The shape of the training matrix gives the user and item counts.
num_users, num_items = train.shape
print(
    "Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
    % (time() - t1, num_users, num_items, train.nnz, len(testRatings))
)

# Shuffled mini-batches over the dataset, loaded in the main process.
training_data_generator = DataLoader(
    full_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

Load data done [4.9 s]. #user=944, #item=1683, #train=99057, #test=943


In [None]:
class MLP(nn.Module):
    """Simple feed-forward network with embeddings for users and items.

    A user embedding and an item embedding (each ``layers[0] // 2`` wide) are
    concatenated, passed through a stack of ``Linear -> ReLU -> dropout``
    layers, and mapped by a final linear layer plus sigmoid to one score per
    (user, item) pair.
    """

    def __init__(self, n_users, n_items, layers=None, dropout=False):
        """Build the embeddings and dense layers.

        Args:
            n_users: number of rows in the user embedding table.
            n_items: number of rows in the item embedding table.
            layers: layer widths; ``layers[0]`` is the width of the
                concatenated embeddings and must be even. Defaults to
                ``[16, 8]``.
            dropout: dropout probability applied after every hidden layer
                (``False`` behaves like ``0``).
        """
        super().__init__()
        # Avoid a shared mutable default argument.
        if layers is None:
            layers = [16, 8]
        assert (layers[0] % 2 == 0), "layers[0] must be an even number"
        self.__alias__ = "MLP {}".format(layers)
        self.__dropout__ = dropout

        # user and item embedding layers, each half of the first layer width
        embedding_dim = layers[0] // 2
        self.user_embedding = torch.nn.Embedding(n_users, embedding_dim)
        self.item_embedding = torch.nn.Embedding(n_items, embedding_dim)

        # hidden dense layers: one Linear per consecutive pair of widths
        self.fc_layers = torch.nn.ModuleList([
            torch.nn.Linear(in_size, out_size)
            for in_size, out_size in zip(layers[:-1], layers[1:])
        ])
        # final prediction layer
        self.output_layer = torch.nn.Linear(layers[-1], 1)

    def forward(self, feed_dict):
        """Score (user, item) pairs.

        Args:
            feed_dict: mapping with index tensors under ``'user_id'`` and
                ``'item_id'``.

        Returns:
            Tensor of sigmoid scores, one row per pair.
        """
        users = feed_dict['user_id']
        items = feed_dict['item_id']
        user_embedding = self.user_embedding(users)
        item_embedding = self.item_embedding(items)
        # concatenate user and item embeddings to form input
        x = torch.cat([user_embedding, item_embedding], 1)
        for layer in self.fc_layers:
            x = F.relu(layer(x))
            x = F.dropout(x, p=self.__dropout__, training=self.training)
        logit = self.output_layer(x)
        rating = torch.sigmoid(logit)
        return rating

    def predict(self, feed_dict):
        """Return scores as a numpy array for numpy inputs.

        The caller's ``feed_dict`` is left untouched: converted tensors go
        into a fresh dict. Inputs are moved to the device the model's
        parameters live on, so no module-level ``device`` is required.
        Entries whose value is ``None`` are passed through unchanged.
        """
        target_device = self.user_embedding.weight.device
        tensor_feed = {
            key: (
                value
                if value is None
                else torch.from_numpy(value).to(
                    dtype=torch.long, device=target_device)
            )
            for key, value in feed_dict.items()
        }
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output_scores = self.forward(tensor_feed)
        return output_scores.cpu().detach().numpy()

    def get_alias(self):
        """Return the human-readable model name, e.g. ``"MLP [16, 8]"``."""
        return self.__alias__
    


In [32]:
# Build the model and move it to the selected device.
model = MLP(num_users, num_items, layers=layers, dropout=dropout)
model.to(device)
if verbose:
    print(model)

loss_fn = torch.nn.BCELoss()
# Pass the configured learning rate explicitly; otherwise the "lr" setting is
# silently ignored and Adam falls back to its own default.
# NOTE(review): the "learner" setting is not consulted here; Adam is always used.
optimizer = torch.optim.Adam(
    model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Record performance
hr_list = []
ndcg_list = []
BCE_loss_list = []

MLP(
  (user_embedding): Embedding(944, 8)
  (item_embedding): Embedding(1683, 8)
  (fc_layers): ModuleList(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Linear(in_features=32, out_features=16, bias=True)
    (2): Linear(in_features=16, out_features=8, bias=True)
  )
  (output_layer): Linear(in_features=8, out_features=1, bias=True)
)


In [23]:
# Display the selected torch device
device

device(type='cuda', index=0)

In [33]:
# Check Init performance
hr, ndcg = test(model, full_dataset, topK)
hr_list.append(hr)
ndcg_list.append(ndcg)
# No training loss exists before the first epoch; 1 is a placeholder that keeps
# the three history lists the same length.
BCE_loss_list.append(1)

# do the epochs now
for epoch in range(epochs):
    epoch_loss = train_one_epoch(model, training_data_generator,
                                 loss_fn, optimizer, epoch, device)

    # Evaluate every `verbose` epochs. verbose == 0 turns per-epoch evaluation
    # off; the truthiness guard also avoids a modulo-by-zero.
    if verbose and epoch % verbose == 0:
        hr, ndcg = test(model, full_dataset, topK)
        hr_list.append(hr)
        ndcg_list.append(ndcg)
        BCE_loss_list.append(epoch_loss)
        # TODO: track the best HR/NDCG epoch and save the model when the "out"
        # setting is > 0.
print("hr for epochs: ", hr_list)
print("ndcg for epochs: ", ndcg_list)
print("loss for epochs: ", BCE_loss_list)

Eval: HR = 0.0859, NDCG = 0.0376 [0.7 s]
Epoch = 0
Epoch completed 6.0 s
Train Loss: 0.4507988940529737
Eval: HR = 0.4019, NDCG = 0.2012 [0.7 s]
Epoch = 1
Epoch completed 6.0 s
Train Loss: 0.3650287753081753
Eval: HR = 0.3934, NDCG = 0.2147 [0.7 s]
Epoch = 2
Epoch completed 5.9 s
Train Loss: 0.3579750397870707
Eval: HR = 0.4062, NDCG = 0.2178 [0.7 s]
Epoch = 3
Epoch completed 6.1 s
Train Loss: 0.3544639602650044
Eval: HR = 0.4008, NDCG = 0.2149 [0.7 s]
Epoch = 4
Epoch completed 6.0 s
Train Loss: 0.3514424908992856
Eval: HR = 0.3977, NDCG = 0.2159 [0.7 s]
Epoch = 5
Epoch completed 5.9 s
Train Loss: 0.34860661123766146
Eval: HR = 0.4019, NDCG = 0.2192 [0.7 s]
Epoch = 6
Epoch completed 5.9 s
Train Loss: 0.34537755880090926
Eval: HR = 0.4295, NDCG = 0.2374 [0.7 s]
Epoch = 7
Epoch completed 6.0 s
Train Loss: 0.3411040868870048
Eval: HR = 0.4571, NDCG = 0.2561 [0.7 s]
Epoch = 8
Epoch completed 6.0 s
Train Loss: 0.3353244619083035
Eval: HR = 0.4740, NDCG = 0.2646 [0.7 s]
Epoch = 9
Epoch compl