## Implementation of Neural Collaborative Filtering
https://arxiv.org/abs/1708.05031

In [1]:
import dataframe
import data
import evaluate

In [2]:
import torch
import torch.nn as nn
from torch import optim

In [3]:
# Select GPU number: expose only the GPU with index 3 to this process through
# the CUDA_VISIBLE_DEVICES environment variable.
# NOTE(review): torch is already imported by an earlier cell; this presumably
# still takes effect because no CUDA call has been made yet — confirm.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

In [4]:
# Check if CUDA is available and remember the result as a device string
# ("cuda" or "cpu").
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed the random number generators for reproducibility. The CPU generator and
# the CUDA generators are deliberately given different seeds (315 vs. 912).
torch.manual_seed(315)
if device == "cuda":
    torch.cuda.manual_seed_all(912)

# Bare expression: in a notebook this displays the selected device.
device

'cuda'

### Generalized Matrix Factorization
Matrix factorization (MF) can be interpreted as a special case of the NCF framework.

In [5]:
class GMF(nn.Module):
    """Generalized Matrix Factorization model.

    A user embedding and an item embedding of equal width are multiplied
    element-wise; a single linear layer followed by a sigmoid turns the
    product into a score in (0, 1).

    Expected ``config`` keys: ``"num_users"``, ``"num_items"`` and
    ``"latent_dim"`` (the embedding width).
    """

    def __init__(self, config):
        super().__init__()  # run nn.Module.__init__()
        self.num_users = config["num_users"]
        self.num_items = config["num_items"]
        self.f = config["latent_dim"]

        # Embedding tables, one row per user / item.
        self.embedding_user = nn.Embedding(self.num_users, self.f)
        self.embedding_item = nn.Embedding(self.num_items, self.f)

        # Output head: weighted sum of the latent interaction, then sigmoid.
        self.affine_output = nn.Linear(self.f, 1)
        self.logistic = nn.Sigmoid()

    def forward(self, u, i):
        """Score user/item index tensors ``u`` and ``i``.

        Returns a tensor whose last dimension has size 1, holding the
        sigmoid-activated scores.
        """
        interaction = self.embedding_user(u) * self.embedding_item(i)  # element-wise product
        return self.logistic(self.affine_output(interaction))

    def init_weight(self):
        """Placeholder hook; PyTorch's default initialisation is kept."""

### Multi-Layer Perceptron

In [6]:
class MLP(nn.Module):
    """Multi-Layer Perceptron recommender.

    The user and item embeddings are concatenated and passed through a stack
    of ``Linear -> ReLU`` layers; a final linear layer followed by a sigmoid
    produces a score in (0, 1).

    Expected ``config`` keys:
        ``"num_users"``, ``"num_items"``: sizes of the embedding tables.
        ``"latent_dim"``: width of each embedding.
        ``"layers"``: list of layer widths. ``layers[0]`` is the width of the
            concatenated embeddings and must equal ``2 * latent_dim``; every
            consecutive pair ``(layers[k], layers[k + 1])`` becomes one
            fully connected layer.

    Raises:
        ValueError: if ``layers[0] != 2 * latent_dim``.
    """

    def __init__(self, config):
        super(MLP, self).__init__()
        self.num_users = config["num_users"]
        self.num_items = config["num_items"]
        self.f = config["latent_dim"]
        layer_sizes = config["layers"]

        # Fail early with a readable message instead of an opaque shape
        # mismatch inside forward().
        if layer_sizes[0] != 2 * self.f:
            raise ValueError(
                "config['layers'][0] must equal 2 * config['latent_dim'] "
                "(got {} and {})".format(layer_sizes[0], self.f)
            )

        # Build Layers
        ## Embedding Layer
        self.embedding_user = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.f)
        self.embedding_item = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.f)

        ## Fully Connected Layers (ModuleList registers them as submodules)
        self.fc_layers = nn.ModuleList(
            [nn.Linear(in_size, out_size)
             for in_size, out_size in zip(layer_sizes[:-1], layer_sizes[1:])]
        )

        ## Final Layer
        self.affine_output = nn.Linear(in_features=layer_sizes[-1], out_features=1)
        self.logistic = nn.Sigmoid()

    def forward(self, u, i):
        """Score user/item index tensors ``u`` and ``i``.

        Returns a tensor whose last dimension has size 1, holding the
        sigmoid-activated scores.
        """
        user_embedding = self.embedding_user(u)
        item_embedding = self.embedding_item(i)
        vector = torch.cat([user_embedding, item_embedding], dim=-1)  # concatenate user, item
        for layer in self.fc_layers:
            vector = torch.relu(layer(vector))
        logits = self.affine_output(vector)
        return self.logistic(logits)

    def init_weight(self):
        """Placeholder hook; PyTorch's default initialisation is kept."""

### Neural Matrix Factorization
GMF and MLP learn separate embeddings, and the two models are combined by concatenating their last hidden layers.  
The resulting model combines the linearity of MF with the non-linearity of deep neural networks for modelling user-item latent structures.

In [7]:
class NeuMF(nn.Module):
    """Neural Matrix Factorization: a GMF branch fused with an MLP branch.

    Each branch has its own user/item embeddings. The output of the last MLP
    layer and the element-wise GMF product are concatenated and fed to a
    single linear layer followed by a sigmoid.

    Expected ``config`` keys:
        ``"num_users"``, ``"num_items"``: sizes of the embedding tables.
        ``"latent_dim_MF"``: embedding width of the GMF branch.
        ``"latent_dim_MLP"``: embedding width of the MLP branch.
        ``"layers"``: MLP layer widths; ``layers[0]`` must equal
            ``2 * latent_dim_MLP``.

    Raises:
        ValueError: if ``layers[0] != 2 * latent_dim_MLP``.
    """

    def __init__(self, config):
        super(NeuMF, self).__init__()
        self.config = config
        self.num_users = config["num_users"]
        self.num_items = config["num_items"]
        self.f_MF = config["latent_dim_MF"]
        self.f_MLP = config["latent_dim_MLP"]
        layer_sizes = config["layers"]

        # Fail early with a readable message instead of an opaque shape
        # mismatch inside forward().
        if layer_sizes[0] != 2 * self.f_MLP:
            raise ValueError(
                "config['layers'][0] must equal 2 * config['latent_dim_MLP'] "
                "(got {} and {})".format(layer_sizes[0], self.f_MLP)
            )

        self.embedding_user_MF = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.f_MF)
        self.embedding_item_MF = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.f_MF)
        self.embedding_user_MLP = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.f_MLP)
        self.embedding_item_MLP = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.f_MLP)

        self.fc_layers = nn.ModuleList(
            [nn.Linear(in_size, out_size)
             for in_size, out_size in zip(layer_sizes[:-1], layer_sizes[1:])]
        )

        # The head sees [MLP output, GMF product] concatenated.
        self.affine_output = nn.Linear(in_features=layer_sizes[-1] + self.f_MF, out_features=1)
        self.logistic = nn.Sigmoid()

    def forward(self, u, i):
        """Score user/item index tensors ``u`` and ``i``.

        Returns a tensor whose last dimension has size 1, holding the
        sigmoid-activated scores.
        """
        user_embedding_MF = self.embedding_user_MF(u)
        item_embedding_MF = self.embedding_item_MF(i)
        user_embedding_MLP = self.embedding_user_MLP(u)
        item_embedding_MLP = self.embedding_item_MLP(i)

        # Multi-Layer Perceptron part
        MLP_vector = torch.cat([user_embedding_MLP, item_embedding_MLP], dim=-1)
        for layer in self.fc_layers:
            MLP_vector = torch.relu(layer(MLP_vector))

        # Matrix Factorization part
        MF_vector = torch.mul(user_embedding_MF, item_embedding_MF)

        vector = torch.cat([MLP_vector, MF_vector], dim=-1)
        logits = self.affine_output(vector)
        return self.logistic(logits)

    def init_weight(self):
        """Placeholder hook; PyTorch's default initialisation is kept."""

    def load_pretrain_weights(
        self,
        mlp_checkpoint='./checkpoints/MLP_Epoch100_HR0.5836_NDCG0.3299.model',
        gmf_checkpoint='./checkpoints/GMF_Epoch40_HR0.6399_NDCG0.3704.model',
    ):
        """Initialise this model from pre-trained MLP and GMF checkpoints.

        Args:
            mlp_checkpoint: path to a ``state_dict`` saved from an ``MLP``
                built with this model's config and
                ``latent_dim = latent_dim_MLP``.
            gmf_checkpoint: path to a ``state_dict`` saved from a ``GMF``
                built with this model's config and
                ``latent_dim = latent_dim_MF``.

        The embeddings and fully connected layers (weights and biases) are
        copied over. The output layer is the concatenation of both pre-trained
        output weights, in the same [MLP, GMF] order that ``forward`` uses,
        with both halves and the summed biases scaled by 0.5.

        Values are copied in place, so the parameters stay on whatever device
        this model currently lives on; no GPU is required.
        """
        # Work on copies so the caller's config dict is not modified.
        mlp_config = dict(self.config, latent_dim=self.config["latent_dim_MLP"])
        gmf_config = dict(self.config, latent_dim=self.config["latent_dim_MF"])

        # Load the checkpoints on the CPU; copy_() below handles the transfer
        # to this model's device.
        mlp_model = MLP(mlp_config)
        mlp_model.load_state_dict(torch.load(mlp_checkpoint, map_location="cpu"))
        gmf_model = GMF(gmf_config)
        gmf_model.load_state_dict(torch.load(gmf_checkpoint, map_location="cpu"))

        with torch.no_grad():
            self.embedding_user_MLP.weight.copy_(mlp_model.embedding_user.weight)
            self.embedding_item_MLP.weight.copy_(mlp_model.embedding_item.weight)
            for own_layer, pretrained_layer in zip(self.fc_layers, mlp_model.fc_layers):
                own_layer.weight.copy_(pretrained_layer.weight)
                # The biases belong to the pre-trained layers as well.
                own_layer.bias.copy_(pretrained_layer.bias)

            self.embedding_user_MF.weight.copy_(gmf_model.embedding_user.weight)
            self.embedding_item_MF.weight.copy_(gmf_model.embedding_item.weight)

            self.affine_output.weight.copy_(
                0.5 * torch.cat(
                    [mlp_model.affine_output.weight, gmf_model.affine_output.weight],
                    dim=-1,
                )
            )
            self.affine_output.bias.copy_(
                0.5 * (mlp_model.affine_output.bias + gmf_model.affine_output.bias)
            )

### Training
First, train GMF and MLP from random initializations until convergence.  
Both are trained from scratch with Adaptive Moment Estimation (Adam).  
  
Then use their parameters to initialize the corresponding parts of NeuMF.  
After loading the pre-trained parameters into NeuMF, optimize it with vanilla SGD: only the model parameters are carried over, not Adam's momentum state, so a momentum-based optimizer is unsuitable for this stage.

In [8]:
from tensorboardX import SummaryWriter
from timeit import default_timer as timer

In [9]:
# Get data: take the ratings table exposed by the project-local `dataframe`
# module and wrap it in the project-local SampleGenerator, which the training
# cells use to build per-epoch train loaders.
ratings = dataframe.ratings
sample_generator = data.SampleGenerator(data = ratings)
# Evaluation data is taken once here and reused by every training loop below.
test_data = sample_generator.evaluate_data

#### Train GMF

In [10]:
# TensorBoard event writer for the GMF run; events go to runs/GMF_v2.
writer = SummaryWriter(logdir="runs/GMF_v2")

In [11]:
# Hyper-parameters for the GMF run.
gmf_config = {
    "num_users": 6040,
    "num_items": 3706,
    "latent_dim": 16,
    # increasing number of negatives -> converges in less epochs
    "num_negative": 4,
    "batch_size": 1024,
}

# Put the model on the device chosen earlier in the notebook instead of
# hard-coding CUDA.
model = GMF(gmf_config).to(device)
# The model already ends in a sigmoid, so plain binary cross-entropy is used.
loss_function = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

To monitor training with TensorBoard, run:  
`tensorboard --logdir=runs/ --host=0.0.0.0 --port=0`

In [12]:
num_epochs = 100

# Create the checkpoint directory up front; otherwise the first torch.save()
# fails only after a whole epoch has been trained.
os.makedirs("checkpoints", exist_ok=True)

# epoch
for epoch_id in range(1, num_epochs + 1):
    # A new train loader is requested from the sample generator every epoch.
    train_loader = sample_generator.instance_a_train_loader(gmf_config["num_negative"], gmf_config["batch_size"])
    start_epoch = timer()
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        user, item, rating = batch[0], batch[1], batch[2]
        # Move the batch to the device chosen earlier in the notebook.
        user, item, rating = user.to(device), item.to(device), rating.float().to(device)

        # mini-batch update
        optimizer.zero_grad()
        prediction = model(user, item)
        # The model output has a trailing dimension of size 1; flatten it
        # before comparing it with the labels.
        loss = loss_function(prediction.view(-1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    model.eval()
    hit_ratio, ndcg = evaluate.cal_metrics(model, test_data)

    # total_loss is the sum of the per-batch losses of this epoch.
    # NOTE: tag and message spellings are kept unchanged so that existing
    # TensorBoard runs and logs stay comparable.
    writer.add_scalar("loss/ Train_loss", total_loss, epoch_id)
    writer.add_scalar("performace/HitRate", hit_ratio, epoch_id)
    writer.add_scalar("performace/NDCG", ndcg, epoch_id)
    print('[Evluating Epoch {}] HR = {:.4f}, NDCG = {:.4f} time = {:.4f}sec'.format(epoch_id, hit_ratio, ndcg, timer()-start_epoch))
    # One checkpoint per epoch, with the metrics encoded in the file name.
    torch.save(model.state_dict(), "checkpoints/GMF_v2_Epoch{}_HR{:.4f}_NDCG{:.4f}.model".format(epoch_id, hit_ratio, ndcg))

[Evluating Epoch 1] HR = 0.0970, NDCG = 0.0447 time = 115.0248sec
[Evluating Epoch 2] HR = 0.0980, NDCG = 0.0451 time = 115.8239sec
[Evluating Epoch 3] HR = 0.1510, NDCG = 0.0714 time = 114.9313sec
[Evluating Epoch 4] HR = 0.3366, NDCG = 0.1792 time = 113.7466sec
[Evluating Epoch 5] HR = 0.4053, NDCG = 0.2217 time = 114.0880sec
[Evluating Epoch 6] HR = 0.4329, NDCG = 0.2375 time = 113.9665sec
[Evluating Epoch 7] HR = 0.4469, NDCG = 0.2460 time = 115.1512sec
[Evluating Epoch 8] HR = 0.4563, NDCG = 0.2500 time = 115.0018sec
[Evluating Epoch 9] HR = 0.4705, NDCG = 0.2615 time = 113.6703sec
[Evluating Epoch 10] HR = 0.4896, NDCG = 0.2721 time = 114.4799sec
[Evluating Epoch 11] HR = 0.5055, NDCG = 0.2805 time = 114.6054sec
[Evluating Epoch 12] HR = 0.5315, NDCG = 0.2952 time = 114.9387sec
[Evluating Epoch 13] HR = 0.5513, NDCG = 0.3084 time = 116.4373sec
[Evluating Epoch 14] HR = 0.5661, NDCG = 0.3191 time = 115.0321sec
[Evluating Epoch 15] HR = 0.5868, NDCG = 0.3309 time = 114.3812sec
[Evl

#### Train MLP

In [None]:
# TensorBoard event writer for the MLP run; events go to runs/MLP_v2.
writer = SummaryWriter(logdir="runs/MLP_v2")

In [None]:
# Hyper-parameters for the MLP run.
mlp_config = {
    "num_users": 6040,
    "num_items": 3706,
    "latent_dim": 16,
    # layers[0] is the width of the concatenated user+item embeddings
    # (2 * latent_dim).
    "layers": [32, 16, 8],
    "num_negative": 4,
    "batch_size": 1024,
}

# Put the model on the device chosen earlier in the notebook instead of
# hard-coding CUDA.
model = MLP(mlp_config).to(device)
# The model already ends in a sigmoid, so plain binary cross-entropy is used.
loss_function = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
num_epochs = 100

# Create the checkpoint directory up front; otherwise the first torch.save()
# fails only after a whole epoch has been trained.
os.makedirs("checkpoints", exist_ok=True)

# epoch
for epoch_id in range(1, num_epochs + 1):

    # A new train loader is requested from the sample generator every epoch.
    train_loader = sample_generator.instance_a_train_loader(mlp_config["num_negative"], mlp_config["batch_size"])
    start_epoch = timer()
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        user, item, rating = batch[0], batch[1], batch[2]
        # Move the batch to the device chosen earlier in the notebook.
        user, item, rating = user.to(device), item.to(device), rating.float().to(device)

        # mini-batch update
        optimizer.zero_grad()
        prediction = model(user, item)
        # The model output has a trailing dimension of size 1; flatten it
        # before comparing it with the labels.
        loss = loss_function(prediction.view(-1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    model.eval()
    hit_ratio, ndcg = evaluate.cal_metrics(model, test_data)

    # total_loss is the sum of the per-batch losses of this epoch.
    # NOTE: tag and message spellings are kept unchanged so that existing
    # TensorBoard runs and logs stay comparable.
    writer.add_scalar("loss/ Train_loss", total_loss, epoch_id)
    writer.add_scalar("performace/HitRate", hit_ratio, epoch_id)
    writer.add_scalar("performace/NDCG", ndcg, epoch_id)
    print('[Evluating Epoch {}] HR = {:.4f}, NDCG = {:.4f} time = {:.4f}sec'.format(epoch_id, hit_ratio, ndcg, timer()-start_epoch))
    # One checkpoint per epoch, with the metrics encoded in the file name.
    torch.save(model.state_dict(), "checkpoints/MLP_Epoch{}_HR{:.4f}_NDCG{:.4f}.model".format(epoch_id, hit_ratio, ndcg))

#### Train NeuralMF

In [None]:
# TensorBoard event writer for the NeuMF run; events go to runs/NeuMF_v2.
writer = SummaryWriter(logdir="runs/NeuMF_v2")

In [None]:
# Hyper-parameters for the NeuMF run. The GMF and MLP branches get their own
# embedding widths.
neumf_config = {
    "num_users": 6040,
    "num_items": 3706,
    "latent_dim_MF": 16,
    "latent_dim_MLP": 16,
    # layers[0] is the width of the concatenated MLP embeddings
    # (2 * latent_dim_MLP).
    "layers": [32, 16, 8],
    "num_negative": 4,
    "batch_size": 1024,
}

# Put the model on the device chosen earlier in the notebook instead of
# hard-coding CUDA.
model = NeuMF(neumf_config).to(device)
# Start from the pre-trained GMF and MLP checkpoints rather than from random
# weights.
model.load_pretrain_weights()
# The model already ends in a sigmoid, so plain binary cross-entropy is used.
loss_function = nn.BCELoss()
# Vanilla SGD is used for this pre-initialised model (Adam was used for the
# two models trained from scratch).
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [None]:
num_epochs = 100

# Create the checkpoint directory up front; otherwise the first torch.save()
# fails only after a whole epoch has been trained.
os.makedirs("checkpoints", exist_ok=True)

# epoch
for epoch_id in range(1, num_epochs + 1):

    # A new train loader is requested from the sample generator every epoch.
    train_loader = sample_generator.instance_a_train_loader(neumf_config["num_negative"], neumf_config["batch_size"])
    start_epoch = timer()
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        user, item, rating = batch[0], batch[1], batch[2]
        # Move the batch to the device chosen earlier in the notebook.
        user, item, rating = user.to(device), item.to(device), rating.float().to(device)

        # mini-batch update
        optimizer.zero_grad()
        prediction = model(user, item)
        # The model output has a trailing dimension of size 1; flatten it
        # before comparing it with the labels.
        loss = loss_function(prediction.view(-1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    model.eval()
    hit_ratio, ndcg = evaluate.cal_metrics(model, test_data)

    # total_loss is the sum of the per-batch losses of this epoch.
    # NOTE: tag and message spellings are kept unchanged so that existing
    # TensorBoard runs and logs stay comparable.
    writer.add_scalar("loss/ Train_loss", total_loss, epoch_id)
    writer.add_scalar("performace/HitRate", hit_ratio, epoch_id)
    writer.add_scalar("performace/NDCG", ndcg, epoch_id)
    print('[Evluating Epoch {}] HR = {:.4f}, NDCG = {:.4f} time = {:.4f}sec'.format(epoch_id, hit_ratio, ndcg, timer()-start_epoch))
    # One checkpoint per epoch, with the metrics encoded in the file name.
    torch.save(model.state_dict(), "checkpoints/NeuMF_Epoch{}_HR{:.4f}_NDCG{:.4f}.model".format(epoch_id, hit_ratio, ndcg))