In [1]:
import numpy as np
import pandas as pd
import load_rating_data as ld
import time
import torch
from torch import optim, nn
from timeit import default_timer as timer
from sklearn.model_selection import train_test_split


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [27]:
class GMF(nn.Module):
    
    def __init__(self, args):
        super(GMF, self).__init__() # run nn.Module.__init__()
        self.args = args
        self.n_users = args.n_users
        self.n_items = args.n_items
        self.latent_dim = args.latent_dim
        
        # Embedding Layer
        self.embedding_user = nn.Embedding(num_embeddings = self.n_users, embedding_dim = self.latent_dim)
        self.embedding_item = nn.Embedding(num_embeddings = self.n_items, embedding_dim = self.latent_dim)
        
        # One layer
        self.affine_output = nn.Linear(in_features = self.latent_dim, out_features = 1)
        self.logistic = nn.Sigmoid()
        
    def forward(self, u, i):
        
        user_embedding = self.embedding_user(u)
        item_embedding = self.embedding_item(i)
        product = torch.mul(user_embedding, item_embedding) # element-wise product
        logits = self.affine_output(product)
        rating = self.logistic(logits)
        
        return rating

In [28]:
class MLP(nn.Module):
    
    def __init__(self, args):
        super(MLP, self).__init__()
        self.args = args
        self.n_users = args.n_users
        self.n_items = args.n_items
        self.latent_dim = args.latent_dim
        
        # Build Layers
        # Embedding Layer
        self.embedding_user = nn.Embedding(num_embeddings = self.n_users, embedding_dim = self.latent_dim)
        self.embedding_item = nn.Embedding(num_embeddings = self.n_items, embedding_dim = self.latent_dim)
        
        ## Fully Connected Layer
        self.fc_layers = nn.ModuleList() # holds submodules in a list
        for idx, (in_size, out_size) in enumerate(zip(args.layers[:-1], args.layers[1:])):
            self.fc_layers.append(nn.Linear(in_size, out_size))
        
        ## Final Layer
        self.affine_output = nn.Linear(in_features = args.layers[-1], out_features = 1)
        self.logistic = nn.Sigmoid()
        
    
    def forward(self, u, i):
        
        user_embedding = self.embedding_user(u)
        item_embedding = self.embedding_item(i)
        vector = torch.cat([user_embedding, item_embedding], dim=-1) # concatenate user, item
        for idx, _ in enumerate(range(len(self.fc_layers))):
            vector = self.fc_layers[idx](vector)
            vector = nn.ReLU()(vector)
        logits = self.affine_output(vector)
        rating = self.logistic(logits)
        
        return rating
    

In [29]:
class NeuMF(nn.Module):
    
    def __init__(self, args):
        super(NeuMF, self).__init__()
        self.args = args
        self.n_users = args.n_users
        self.n_items = args.n_items
        self.latent_GMF = args.latent_GMF
        self.latent_MLP = args.latent_MLP
        
        self.embedding_user_MF = nn.Embedding(num_embeddings = self.n_users, embedding_dim = self.latent_GMF)
        self.embedding_item_MF = nn.Embedding(num_embeddings = self.n_items, embedding_dim = self.latent_GMF)
        self.embedding_user_MLP = nn.Embedding(num_embeddings = self.n_users, embedding_dim = self.latent_MLP)
        self.embedding_item_MLP = nn.Embedding(num_embeddings = self.n_items, embedding_dim = self.latent_MLP)
        
        ## Fully Connected Layer
        self.fc_layers = nn.ModuleList() # holds submodules in a list
        for idx, (in_size, out_size) in enumerate(zip(args.layers[:-1], args.layers[1:])):
            self.fc_layers.append(nn.Linear(in_size, out_size))
                        
        self.affine_output = nn.Linear(in_features = args.layers[-1] + self.latent_GMF, out_features=1)
        self.logistic = nn.Sigmoid()
        
    
    def forward(self, u, i):
        user_embedding_MF = self.embedding_user_MF(u)
        item_embedding_MF = self.embedding_item_MF(i)
        user_embedding_MLP = self.embedding_user_MLP(u)
        item_embedding_MLP = self.embedding_item_MLP(i)
        
        # Multi-Layer Perceptron part
        MLP_vector = torch.cat([user_embedding_MLP, item_embedding_MLP], dim=-1)
        for idx, _ in enumerate(range(len(self.fc_layers))):
            MLP_vector = self.fc_layers[idx](MLP_vector)
            MLP_vector = nn.ReLU()(MLP_vector)
        
        # Martrix Factorization part
        MF_vector = torch.mul(user_embedding_MF, item_embedding_MF)
        
        vector = torch.cat([MLP_vector, MF_vector], dim=-1)
        logits = self.affine_output(vector)
        rating = self.logistic(logits)
        
        return rating
    
    def load_pretrain_weights(self):
        
        """
        load pretrained weights from GMF and MLP
        """
        
        mlp_model = MLP(self.args)
        state_dict = torch.load('./checkpoints/MLP_Epoch10_HR0.1758_NDCG0.1736.model')
        mlp_model.load_state_dict(state_dict)
        self.embedding_user_MLP.weight.data = mlp_model.embedding_user.weight.data
        self.embedding_item_MLP.weight.data = mlp_model.embedding_item.weight.data
        for idx in range(len(self.fc_layers)):
            self.fc_layers[idx].weight.data = mlp_model.fc_layers[idx].weight.data
        
        gmf_model = GMF(args)
        gmf_model
        state_dict = torch.load('./checkpoints/GMF_Epoch10_HR0.1479_NDCG0.1472.model')
        gmf_model.load_state_dict(state_dict)
        self.embedding_user_MF.weight.data = gmf_model.embedding_user.weight.data
        self.embedding_item_MF.weight.data = gmf_model.embedding_item.weight.data
        
        
        self.affine_output.weight.data = 0.5 * torch.cat([mlp_model.affine_output.weight.data, gmf_model.affine_output.weight.data], dim=-1)
        self.affine_output.bias.data = 0.5 * (mlp_model.affine_output.bias.data + gmf_model.affine_output.bias.data)

In [17]:
from easydict import EasyDict as edict

args = edict()

# training options
args.df = ld.load_rating_1m()
args.latent_dim = 32
args.epoch = 10                      # training epoch.
args.train_n_neg = 3     #memory issue
args.test_n_neg = 100     #memory issue
args.batch_size = 256
args.layers = [64,32,16]
args.latent_GMF = 16
args.latent_MLP = 16

In [19]:
from torch.utils.data import DataLoader, Dataset

info = ld.information(args)

In [8]:
model = GMF(args)
loss_function = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:

# epoch
for epoch_id in range(1, args.epoch + 1):
    train_loader = info.train_dataloader
    start_epoch = timer()
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        user, item, rating = batch[0], batch[1], batch[2]
        # mini-batch update
        optimizer.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction.view(-1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    model.eval()
    hit = 0
    ndcg = 0
    test_loader = info.test_dataloader
    for batch_idx, batch in enumerate(test_loader):
        user, item, neg_items = batch[0], batch[1], batch[2]
        # mini-batch update
        test_pred = model(user, item)
        neg_pred = model(user.reshape(-1, 1).expand(-1, args.test_n_neg), neg_items).view(-1, args.test_n_neg)
        concat = torch.cat([test_pred, neg_pred], axis=1)
        
        _, indices = torch.topk(concat, 10)  # 10
        indices = indices.numpy()
        hit += (indices.size - np.count_nonzero(indices))
        zero_idx = np.where(indices.reshape(-1) == 0)[0] % 10
        ndcg += np.sum(np.reciprocal(np.log2(zero_idx + 2)))

    hit_ratio = hit / len(info.test_dataset)
    ndcg = ndcg / len(info.test_dataset)

    print('[Evluating Epoch {}] total_loss = {:.4f}, HR = {:.4f}, NDCG = {:.4f} time = {:.4f}sec'.format(epoch_id, total_loss, hit_ratio, ndcg, timer()-start_epoch))
    torch.save(model.state_dict(), "checkpoints/GMF_Epoch{}_HR{:.4f}_NDCG{:.4f}.model".format(epoch_id, hit_ratio, ndcg))

[Evluating Epoch 1] total_loss = 1884.3949, HR = 0.0899, NDCG = 0.0736 time = 159.3931sec
[Evluating Epoch 2] total_loss = 0.4397, HR = 0.1287, NDCG = 0.1271 time = 156.5461sec
[Evluating Epoch 3] total_loss = 0.0001, HR = 0.1427, NDCG = 0.1419 time = 217.7750sec
[Evluating Epoch 4] total_loss = 0.0000, HR = 0.1452, NDCG = 0.1444 time = 210.7375sec
[Evluating Epoch 5] total_loss = 0.0000, HR = 0.1458, NDCG = 0.1450 time = 228.6337sec
[Evluating Epoch 6] total_loss = 0.0000, HR = 0.1457, NDCG = 0.1450 time = 214.5030sec
[Evluating Epoch 7] total_loss = 0.0000, HR = 0.1459, NDCG = 0.1452 time = 238.8860sec
[Evluating Epoch 8] total_loss = 0.0000, HR = 0.1469, NDCG = 0.1462 time = 225.0136sec
[Evluating Epoch 9] total_loss = 0.0000, HR = 0.1479, NDCG = 0.1472 time = 219.4985sec
[Evluating Epoch 10] total_loss = 0.0000, HR = 0.1479, NDCG = 0.1472 time = 159.1954sec


In [20]:
model = MLP(args)
loss_function = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [21]:

# epoch
for epoch_id in range(1, args.epoch + 1):
    train_loader = info.train_dataloader
    start_epoch = timer()
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        user, item, rating = batch[0], batch[1], batch[2]
        # mini-batch update
        optimizer.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction.view(-1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    model.eval()
    hit = 0
    ndcg = 0
    test_loader = info.test_dataloader
    for batch_idx, batch in enumerate(test_loader):
        user, item, neg_items = batch[0], batch[1], batch[2]
        # mini-batch update
        test_pred = model(user, item)
        neg_pred = model(user.reshape(-1, 1).expand(-1, args.test_n_neg), neg_items).view(-1, args.test_n_neg)
        concat = torch.cat([test_pred, neg_pred], axis=1)
        
        _, indices = torch.topk(concat, 10)  # 10
        indices = indices.numpy()
        hit += (indices.size - np.count_nonzero(indices))
        zero_idx = np.where(indices.reshape(-1) == 0)[0] % 10
        ndcg += np.sum(np.reciprocal(np.log2(zero_idx + 2)))

    hit_ratio = hit / len(info.test_dataset)
    ndcg = ndcg / len(info.test_dataset)

    print('[Evluating Epoch {}] total_loss = {:.4f}, HR = {:.4f}, NDCG = {:.4f} time = {:.4f}sec'.format(epoch_id, total_loss, hit_ratio, ndcg, timer()-start_epoch))
    torch.save(model.state_dict(), "checkpoints/MLP_Epoch{}_HR{:.4f}_NDCG{:.4f}.model".format(epoch_id, hit_ratio, ndcg))

[Evluating Epoch 1] total_loss = 1637.5050, HR = 0.1399, NDCG = 0.1206 time = 150.2823sec
[Evluating Epoch 2] total_loss = 42.8607, HR = 0.1359, NDCG = 0.1263 time = 151.2220sec
[Evluating Epoch 3] total_loss = 22.5133, HR = 0.1568, NDCG = 0.1490 time = 222.1022sec
[Evluating Epoch 4] total_loss = 18.3688, HR = 0.1571, NDCG = 0.1514 time = 135.6831sec
[Evluating Epoch 5] total_loss = 9.6168, HR = 0.1618, NDCG = 0.1576 time = 148.7477sec
[Evluating Epoch 6] total_loss = 17.6119, HR = 0.1678, NDCG = 0.1636 time = 163.9576sec
[Evluating Epoch 7] total_loss = 13.9895, HR = 0.1716, NDCG = 0.1688 time = 158.4741sec
[Evluating Epoch 8] total_loss = 5.6223, HR = 0.1736, NDCG = 0.1711 time = 167.8705sec
[Evluating Epoch 9] total_loss = 5.4288, HR = 0.1772, NDCG = 0.1748 time = 170.5959sec
[Evluating Epoch 10] total_loss = 5.9292, HR = 0.1758, NDCG = 0.1736 time = 176.3812sec


In [30]:
model = NeuMF(args)
model.load_pretrain_weights()
loss_function = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [32]:

# epoch
for epoch_id in range(1, args.epoch + 1):
    train_loader = info.train_dataloader
    start_epoch = timer()
    model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(train_loader):
        user, item, rating = batch[0], batch[1], batch[2]
        # mini-batch update
        optimizer.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction.view(-1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    model.eval()
    hit = 0
    ndcg = 0
    test_loader = info.test_dataloader
    for batch_idx, batch in enumerate(test_loader):
        user, item, neg_items = batch[0], batch[1], batch[2]
        # mini-batch update
        test_pred = model(user, item)
        neg_pred = model(user.reshape(-1, 1).expand(-1, args.test_n_neg), neg_items).view(-1, args.test_n_neg)
        concat = torch.cat([test_pred, neg_pred], axis=1)
        
        _, indices = torch.topk(concat, 10)  # 10
        indices = indices.numpy()
        hit += (indices.size - np.count_nonzero(indices))
        zero_idx = np.where(indices.reshape(-1) == 0)[0] % 10
        ndcg += np.sum(np.reciprocal(np.log2(zero_idx + 2)))

    hit_ratio = hit / len(info.test_dataset)
    ndcg = ndcg / len(info.test_dataset)

    print('[Evluating Epoch {}] total_loss = {:.4f}, HR = {:.4f}, NDCG = {:.4f} time = {:.4f}sec'.format(epoch_id, total_loss, hit_ratio, ndcg, timer()-start_epoch))
    torch.save(model.state_dict(), "checkpoints/NeuMF_Epoch{}_HR{:.4f}_NDCG{:.4f}.model".format(epoch_id, hit_ratio, ndcg))

[Evluating Epoch 1] total_loss = 7186.6720, HR = 0.2203, NDCG = 0.2036 time = 162.5721sec
[Evluating Epoch 2] total_loss = 2912.6853, HR = 0.2107, NDCG = 0.1975 time = 286.6672sec
[Evluating Epoch 3] total_loss = 2274.2859, HR = 0.2041, NDCG = 0.1924 time = 291.1469sec
[Evluating Epoch 4] total_loss = 2159.6998, HR = 0.2014, NDCG = 0.1900 time = 283.0974sec
[Evluating Epoch 5] total_loss = 2115.6284, HR = 0.2024, NDCG = 0.1923 time = 277.3113sec
[Evluating Epoch 6] total_loss = 1845.9927, HR = 0.2012, NDCG = 0.1912 time = 279.4174sec
[Evluating Epoch 7] total_loss = 1584.5817, HR = 0.2022, NDCG = 0.1922 time = 317.0147sec
[Evluating Epoch 8] total_loss = 1569.8509, HR = 0.2011, NDCG = 0.1912 time = 276.6856sec
[Evluating Epoch 9] total_loss = 1562.9941, HR = 0.2011, NDCG = 0.1914 time = 278.0450sec
[Evluating Epoch 10] total_loss = 1554.9438, HR = 0.2001, NDCG = 0.1902 time = 278.6895sec
