In [401]:
import torch
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
import pandas as pd
from os.path import join as jp
import os
import matplotlib.pyplot as plt 
import seaborn as sns 
import matplotlib
import pandas as pd

In [118]:
import numpy as np
from sklearn.model_selection import train_test_split

import torch
import torch.nn.functional as F
from torch import nn, optim, Tensor

from torch_geometric.utils import structured_negative_sampling, structured_negative_sampling_feasible
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.nn import LGConv
import scipy.sparse as sp

In [435]:
# Re-import the local `utils` module so that edits made to it on disk are
# picked up without restarting the kernel, then rebind `get_metrics`
# (called by LightGCN.get_val_metrics) to the freshly loaded function.
from importlib import reload
import utils
utils = reload(utils)
get_metrics = utils.get_metrics

In [3]:
# Seed every RNG the notebook draws from so that a Restart & Run All is
# reproducible: NumPy drives the mini-batch sampling (np.random.choice),
# torch drives the embedding initialisation.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7fe6609c5990>

In [4]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical
# warnings raised by later cells are hidden as well.
import warnings
warnings.filterwarnings("ignore")

### Loss function

In [271]:
def bpr_loss(emb_users_final, emb_users, 
             emb_pos_items_final, emb_pos_items, 
             emb_neg_items_final, emb_neg_items,
             lambda_reg=None
):
    """Bayesian Personalized Ranking loss plus L2 regularization.

    Args:
        emb_users_final: propagated user embeddings, ``[batch_size, embedding_dim]``.
        emb_users: layer-0 (learnable) user embeddings for the same rows.
        emb_pos_items_final: propagated embeddings of the positive items.
        emb_pos_items: layer-0 embeddings of the positive items.
        emb_neg_items_final: propagated embeddings of the negative items.
        emb_neg_items: layer-0 embeddings of the negative items.
        lambda_reg: weight of the regularization term. When ``None`` (default)
            the module-level constant ``LAMBDA`` is used, which keeps existing
            six-argument calls working unchanged.

    Returns:
        Scalar tensor ``-mean(logsigmoid(pos - neg)) + lambda_reg * reg``.
    """
    if lambda_reg is None:
        # Backward-compatible fallback: the notebook defines LAMBDA globally.
        lambda_reg = LAMBDA

    # Regularization term (norm of the ORIGINAL embedding, not the propagated one)
    reg_loss = lambda_reg * (emb_users.norm().pow(2) +
                             emb_pos_items.norm().pow(2) +
                             emb_neg_items.norm().pow(2))

    # Row-wise dot products: element-wise multiply the batched tensors and sum
    # over the embedding dimension, so entry i is
    # emb_users_final[i] @ emb_*_items_final[i]; result size is [batch_size].
    pos_ratings = torch.mul(emb_users_final, emb_pos_items_final).sum(dim=-1)
    neg_ratings = torch.mul(emb_users_final, emb_neg_items_final).sum(dim=-1)

    # logsigmoid is always <= 0, so it is negated to get a loss to minimise.
    # (A separate local name avoids shadowing this function inside its body.)
    ranking_loss = -torch.mean(torch.nn.functional.logsigmoid(pos_ratings - neg_ratings))

    # The regularization term is non-negative since it is a sum of squared norms.
    return ranking_loss + reg_loss

### Model

In [453]:
class LightGCN(nn.Module):
    """LightGCN collaborative-filtering model on a user-item bipartite graph.

    Users and items each own a learnable embedding table. ``forward``
    propagates the concatenated embeddings ``num_layers`` times through a
    symmetrically normalized adjacency matrix and combines every layer
    (layer 0 included) with the uniform weight ``alpha = 1 / (num_layers + 1)``.

    Args:
        num_users: number of user nodes.
        num_items: number of item nodes.
        edge_index: training interactions; row 0 holds user ids, row 1 item ids.
        edge_values: one weight per training interaction.
        edge_index_val: optional validation interactions, same layout.
        edge_values_val: optional weights of the validation interactions.
        num_layers: number of propagation steps.
        dim_h: embedding dimension.
        batch_size: number of triplets returned by ``sample_mini_batch``.
    """

    def __init__(self, 
                 num_users, num_items, 
                 edge_index, edge_values,
                 edge_index_val=None, edge_values_val=None,
                 num_layers=4, dim_h=64, batch_size=2):
        super().__init__()
        
        self.num_users = num_users
        self.num_items = num_items
        self.num_layers = num_layers
        self.emb_users = nn.Embedding(num_embeddings=self.num_users, embedding_dim=dim_h)
        self.emb_items = nn.Embedding(num_embeddings=self.num_items, embedding_dim=dim_h)
        self.edge_index = edge_index
        self.edge_values = edge_values
        self.adj_mat = self.compute_norm_adj_matrix(edge_index, edge_values)
        # Registered as (non-persistent) buffers so that `model.to(device)`
        # moves the adjacency together with the embeddings.
        self.register_buffer(
            "sp_adj_mat",
            self._convert_sp_mat_to_sp_tensor(self.adj_mat),
            persistent=False,
        )
        self.alpha = 1/(self.num_layers+1)
        self.batch_size = batch_size

        nn.init.normal_(self.emb_users.weight, std=0.01)
        nn.init.normal_(self.emb_items.weight, std=0.01)
        
        # Construct positive and negative edges
        self._generate_positives_negative_edges()
        
        # Validation adjacency matrix (only when validation edges were given;
        # both arguments default to None).
        self.edge_index_val = edge_index_val
        self.edge_values_val = edge_values_val
        if edge_index_val is None:
            self.val_adj_mat = None
            sp_val_adj_mat = None
        else:
            if edge_values_val is None:
                raise ValueError("edge_values_val is required when edge_index_val is given")
            self.val_adj_mat = self.compute_norm_adj_matrix(
                edge_index_val, edge_values_val, is_valid=True
            )
            sp_val_adj_mat = self._convert_sp_mat_to_sp_tensor(self.val_adj_mat)
        self.register_buffer("sp_val_adj_mat", sp_val_adj_mat, persistent=False)
    
    def _generate_positives_negative_edges(self):
        """Pre-sample one negative item per training interaction.

        Stores the result in ``self.pos_neg_edges``: the three tensors returned
        by ``structured_negative_sampling`` (user, positive item, negative item)
        stacked along dim 0.
        """
        # IMPORTANT: num_nodes is deliberately the size of the item set. When
        # num_users > num_items, sampling over all nodes could return an id
        # that does not exist as an item.
        triplets = structured_negative_sampling(self.edge_index, num_nodes=self.num_items)
        self.pos_neg_edges = torch.stack(triplets, dim=0)

    @staticmethod
    def _as_numpy(x):
        """Return ``x`` as a NumPy array; torch tensors are detached and moved to CPU first."""
        if isinstance(x, torch.Tensor):
            return x.detach().cpu().numpy()
        return np.asarray(x)
        
    @staticmethod
    def _convert_sp_mat_to_sp_tensor(X):
        """Convert a SciPy sparse matrix to a float32 torch sparse COO tensor."""
        coo = X.tocoo().astype(np.float32)
        # Indices stay integral end to end. Routing them through a float32
        # tensor (as `torch.Tensor(ndarray)` does) silently rounds any index
        # above 2**24.
        index = torch.from_numpy(np.vstack([coo.row, coo.col]).astype(np.int64))
        data = torch.tensor(coo.data, dtype=torch.float32)
        return torch.sparse_coo_tensor(index, data, torch.Size(coo.shape))
    
    def compute_norm_adj_matrix(self, edge_index, edge_values, is_valid=False):
        """Build the symmetrically normalized adjacency ``D^-1/2 A D^-1/2``.

        ``A`` is the ``(num_users + num_items)`` square matrix
        ``[[0, R], [R^T, 0]]`` where ``R`` is the user x item interaction
        matrix. ``R`` is also stored on the instance: as ``self.R`` for the
        training graph, or ``self.R_valid`` when ``is_valid`` is true.

        Returns:
            SciPy sparse matrix holding the normalized adjacency.
        """
        num_users = self.num_users
        num_items = self.num_items

        # Interaction matrix
        R = sp.coo_matrix(
            (self._as_numpy(edge_values),
             (self._as_numpy(edge_index[0]), self._as_numpy(edge_index[1]))),
            shape=(num_users, num_items)).tolil()

        # Save interaction matrix
        if not is_valid:
            self.R = R
        else:
            self.R_valid = R
        
        # Block adjacency assembled in one shot; this avoids the slow
        # slice-assignment into a LIL matrix.
        adj_mat = sp.bmat([[None, R], [R.T, None]], format="csr", dtype=np.float32)
        
        # Degrees
        rowsum = np.asarray(adj_mat.sum(axis=1)).flatten()
        
        # D^-1/2; isolated nodes (degree 0) produce inf, which is reset to 0
        with np.errstate(divide="ignore"):
            d_inv = np.power(rowsum, -0.5)
        d_inv[np.isinf(d_inv)] = 0.
        d_mat = sp.diags(d_inv)
        
        # Normalized Adjacency Matrix
        return d_mat.dot(adj_mat).dot(d_mat)
    
    def sample_mini_batch(self):
        """Draw ``batch_size`` (user, positive item, negative item) triplets.

        Triplets are drawn with replacement from ``self.pos_neg_edges`` using
        NumPy's global RNG.

        Returns:
            Three NumPy arrays of length ``batch_size``: user ids, positive
            item ids and negative item ids.
        """
        num_triplets = self.pos_neg_edges.shape[1]
        index = np.random.choice(num_triplets, size=self.batch_size)
        
        # Rows are: user, positive item, negative item -> [3, batch_size].
        # Uses `self`, not a notebook-level `model`, so any instance works.
        sampled_pos_neg_edges = self.pos_neg_edges[:, index]
        
        user_indices, pos_item_indices, neg_item_indices = sampled_pos_neg_edges.cpu().numpy()

        return user_indices, pos_item_indices, neg_item_indices
        

    def forward(self, is_valid=False):
        """Propagate the embeddings over the whole graph.

        Args:
            is_valid: use the validation adjacency instead of the training one.

        Returns:
            ``(final_user_emb, layer0_user_emb, final_item_emb, layer0_item_emb)``.
            The layer-0 tensors are the embedding weights themselves and are
            what the BPR regularization term is applied to.

        Raises:
            ValueError: if ``is_valid`` is true but no validation edges were
                given at construction time.
        """
        adj = self.sp_val_adj_mat if is_valid else self.sp_adj_mat
        if adj is None:
            raise ValueError("No validation edges were provided to LightGCN")

        emb0_users = self.emb_users.weight
        emb0_items = self.emb_items.weight
                
        # Embedding is dimension M + N
        emb = torch.cat([emb0_users, emb0_items])
        embs = [emb]

        for _ in range(self.num_layers):
            emb = torch.sparse.mm(adj, emb)
            embs.append(emb)

        # Uniform layer combination: alpha * sum over layers, which is the
        # mean. Multiplying alpha by a mean would apply 1 / (num_layers + 1) twice.
        emb_final = self.alpha * torch.stack(embs, dim=1).sum(dim=1)

        embf_users, embf_items = torch.split(emb_final, [self.num_users, self.num_items])

        return embf_users, emb0_users, embf_items, emb0_items
    
    # EVALUATION
    
    def valid_loss(self):
        """BPR loss on the validation interactions, returned as a Python float.

        Negatives are freshly sampled on every call, so the value is stochastic.
        """
        if self.edge_index_val is None:
            raise ValueError("No validation edges were provided to LightGCN")

        with torch.no_grad():
            # Forward pass using the validation adjacency matrix
            emb_users_final, emb_users, emb_items_final, emb_items = self.forward(is_valid=True)

            user_indices, pos_item_indices, neg_item_indices = structured_negative_sampling(
                self.edge_index_val, 
                num_nodes=self.num_items,
                contains_neg_self_loops=False
            )
            target = emb_users_final.device
            user_indices = user_indices.to(target)
            pos_item_indices = pos_item_indices.to(target)
            neg_item_indices = neg_item_indices.to(target)

            # Index the tensors returned by this forward pass, not
            # notebook-level variables of the same meaning.
            loss = bpr_loss(
                emb_users_final[user_indices], emb_users[user_indices],
                emb_items_final[pos_item_indices], emb_items[pos_item_indices],
                emb_items_final[neg_item_indices], emb_items[neg_item_indices]
            )

        return loss.item()
    
    def get_val_metrics(self, epoch: int, topk_recs=10, k_list=[1,2,3]):
        """Rank items for every user and score them against the validation edges.

        Args:
            epoch: value prepended to every metric tuple.
            topk_recs: number of items kept per user.
            k_list: cut-offs forwarded to ``get_metrics`` (not mutated here).

        Returns:
            The tuples returned by ``get_metrics``, each prefixed with ``epoch``.
        """
        with torch.no_grad():
            # NOTE(review): ratings use the layer-0 embeddings, while training
            # optimises the propagated ones - confirm this is intended.
            ratings = torch.matmul(self.emb_users.weight, self.emb_items.weight.T)

            # Exclude interactions in the train_set by pushing their score far
            # below any realistic dot product.
            excl_user_indices, excl_item_indices = self.edge_index
            ratings[excl_user_indices, excl_item_indices] = -1024

            # get the top k recommended items for each user
            _, top_K_items = torch.topk(ratings, k=topk_recs)

        l_metrics = get_metrics(
            top_rec_items=top_K_items,
            ground_truth=self.edge_index_val,
            k_list=k_list
        )
        l_metrics = [(epoch,) + tup for tup in l_metrics]

        return l_metrics

### Instantiate the class

In [454]:
# Toy problem size: 5 users, 3 items, 3-dimensional embeddings, 1 layer.
nu, ni = 5, 3
embdi, layers = 3, 1

# Training interactions (row 0: user ids, row 1: item ids).
toy_edge_index = torch.tensor(
    [
        [0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
        [0, 1, 0, 2, 1, 2, 1, 2, 0, 2],
    ],
    dtype=torch.long,
)

# Held-out validation interactions, same layout.
toy_valid_edge_index = torch.tensor(
    [
        [0, 1, 2],
        [2, 1, 0],
    ]
)

# Every interaction carries unit weight.
toy_edge_values = torch.ones(toy_edge_index.shape[1], dtype=toy_edge_index.dtype)
toy_valid_edge_values = torch.ones(
    toy_valid_edge_index.shape[1], dtype=toy_valid_edge_index.dtype
)

model = LightGCN(
    num_users=nu,
    num_items=ni,
    edge_index=toy_edge_index,
    edge_values=toy_edge_values,
    edge_index_val=toy_valid_edge_index,
    edge_values_val=toy_valid_edge_values,
    num_layers=layers,
    dim_h=embdi,
)

In [455]:
# Forward
# Smoke test: one full-graph propagation over the validation adjacency.
# NOTE(review): the second name is `emb0_user` (singular) while the training
# loop uses `emb0_users` - confirm which spelling is intended.
embf_users, emb0_user, embf_items, emb0_items = model.forward(is_valid=True)

### Parameters

In [465]:
# Weight of the L2 regularization term read by bpr_loss.
LAMBDA = 1e-6
# Cut-offs K used when reporting Precision@K / Recall@K in the training loop.
K_LIST = [1,2]
# Triplets per optimisation step; only used here to derive n_batches.
# NOTE(review): it is not passed to LightGCN(batch_size=...), whose default is also 2.
BATCH_SIZE = 2
# Number of passes of the training loop.
EPOCHS = 30
# Number of training interactions (columns of the edge index).
n_samples_interaction = toy_edge_index.shape[1]
# Optimisation steps per epoch.
n_batches = n_samples_interaction // BATCH_SIZE

1

### Training Loop

In [457]:
# Train on GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fresh model so that training starts from newly initialised embeddings.
model = LightGCN(
    num_users=nu, 
    num_items=ni, 
    edge_index=toy_edge_index,
    edge_values=toy_edge_values,
    edge_index_val=toy_valid_edge_index,
    edge_values_val=toy_valid_edge_values,
    num_layers=1,
    dim_h=embdi
)
model = model.to(device)

# `Module.to` only moves parameters and registered buffers. Tensors stored as
# plain attributes have to be moved explicitly, otherwise the sparse
# propagation in `forward` mixes CPU and CUDA tensors. This is a no-op on CPU.
model.edge_index = model.edge_index.to(device)
for _attr in ("sp_adj_mat", "sp_val_adj_mat"):
    _mat = getattr(model, _attr, None)
    if _mat is not None:
        setattr(model, _attr, _mat.to(device))

optimizer = optim.Adam(model.parameters(), lr=0.001)

In [470]:
# Validation metrics, one entry (list of tuples) per evaluated epoch
l_metrics = []

for epoch in range(EPOCHS):
    model.train()
    
    # Each step propagates over the whole graph, then scores a sampled
    # mini-batch of (user, positive item, negative item) triplets with the
    # BPR loss, which rewards ranking the positive above the negative.
    for _ in range(n_batches):
        # Gradients accumulate across backward() calls, so clear them first;
        # otherwise every step would also apply all previous batches' gradients.
        optimizer.zero_grad()

        # Forward pass
        embf_users, emb0_users, embf_items, emb0_items = model()
        
        # Getting sample indices
        user_indices, pos_item_indices, neg_item_indices = model.sample_mini_batch()
        
        # Applying sample indices
        s_embf_users, s_emb0_users = embf_users[user_indices], emb0_users[user_indices]
        s_embf_items_pos, s_emb0_items_pos = embf_items[pos_item_indices], emb0_items[pos_item_indices]
        s_embf_items_neg, s_emb0_items_neg = embf_items[neg_item_indices], emb0_items[neg_item_indices]
        
        # Loss computation
        train_loss = bpr_loss(
            s_embf_users, s_emb0_users, 
            s_embf_items_pos, s_emb0_items_pos, 
            s_embf_items_neg, s_emb0_items_neg
        )
        
        train_loss.backward()
        optimizer.step()
        
    if epoch % 5 == 0:

        # Precision and recall on validation (generate all items recs).
        # K_LIST is passed so the cut-offs match the index lookup below.
        l_epoch_metrics = model.get_val_metrics(
            epoch=epoch, 
            topk_recs=model.num_items,
            k_list=K_LIST
        )
        l_metrics.append(l_epoch_metrics)

        # Report the metrics for K = k_print; the last two fields of each
        # tuple are read as precision and recall.
        k_print = 2
        idx_k = K_LIST.index(k_print)
        prec = l_epoch_metrics[idx_k][-2]
        rec = l_epoch_metrics[idx_k][-1]

        print(f"Epoch - {epoch}", f"Precision@{k_print} - {prec}", f"Recall@{k_print} - {rec}")     

# df = pd.DataFrame(np.vstack(np.array(l_metrics)), columns=["epoch", "K", "TP", "FP", "P", "precision", "recall"])

Epoch - 0 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 0 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 0 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 0 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 0 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 5 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 5 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 5 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 5 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 5 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 10 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 10 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 10 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 10 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 10 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 15 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 15 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 15 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 15 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 15 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 20 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 20 Precision@2 - 0.5 Recall@2 - 1.0
Epoch - 20 Precision@2 - 0.5 Recall@2 - 1.

Unnamed: 0,epoch,K,TP,FP,P,precision,recall
0,0.0,1.0,3.0,0.0,3.0,1.0,1.0
1,0.0,2.0,3.0,3.0,3.0,0.5,1.0
2,0.0,1.0,3.0,0.0,3.0,1.0,1.0
3,0.0,2.0,3.0,3.0,3.0,0.5,1.0
4,0.0,1.0,3.0,0.0,3.0,1.0,1.0
5,0.0,2.0,3.0,3.0,3.0,0.5,1.0
6,0.0,1.0,3.0,0.0,3.0,1.0,1.0
7,0.0,2.0,3.0,3.0,3.0,0.5,1.0
8,0.0,1.0,3.0,0.0,3.0,1.0,1.0
9,0.0,2.0,3.0,3.0,3.0,0.5,1.0


# get metrics

In [577]:
# Hand-written fixture for checking the ranking metrics: a 5 x 3 tensor,
# presumably one row of ranked item ids per user (best first) - confirm
# against the metric implementation.
top_rec_items = torch.tensor([
            [2, 0, 1],
            [0, 1, 2],
            [0, 1, 2],
            [0, 1, 2],
            [1, 0, 2]])


# 2 x 5 tensor of held-out interactions; presumably row 0 holds user ids and
# row 1 item ids, like the edge indices used elsewhere in this notebook.
ground_truth = torch.tensor(
    [[0, 0, 1, 1, 2],
     [2, 0, 1, 2, 0]]
)

In [638]:
# NDCG at cut-off 2 on the fixture above.
# NOTE(review): `ndcg_at_k` is neither defined nor imported in the visible
# cells - it presumably leaked from a deleted cell or lives in `utils`.
ndcg_at_k(top_rec_items, ground_truth, 2)

tensor(0.7956)

In [639]:
# Scratch: sample list of floats used to try out the tuple-wrapping idiom below.
a = [2.3, 2.1]

In [641]:
# Scratch: wrap every element in a 1-tuple (rebinds `a`, so re-running this
# cell nests the tuples one level deeper each time).
a = [(aa,) for aa in a]

In [642]:
# Scratch: display the wrapped list.
a

[(2.3,), (2.1,)]