<a href="https://colab.research.google.com/github/SytzeAndr/NGCF_RP32/blob/master/NGCF_take2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import csv
import scipy.sparse as sp

from pathlib import Path

In [102]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Loading

In [0]:
# real data
# path = Path('./drive/My Drive/RP_data/backup')
# toy data
path = Path('./drive/My Drive/RP_data/toy_data')
train_file = path/'train.txt'
test_file = path/'test.txt'

class DataLoader:
  def __init__(self, file, batch_size):
    self.file = file
    self.batch_size = batch_size
    self.n_users, self.n_items, self.n_data = 0, 0, 0
    self.users = []
    self.pos_items = {}
    self.neg_items = {}
    self.load()

  def load(self):
    with open(self.file) as f:
      for l in f.readlines():
        if len(l) == 0: break
        l = l.strip('\n').split(' ')
        uid = int(l[0])
        items = [int(i) for i in l[1:]]
        self.users.append(uid)
        self.n_items = max(self.n_items, max(items))
        self.n_users = max(self.n_users, uid)
        self.n_data += len(items)
        self.pos_items[uid] = items
    self.n_users += 1
    self.n_items += 1

    # R is the Rating matrix in Dict Of Keys form, either 1. or 0. for each (user, item) pair
    self.R = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)
    for u in self.users:
      for i in self.pos_items[u]:
        self.R[u, i] = 1.

  def compute_norm_adj_matrix(self, adj):
    # rowsum = out-degree of the node    
    rowsum = np.array(adj.sum(1))
    # inverted and set to 0 if no connections
    d_inv = np.power(rowsum, -1).flatten()
    d_inv[np.isinf(d_inv)] = 0.
    # sparse diagonal matrix with the normalizing factors in the diagonal
    d_mat_inv = sp.diags(d_inv)
    # dot product resulting in a row-normalised version of the input matrix
    norm_adj = d_mat_inv.dot(adj)
    return norm_adj.tocoo()
  
  def compute_adj_matrix(self):
    # A is the Adjecency matrix in Dict Of Keys form, used when computing the Laplacian norm
    A = sp.dok_matrix((self.n_users + self.n_items, self.n_users + self.n_items), dtype=np.float32).tolil()
    A[:self.n_users, self.n_users:] = self.R.tolil()
    A[self.n_users:, :self.n_users] = self.R.tolil().T
    A = A.todok()

    # norm_adj = self.compute_norm_adj_matrix(A + sp.eye(A.shape[0]))
    mean_adj = self.compute_norm_adj_matrix(A)
    # L is the Laplacian used for normalizing message construction
    # ngcf_adj = mean_adj + sp.eye(mean_adj.shape[0])
    self.adj_matrix = mean_adj + sp.eye(mean_adj.shape[0])

  def sample_pos(self, u, amount):
    # Sample a batch of <amount> positive items for user u
    high = len(self.pos_items[u])
    pos_sample = []
    while len(pos_sample) < amount:
      id = np.random.randint(low=0, high=high, size=1)[0]
      item = self.pos_items[u][id]
      if item not in pos_sample:
        pos_sample.append(item)
    return pos_sample

  def sample_neg(self, u, amount):
    # Sample a batch of <amount> negative items for user u
    high = self.n_items
    neg_sample = []
    while len(neg_sample) < amount:
      item = np.random.randint(low=0, high=high, size=1)[0]
      if item not in self.pos_items[u] and item not in neg_sample:
        neg_sample.append(item)
    return neg_sample

  def sample(self):
    # Sample a batch of batch_size users, each with a positive and negative item
    users = np.random.choice(self.users, size=self.batch_size)
    pos_sample, neg_sample = [], []
    for u in users:
      pos_sample += self.sample_pos(u, 1)
      neg_sample += self.sample_neg(u, 1)
    return users, pos_sample, neg_sample

train_data = DataLoader(train_file, batch_size=1024)
train_data.compute_adj_matrix()

In [0]:
test_data = DataLoader(test_file, batch_size=1024)

# Optimization

In [0]:
import torch
from torch.nn import init, LeakyReLU, Linear, Module, ModuleList, Parameter
import torch.nn.functional as F

class NGCF(Module):
  def __init__(self, n_users, n_items, embed_size, n_layers, adj_matrix):
    super().__init__()
    self.n_users = n_users
    self.n_items = n_items
    self.embed_size = embed_size
    self.n_layers = n_layers
    self.adj_matrix = adj_matrix

    # The (user/item)_embeddings are the initial embedding matrix E
    self.user_embeddings = Parameter(torch.rand(n_users, embed_size))
    self.item_embeddings = Parameter(torch.rand(n_items, embed_size))
    # The (user/item)_embeddings_final are the final concatenated embeddings [E_1..E_L]
    # Stored for easy tracking of final embeddings throughout optimization and eval
    self.user_embeddings_final = Parameter(torch.zeros((n_users, embed_size * (n_layers + 1))))
    self.item_embeddings_final = Parameter(torch.zeros((n_items, embed_size * (n_layers + 1))))

    # The linear transformations for each layer
    self.W1 = ModuleList([Linear(self.embed_size, self.embed_size) for _ in range(0, self.n_layers)])
    self.W2 = ModuleList([Linear(self.embed_size, self.embed_size) for _ in range(0, self.n_layers)])

    self.act = LeakyReLU()
    
    # Initialize each of the trainable weights with the Xavier initializer
    self.init_weights()

  def init_weights(self):
    for name, parameter in self.named_parameters():
      if ('bias' not in name):
        init.xavier_uniform_(parameter)

  def split_mtx(self, X, n_folds=100):
    # Split a matrix/Tensor into n_folds (for the user embeddings and the R matrices)
    X_folds = []
    fold_len = X.shape[0]//n_folds
    for i in range(n_folds):
      start = i * fold_len
      if i == n_folds -1:
        end = X.shape[0]
      else:
        end = (i + 1) * fold_len
      X_folds.append(X[start:end])
    return X_folds

  def to_sparse_tensor(self, X):
    coo = X.tocoo().astype(np.float32)
    i = torch.LongTensor(np.mat((coo.row, coo.col)))
    v = torch.FloatTensor(coo.data)
    return torch.sparse.FloatTensor(i, v, coo.shape).cuda()

  def compute_loss(self, batch_user_emb, batch_pos_emb, batch_neg_emb):
    pos_y = torch.mul(batch_user_emb, batch_pos_emb).sum(dim=1)
    neg_y = torch.mul(batch_user_emb, batch_neg_emb).sum(dim=1)
    # Unregularized loss
    bpr_loss = -(torch.log(torch.sigmoid(pos_y - neg_y))).mean()
    return bpr_loss

  def forward(self, u, i, j):
    adj_splits = self.split_mtx(self.adj_matrix)
    embeddings = torch.cat((self.user_embeddings, self.item_embeddings))
    final_embeddings = [embeddings]

    for l in range(self.n_layers):
      embedding_parts = []
      for part in adj_splits:
        embedding_parts.append(torch.sparse.mm(self.to_sparse_tensor(part), embeddings))
      # Message construction
      # t1_embeddings = torch.sparse.mm(self.to_sparse_tensor(self.adj_matrix), embeddings)
      t1_embeddings = torch.cat(embedding_parts, 0)
      t1 = self.W1[l](t1_embeddings)
      t2_embeddings = embeddings.mul(t1_embeddings)
      t2 = self.W2[l](t2_embeddings)

      # Message aggregation
      embeddings = self.act(t1 + t2)
      normalized_embeddings = F.normalize(embeddings, p=2, dim=1)
      final_embeddings.append(normalized_embeddings)

    # Make sure to update the (user/item)_embeddings(_final)
    final_embeddings = torch.cat(final_embeddings, 1)
    final_u_embeddings, final_i_embeddings = final_embeddings.split((self.n_users, self.n_items), 0)
    self.user_embeddings_final = Parameter(final_u_embeddings)
    self.item_embeddings_final = Parameter(final_i_embeddings)

    batch_user_emb = final_u_embeddings[u]
    batch_pos_emb = final_i_embeddings[i]
    batch_neg_emb = final_i_embeddings[j]

    return self.compute_loss(batch_user_emb, batch_pos_emb, batch_neg_emb)

In [0]:
# optional: restore previously trained model
def restore_model(path):
  model = NGCF(n_users=train_data.n_users, n_items=train_data.n_items, embed_size=64, n_layers=2, adj_matrix=adj_matrix).to(device)
  model.load_state_dict(torch.load(model_path))

In [110]:
from time import time

# path to save model to
models_path = path/'models'

# norm_adj, mean_adj, ngcf_adj = train_data.compute_adj_matrix()
device = torch.device('cuda')
model = NGCF(n_users=train_data.n_users, n_items=train_data.n_items, embed_size=64, n_layers=2, adj_matrix=train_data.adj_matrix).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
n_epochs = 10

model.train()
n_batch = train_data.n_data // train_data.batch_size + 1

def compute_ndcg(top_items, test_items, test_indices, k):
  ratings = (test_items * top_items).gather(1, test_indices)
  norm = torch.from_numpy(np.log2(np.arange(2, k+2))).float().to(device)
  dcg = (ratings / norm).sum(1)
  dcg_max = (torch.sort(ratings, dim=1, descending=True)[0] / norm).sum(1)
  ndcg = dcg / dcg_max
  ndcg[torch.isnan(ndcg)] = 0
  return ndcg

def evaluate(user_embeddings, item_embeddings, k):
  user_parts = model.split_mtx(user_embeddings)
  train_parts = model.split_mtx(train_data.R)
  test_parts = model.split_mtx(test_data.R)

  recall_parts, ndcg_parts = [], []

  for user_part, train_part, test_part in zip(user_parts, train_parts, test_parts):

    # Get the prediction scores for the users and items as a cuda float
    non_train_items = torch.from_numpy(1 - (train_part.todense())).float().to(device)
    predictions = torch.mm(user_part, item_embeddings.t()) * non_train_items
    # Get the k highest scores, scatter them as a float tensor in the GPU
    _, test_indices = torch.topk(predictions, dim=1, k=k)
    top_items = torch.zeros_like(predictions).float()
    top_items.scatter_(dim=1, index=test_indices, src=torch.tensor(1.0).to(device))
  
    test_items = torch.from_numpy(test_part.todense()).float().to(device)
    TP = (test_items * top_items).sum(1)
    recall = TP / test_items.sum(1)
    ndcg = compute_ndcg(top_items, test_items, test_indices, k)
  
    recall_parts.append(recall)
    ndcg_parts.append(ndcg)

  mean_recall = torch.cat(recall_parts).mean()
  mean_ndcg = torch.cat(ndcg_parts).mean()
  print('Recall:\t' + str(mean_recall.item()))
  print('NDCG\t:' + str(mean_ndcg.item()))

def save_state(model, optimizer, epoch):
  torch.save(model.state_dict(), models_path/'model1.pt')
  torch.save(optimizer.state_dict(), models_path/'optimizer.pt')
  torch.save(torch.IntTensor(epoch), models_path/'epoch.pt')

def train(model, data, t):
  total_loss = 0
  start = time()
  timings = []
  for b in range(10):
    batch_start = time()
    u, i, j = data.sample()
    u = torch.from_numpy(u).long().to(device)
    i = torch.LongTensor(i).to(device)
    j = torch.LongTensor(j).to(device)
    optimizer.zero_grad()
    loss = model(u, i, j)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
    timings.append(time()-batch_start)

  avg_batch = np.average(timings)
  print('Finished epoch ' + str(t+1) + ' in\t' + str(time()-start) + ' sec')
  print('Total BPR loss:\t\t' + str(total_loss))
  print('Average batch time:\t' + str(avg_batch))

for t in range(n_epochs):
  print('Starting epoch: ' + str(t+1))
  train(model, train_data, t)
  save_state(model, optimizer, t)
  evaluate(model.user_embeddings_final.detach(), model.item_embeddings_final.detach(), k=20)
  print('\n============\n')





Starting epoch: 1
Finished epoch 1 in	2.793128490447998 sec
Total BPR loss:		6.303795278072357
Average batch time:	0.2793026685714722
Recall:	0.007790080271661282
NDCG	:0.022036591544747353


Starting epoch: 2
Finished epoch 2 in	2.8095545768737793 sec
Total BPR loss:		5.715576708316803
Average batch time:	0.28094685077667236
Recall:	0.00840833317488432
NDCG	:0.019862934947013855


Starting epoch: 3
Finished epoch 3 in	2.8440659046173096 sec
Total BPR loss:		5.492582559585571
Average batch time:	0.28439621925354003
Recall:	0.01031746156513691
NDCG	:0.02105947583913803


Starting epoch: 4
Finished epoch 4 in	2.808786630630493 sec
Total BPR loss:		5.352205455303192
Average batch time:	0.2808708906173706
Recall:	0.0096726194024086
NDCG	:0.0199887678027153


Starting epoch: 5
Finished epoch 5 in	2.775261878967285 sec
Total BPR loss:		5.235935628414154
Average batch time:	0.27751827239990234
Recall:	0.009200001135468483
NDCG	:0.019457748159766197


Starting epoch: 6
Finished epoch 6 in	2.83

# Evaluation

tensor(0.0072, device='cuda:0')
tensor(0.0150, device='cuda:0')


In [0]:
def split_mtx(X, n_folds=100):
  # Split a matrix/Tensor into n_folds (for the user embeddings and the R matrices)
  X_folds = []
  fold_len = X.shape[0]//n_folds
  for i in range(n_folds):
    start = i * fold_len
    if i == n_folds -1:
      end = X.shape[0]
    else:
      end = (i + 1) * fold_len
    X_folds.append(X[start:end])
  return X_folds

def ndcg_at_k_gpu(pred_items, test_items, test_indices, k):
  # to calculate ndcg@k
  r = (test_items * pred_items).gather(1, test_indices)
  f = torch.from_numpy(np.log2(np.arange(2, k+2))).float().cuda()
  dcg = (r[:, :k]/f).sum(1)
  dcg_max = (torch.sort(r, dim=1, descending=True)[0][:, :k]/f).sum(1)
  ndcg = dcg/dcg_max
  ndcg[torch.isnan(ndcg)] = 0
  return ndcg

def test_GPU(u_emb, i_emb, Rtr, Rte, Ks):
  ue_folds = split_mtx(u_emb)
  tr_folds = split_mtx(Rtr)
  te_folds = split_mtx(Rte)

  fold_prec, fold_rec, fold_ndcg, fold_hr = \
    defaultdict(list), defaultdict(list), defaultdict(list), defaultdict(list)
  for ue_f, tr_f, te_f in zip(ue_folds, tr_folds, te_folds):
    scores = torch.mm(ue_f, i_emb.t())
    test_items = torch.from_numpy(te_f.todense()).float().cuda()
    non_train_items = torch.from_numpy(1-(tr_f.todense())).float().cuda()
    scores = scores * non_train_items
    _, test_indices = torch.topk(scores, dim=1, k=max(Ks))
    pred_items = torch.zeros_like(scores).float()
    pred_items.scatter_(dim=1,index=test_indices,src=torch.tensor(1.0).cuda())

    for k in Ks:
      topk_preds = torch.zeros_like(scores).float()
      topk_preds.scatter_(dim=1,index=test_indices[:, :k],src=torch.tensor(1.0))

      TP = (test_items * topk_preds).sum(1)
      prec = TP/k
      rec = TP/test_items.sum(1)
      hit_r = (TP > 0).float()
      ndcg = ndcg_at_k_gpu(pred_items, test_items, test_indices, k)

      fold_prec[k].append(prec)
      fold_rec[k].append(rec)
      fold_ndcg[k].append(ndcg)
      fold_hr[k].append(hit_r)

  result = {'precision': [], 'recall': [], 'ndcg': [], 'hit_ratio': []}
  for k in Ks:
    result['precision'].append(torch.cat(fold_prec[k]).mean())
    result['recall'].append(torch.cat(fold_rec[k]).mean())
    result['ndcg'].append(torch.cat(fold_ndcg[k]).mean())
    result['hit_ratio'].append(torch.cat(fold_hr[k]).mean())
  return result

def early_stopping(log_value, best_value, stopping_step, expected_order='asc', patience=10):
  # better is higher or lower
  assert expected_order in ['asc', 'dec']
  if (expected_order == 'asc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value):
    stopping_step = 0
    best_value = log_value
  else:
    stopping_step += 1
  if stopping_step >= patience:
    print("Early stopping is trigger at step: {} log:{}".format(patience, log_value))
    should_stop = True
  else:
    should_stop = False
  return best_value, stopping_step, should_stop



In [0]:
# with early stopping
print_every, eval_every, save_every = 1, 1, 10
Ks = [10, 20]

model = NGCF(n_users=1000, n_items=2000, embed_size=64, n_layers=2, adj_matrix=train_data.L)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
n_epochs = 20

model.train()
n_batch = train_data.n_data // train_data.batch_size + 1
print('Total batches: ' + str(n_batch))

cur_best_loss, stopping_step, should_stop = 1e3, 0, False

for t in range(n_epochs):
  epoch_loss = train(model, train_data)
  print(str(t) + ': ' + str(epoch_loss))
  if epoch % eval_every  == (eval_every - 1):
    result = test_GPU(model.user_embeddings_final.detach(), model.item_embeddings_final.detach(), train_data.R, test_data.R, Ks)
    log_value = result['recall'][0]
    cur_best_metric, stopping_step, should_stop = early_stopping(log_value, cur_best_metric, stopping_step, args.patience)
  if should_stop == True: 
    break
  

In [0]:
def recall_at_k(pred, k, interactions):
  pred = np.asfarray(pred)[:k]
  return np.sum(pred) / interactions

def dcg_at_k(pred, k):
  pred = np.asfarray(pred)[:k]
  return np.sum(pred / np.log2(np.arange(2, pred.size + 2)))

def ndcg_at_k(pred, k):
  max_dcg = dcg_at_k(sorted(pred, reverse=True), k)
  return dcg_at_k(pred, k) / max_dcg

In [0]:
r = np.random.choice(2, 20, p=[0.7, 0.3])
k = 10
interactions = 20
print(recall_at_k(r, k, interactions))
print(ndcg_at_k(r, k))

0.2
0.3995166168199184
