Next, we clone the public Github code that will help us download the data and do some preprocessing. We move the required files outside of the cloned folder to use them later

In [1]:
import shutil
import os
files_to_move = ['util_functions.py', 'data_utils.py', 'preprocessing.py']
for f in files_to_move:
  if not os.path.exists(f):
    shutil.move(os.path.join('IGMC', f), f)

Next, load the required torch and torch_geometric libraries. In addition, we load a few useful functions from the GitHub code that we've cloned above.



In [2]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.data import DataLoader
from torch_geometric.nn import RGCNConv, GCNConv
from torch_geometric.utils import dropout_adj
from util_functions import *
from data_utils import *
from preprocessing import *

from sklearn.metrics import precision_score, ndcg_score
import numpy as np


In [3]:
import warnings

warnings.simplefilter("ignore", UserWarning)

Define the variables: learning rate, epochs, and batch size.
LR_DECAY_STEP and LR_DECAY_VALUE help decrease the learning rate over time to improve the training process/
In the original experiment, I've trained the model for 80 epochs, here replacing it by 5 for the code to run fast.

In [12]:
# Arguments
EPOCHS=10
BATCH_SIZE=50
LR=1e-3
LR_DECAY_STEP = 20
LR_DECAY_VALUE = 10

Define a seed, it will help with the reporoducibility of the results. In addition, define a device (cpu vs. cuda)

In [5]:
torch.manual_seed(123)
device = torch.device('cpu')
if torch.cuda.is_available():
    torch.cuda.manual_seed(123)
    torch.cuda.synchronize()
    device = torch.device('cuda')
device

device(type='cpu')

Use the code from the GitHub to download and clean the MovieLens 100k dataset

In [6]:
(u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, val_labels,
val_u_indices, val_v_indices, test_labels, test_u_indices, test_v_indices, class_values
) = load_official_trainvaltest_split('ml_100k', testing=True)

Downloading ml_100k dataset
User features shape: (943, 23)
Item features shape: (1682, 18)


In [7]:
print

<function print(*args, sep=' ', end='\n', file=None, flush=False)>

Next, we use the predefined code from the Github to extract an enclosing subgraph for a given graph G. This step was described in details in the section 2 of the Medium Blogpost.

In [8]:
train_dataset = eval('MyDynamicDataset')(root='data/ml_100k/testmode/train', A=adj_train,
    links=(train_u_indices, train_v_indices), labels=train_labels, h=1, sample_ratio=1.0,
    max_nodes_per_hop=200, u_features=None, v_features=None, class_values=class_values)


print(train_dataset.links)

(array([ 63, 492,  86, ..., 552, 654, 143]), array([ 435,  263,  577, ...,  495, 1559,  212]))


In [9]:
print(train_labels.shape)

(80000,)


In [11]:
test_dataset = eval('MyDataset')(root='data/ml_100k/testmode/test', A=adj_train,
    links=(test_u_indices, test_v_indices), labels=test_labels, h=1, sample_ratio=1.0,
    max_nodes_per_hop=200, u_features=None, v_features=None, class_values=class_values)

len(test_dataset)

Enclosing subgraph extraction begins...


Processing...
100%|██████████| 40/40 [00:07<00:00,  5.65it/s]


Time elapsed for subgraph extraction: 7.31951904296875s
Transforming to pytorch_geometric graphs...


100%|██████████| 20000/20000 [00:02<00:00, 8079.68it/s] 


Time elapsed for transforming to pytorch_geometric graphs: 2.4783411026000977s


 88%|████████▊ | 17604/20000 [00:21<00:00, 11661.92it/s]Done!


20000

In [13]:
print(test_u_indices.shape)

(20000,)


In [14]:
print(test_dataset.links)

(array([  0,   0,   0, ..., 458, 459, 461]), array([  5,   9,  11, ..., 933,   9, 681]))


Now, we define the IGMC model architecture. It consists of several steps:

1.  Optionally add the graph-level dropout layer. It randomly drops edges from the graph, helping avoid overfitting and making the model more robust.
2. The message passing layer that extracts node information for each node in the subgraph. As proposed in the table, we implement it using R-GCN layer to handle different edge types.
3. Pass it through the tanh non-linearity
4. We stack the outputs of step 2 and 3 at each message passing layer
5. Concatenate the node representations at each layer in the final node representation h.
6. Pull the graph level features g by concatenating target user and item representations.
7. Add a linear layer, ReLU non-linearity, Dropout to avoid overfitting, and final linear layer

All the model parameters were chosen following the IGMC paper.



In [15]:
class IGMC(torch.nn.Module):
    def __init__(self):
        super(IGMC, self).__init__()
        self.rel_graph_convs = torch.nn.ModuleList()
        self.rel_graph_convs.append(RGCNConv(in_channels=4, out_channels=32, num_relations=5, num_bases=4))
        self.rel_graph_convs.append(RGCNConv(in_channels=32, out_channels=32, num_relations=5, num_bases=4))
        self.rel_graph_convs.append(RGCNConv(in_channels=32, out_channels=32, num_relations=5, num_bases=4))
        self.rel_graph_convs.append(RGCNConv(in_channels=32, out_channels=32, num_relations=5, num_bases=4))
        self.linear_layer1 = Linear(256, 128)
        self.linear_layer2 = Linear(128, 1)

    def reset_parameters(self):
        self.linear_layer1.reset_parameters()
        self.linear_layer2.reset_parameters()
        for i in self.rel_graph_convs:
            i.reset_parameters()

    def forward(self, data):
        num_nodes = len(data.x)
        edge_index_dr, edge_type_dr = dropout_adj(data.edge_index, data.edge_type, p=0.2, num_nodes=num_nodes, training=self.training)

        out = data.x
        h = []
        for conv in self.rel_graph_convs:
            out = conv(out, edge_index_dr, edge_type_dr)
            out = torch.tanh(out)
            h.append(out)
        h = torch.cat(h, 1)
        h = [h[data.x[:, 0] == True], h[data.x[:, 1] == True]]
        g = torch.cat(h, 1)
        out = self.linear_layer1(g)
        out = F.relu(out)
        out = F.dropout(out, p=0.5, training=self.training)
        out = self.linear_layer2(out)
        out = out[:,0]
        return out

model = IGMC()

Use a DataLoader to prepare train and test data batches

In [16]:
train_loader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, BATCH_SIZE, shuffle=False, num_workers=2)

Make sure model is using GPU. Reset the model parameters and define the optimizer. We are using Adam optimizer here

In [17]:
model.to(device)
model.reset_parameters()
optimizer = Adam(model.parameters(), lr=LR, weight_decay=0)


Train the model for number of epochs defined at the beginning.
At each epoch we predict the labels for the batch, find the training MSE loss, do the backpropagation step and update the learnable parameters. Print the training loss at each epoch.

After each LR_DECAY_STEP we decrease the learning rate by a factor of LR_DECAY_VALUE.

In [18]:
for epoch in range(1, EPOCHS+1):
    model.train()
    train_loss_all = 0
    for train_batch in train_loader:
        optimizer.zero_grad()
        train_batch = train_batch.to(device)
        y_pred = model(train_batch)
        y_true = train_batch.y
        train_loss = F.mse_loss(y_pred, y_true)
        train_loss.backward()
        train_loss_all += BATCH_SIZE * float(train_loss)
        optimizer.step()
        torch.cuda.empty_cache()
    train_loss_all = train_loss_all / len(train_loader.dataset)

    print('epoch', epoch,'; train loss', train_loss_all)

    if epoch % LR_DECAY_STEP == 0:
      for param_group in optimizer.param_groups:
          param_group['lr'] = param_group['lr'] / LR_DECAY_VALUE

epoch 1 ; train loss 1.2406210604310035
epoch 2 ; train loss 1.0620688022300602
epoch 3 ; train loss 1.0222321453318
epoch 4 ; train loss 0.979115405548364
epoch 5 ; train loss 0.949212177824229
epoch 6 ; train loss 0.9278120959922671
epoch 7 ; train loss 0.9117765583470464
epoch 8 ; train loss 0.8991473077610135
epoch 9 ; train loss 0.8858545563556254
epoch 10 ; train loss 0.8802188859693706


In [19]:
print(y_pred.shape)

torch.Size([50])


Assess the performance of the model using the test set by predicting the labels and finding a MSE loss

In [20]:
model.eval()

test_loss = 0
all_y_true = []
all_y_pred = []

for test_batch in test_loader:
    test_batch = test_batch.to(device)
    with torch.no_grad():
        y_pred = model(test_batch)
    y_true = test_batch.y
    test_loss += F.mse_loss(y_pred, y_true, reduction='sum').item()

    all_y_true.append(y_true.cpu().numpy())
    all_y_pred.append(y_pred.cpu().numpy())

mse_loss = test_loss / len(test_loader.dataset)

all_y_true = np.concatenate(all_y_true)
all_y_pred = np.concatenate(all_y_pred)



In [21]:
print((all_y_pred).astype('int'))

[3 3 3 ... 4 4 4]


In [22]:
print((all_y_true).astype('int'))

[5 3 3 ... 5 3 5]


In [24]:
import collections

def personalized_topk(pred, K, user_indices, edge_index):

    user_item_pairs = set(zip(edge_index[0], edge_index[1]))

    per_user_preds = collections.defaultdict(list)
    for index, user in enumerate(user_indices):
        per_user_preds[user.item()].append(pred[index].item())

    precisions = 0.0
    recalls = 0.0
    num_users = len(per_user_preds) 

    for user, preds in per_user_preds.items():
        while len(preds) < K:
            preds.append(random.choice(range(max(edge_index[1]) + 1)))
        top_ratings, top_items = torch.topk(torch.tensor(preds), K)
        items_indices = top_items.tolist()

        correct_preds = sum((user, item) in user_item_pairs for item in items_indices)
        total_pos = sum(1 for item in range(max(edge_index[1]) + 1) if (user, item) in user_item_pairs)

        precisions += correct_preds / K
        recalls += correct_preds / total_pos if total_pos != 0 else 0

    return precisions / num_users, recalls / num_users


In [25]:
precision_score, recall_score = personalized_topk(all_y_pred,10,test_u_indices,test_dataset.links)

In [26]:
precision_score

0.0673202614379086

In [60]:
import collections
import random
import torch

def personalized_topk_metrics(pred, K, user_indices, edge_index):

    user_item_pairs = set(zip(edge_index[0], edge_index[1]))

    per_user_preds = collections.defaultdict(list)
    for index, user in enumerate(user_indices):
        per_user_preds[user.item()].append((pred[index].item(), index))

    ndcgs = []
    precisions = []
    recalls = []
    num_users = len(per_user_preds) 

    for user, user_preds in per_user_preds.items():

        sorted_preds = sorted(user_preds, key=lambda x: x[0], reverse=True)

        top_k_preds = sorted_preds[:K]
        if len(top_k_preds) < K:
            top_k_preds.extend([(0, random.choice(range(max(edge_index[1]) + 1))) for _ in range(K - len(top_k_preds))])

        dcg = 0.0
        correct_preds = 0
        for i, (_, item_index) in enumerate(top_k_preds):
            if (user, item_index) in user_item_pairs:
                dcg += 1 / torch.log2(torch.tensor(i + 2)).item()  # log base 2 of (i+1+1)
                correct_preds += 1

        # Calculate IDCG
        relevant_items = [item for item in range(max(edge_index[1]) + 1) if (user, item) in user_item_pairs]
        idcg = 0.0
        for i in range(min(K, len(relevant_items))):
            idcg += 1 / torch.log2(torch.tensor(i + 2)).item()

        # Calculate NDCG
        ndcg = dcg / idcg if idcg > 0 else 0
        ndcgs.append(ndcg)

        # Calculate precision and recall
        total_pos = len(relevant_items)
        precision = correct_preds / K if K > 0 else 0
        recall = correct_preds / total_pos if total_pos > 0 else 0
        precisions.append(precision)
        recalls.append(recall)

    # Return the average NDCG, precision, and recall
    average_ndcg = sum(ndcgs)
    average_precision = sum(precisions) / num_users if precisions else 0
    average_recall = sum(recalls) / num_users if recalls else 0

    return average_ndcg


In [61]:
ndcg_score = personalized_topk_metrics(all_y_pred.astype('int'), 10, test_u_indices.astype('int'), test_dataset.links)


 88%|████████▊ | 17604/20000 [1:47:56<14:41,  2.72it/s] 


In [63]:
print(ndcg_score)

0.037239678818512076
