In [1]:
from math import ceil
import json
import os
import sys
import random
from itertools import combinations


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve, classification_report
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from datasets import link_prediction
from layers import MeanAggregator, LSTMAggregator, MaxPoolAggregator, MeanPoolAggregator
from models import DGNN, AAGNN, EGNNC
from models_variants import EAAGNN, EAACGNN
import models
import utils

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set up arguments for datasets, models and training.
is_train = True
is_test = False
is_val = True
is_debug = False
conf_device = None
hidden_dim = [8]
batch_size = 32
dataset_folder = "ground_truth"

num_layers = len(hidden_dim) + 1


if False and torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
conf_device = device

# Get the dataset, dataloader and model.
if not is_val and not is_test:
    dataset_args = ('train', num_layers)

if is_val:
    dataset_args = ('val', num_layers)

if is_test:
    dataset_args = ('test', num_layers)

datasets = utils.get_dataset_gcn(dataset_args, dataset_folder, is_debug=is_debug)


2
--------------------------------
Reading edge dataset from Val\P28_10_5
Finished reading data.
Setting up graph.
self.features.shape: torch.Size([1852, 4])
Finished setting up graph.
Setting up examples.
self.mode != 'train'
Finished setting up examples.
Dataset properties:
Mode: val
Number of vertices: 1852
Number of edges: 5514
Number of triangles: 3671
Number of positive/negative datapoints: 412/5102
Number of examples/datapoints: 5514
--------------------------------


In [3]:
# load data
loaders = []
for i in range(len(datasets)):
    loaders.append(DataLoader(dataset=datasets[i], batch_size=batch_size,
                    shuffle=True, collate_fn=datasets[i].collate_wrapper))
                    
loader = DataLoader(dataset=datasets[0], batch_size=batch_size,
                    shuffle=False, collate_fn=datasets[0].collate_wrapper)
for (idx, batch) in enumerate(loader):
    adj, features, edge_features, adj_relative_cos, edges, labels, dist = batch
    break

In [4]:
class TriangularMotifsCNN(nn.Module):
    def __init__(self, num_channels = 4, output_dim = 1, dropout=0.5, device='cpu'):
        super(TriangularMotifsCNN, self).__init__()
        self.conv1 = nn.Conv1d(num_channels, 16, kernel_size=3, stride=1, padding=1).to(device)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1).to(device)
        self.fc1 = nn.Linear(32 * 64, 128, bias=True).to(device)  # Adjust the input size based on your data
        self.fc2 = nn.Linear(128, output_dim, bias=True).to(device)  # Output size is 1  for regression
        self.dropout = dropout
        self.training = True
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        
        print("x shape:",x.shape)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = F.dropout(x, self.dropout, training=self.training)
        print(self.training)
        x = self.fc2(x)
         
        out = x.reshape(-1)
         
        return out

In [9]:
# load model
directory = "models/"
fname = "n1_e1234_egnnc_mlp_entropy_density_drop05_epoch100_hid16_out16_70_30_split_exp_saved_model.pth"
path = os.path.join(directory, fname)
config = dict()
config['hidden_dims'], config['out_dim'] = [16], 16
config['device'] = 'cpu'
config['dropout'] = 0.5
config['task'] = 'link_prediction'

input_dim, hidden_dim,  output_dim = datasets[0].get_dims()[0], config['hidden_dims'][0], config['out_dim']
channel_dim = datasets[0].get_channel()

model = EGNNC(input_dim, hidden_dim, output_dim,
              channel_dim, 3, config['dropout'], config['device'])
model.to(config['device'])


mlp = models.MLPTwoLayers(input_dim=channel_dim*output_dim*4, hidden_dim=output_dim*4, output_dim=1, dropout=0.5)
mlp.to(config["device"])

cnn_classifier = models.TriangularMotifsCNN(num_channels = 4)
cnn_classifier.to(config["device"])

sigmoid = nn.Sigmoid()

criterion = utils.get_criterion(config['task'])

self.features.shape: torch.Size([1852, 4])
input_dims (input dimension) -> self.features.shape[1] = 4


In [10]:

def create_TriangularMotifsCNN_input(features, edges, triangles, device="cpu"):
   
    _u = torch.FloatTensor().to(device)
    _v = torch.FloatTensor().to(device)
    
    _z = torch.FloatTensor().to(device)
    _w = torch.FloatTensor().to(device)
    
    count = 0
    for u, v in edges:
        
        t12 = triangles.get(frozenset((u,v)))
        #print("Triangles: ", t12)
        #print("Edge: ", (u,v))
        if not t12 is None:
            z, w = t12[0], t12[1]
        else:
            #print("------ Padding ------")
            print("Edge: ", (u,v))
            count += 1
            z, w = u, v

        _u = torch.cat((_u, features[u].reshape(1, -1)), dim=0)
        _v = torch.cat((_v, features[v].reshape(1, -1)), dim=0)
        _z = torch.cat((_z, features[int(z)].reshape(1, -1)), dim=0)
        _w = torch.cat((_w, features[int(w)].reshape(1, -1)), dim=0)
        

    if count> 0:
        print("Padding count: ", count)
    
    input_data = torch.stack([_u,_v,_z,_w], dim=1)
    input_data = input_data.permute(0,1,2)
    return input_data

In [11]:
config['lr'] = 1e-3
config['weight_decay'] = 1e-4
config['batch_size'] = 32

optimizer = optim.Adam(model.parameters(), lr=config['lr'],
                            weight_decay=config['weight_decay'])
epochs = 1

#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1500, gamma=0.8)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 10, 15, 20, 25], gamma=0.5) # Epoch decay
model.train()
cnn_classifier.train()
print('--------------------------------')
print('Training.')
for epoch in range(epochs):
    print('Epoch {} / {}'.format(epoch+1, epochs))
    epoch_loss = 0.0
    epoch_roc = 0.0
    epoch_batches = 0
    shuffle = list(range(len(loaders)))
    random.shuffle(shuffle) # Shuffle order of graphs
    for i in shuffle:
        num_batches = int(ceil(len(datasets[i]) / config['batch_size']))
        epoch_batches += num_batches
        graph_roc = 0.0
        running_loss = 0.0
        for (idx, batch) in enumerate(loaders[i]):
            adj, features, edge_features, edges, labels, dist, triangles = batch
            labels = labels.to(device)
            optimizer.zero_grad()

                    # EGNN
            features, edge_features = features.to(device), edge_features.to(device)
            out = model(features, edge_features)

                    # CNN
                    
            cnn_input = create_TriangularMotifsCNN_input(out, edges, triangles, device)
            #print("cnn_input.shape: ", cnn_input.shape)
            cnn_out = cnn_classifier(cnn_input)
            #print("cnn_out.shape: ", cnn_out.shape)
            #print("labels.shape: ", labels.shape)         
            scores = sigmoid(cnn_out)
                        

            loss = criterion(scores, labels.float()) # Loss function for BCE loss
            #loss = utils.get_focal_loss_criterion(scores, labels.float())  # Loss function for Focal Loss 
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                running_loss += loss.item()
                epoch_loss += loss.item()
                if (torch.sum(labels.long() == 0).item() > 0) and (torch.sum(labels.long() == 1).item() > 0):
                    area = roc_auc_score(labels.detach().cpu().numpy(), scores.detach().cpu().numpy())
                    epoch_roc += area
                    graph_roc += area
        running_loss /= num_batches
        print('    Graph {} / {}: loss {:.4f}'.format(
                i+1, len(datasets), running_loss))
        print('    ROC-AUC score: {:.4f}'.format(graph_roc/num_batches))

    scheduler.step()
    print("Epoch avg loss: {}".format(epoch_loss / epoch_batches))
    print("Epoch avg ROC_AUC score: {}".format(epoch_roc / epoch_batches))

print('Finished training.')
print('--------------------------------')

--------------------------------
Training.
Epoch 1 / 1
    Graph 1 / 1: loss 0.3908
    ROC-AUC score: 0.5170
Epoch avg loss: 0.3908496451257281
Epoch avg ROC_AUC score: 0.5169597886335174
Finished training.
--------------------------------


In [12]:
scores, labels 

(tensor([0.0827, 0.0236, 0.0234, 0.0119, 0.0327, 0.0778, 0.0811, 0.0703, 0.0160,
         0.2096], grad_fn=<SigmoidBackward0>),
 tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))

In [38]:
out_norm = (out - out.mean(dim=0)) / out.std(dim=0)
out_norm, out

(tensor([[ 1.1365, -0.4436, -0.9728,  ...,  1.6593, -2.1159,  2.0860],
         [ 1.8839, -1.4095, -1.7701,  ...,  1.6381, -1.9976,  1.9533],
         [ 0.8478, -0.4772, -0.8002,  ...,  1.4776, -2.0392,  2.2021],
         ...,
         [-0.1480,  0.0464,  0.0748,  ...,  0.0928, -0.0188,  0.3521],
         [ 1.0319, -1.1962, -0.9603,  ...,  0.3912,  0.2173,  0.2191],
         [-0.8643,  0.7457,  0.7227,  ..., -0.9609,  0.0982,  0.0715]],
        grad_fn=<DivBackward0>),
 tensor([[ -8.0259,   1.1088,   8.6465,  ...,  -2.8202,   6.0319,  -6.4001],
         [  2.2959,  -4.9667,  -4.3359,  ...,  -2.8708,   6.4991,  -7.0512],
         [-12.0124,   0.8972,  11.4571,  ...,  -3.2548,   6.3346,  -5.8304],
         ...,
         [-25.7646,   4.1905,  25.7066,  ...,  -6.5681,  14.3165, -14.9083],
         [ -9.4710,  -3.6248,   8.8504,  ...,  -5.8540,  15.2491, -15.5608],
         [-35.6560,   8.5886,  36.2573,  ...,  -9.0891,  14.7786, -16.2850]],
        grad_fn=<AddBackward0>))

In [32]:
cnn_input = create_TriangularMotifsCNN_input(out, edges, triangles, device)
print("cnn_input.shape: ", cnn_input.shape)
cnn_out = cnn_classifier(cnn_input)
print("cnn_out.shape: ", cnn_out.shape)

cnn_input.shape:  torch.Size([10, 4, 64])
x shape: torch.Size([10, 2048])
torch.Size([10, 128])
torch.Size([10, 1])
out: torch.Size([10])
cnn_out.shape:  torch.Size([10])


In [2]:
uv = torch.ones(32, 64)
_z = torch.zeros(32, 64)
_w = torch.ones(32, 64)


input_data = torch.stack([uv, _z], dim=1)
#input_data = input_data.view(-1, 2, 16, 16)
input_data = input_data.permute(0,1,2)

In [3]:
input_data.shape

torch.Size([32, 2, 64])

In [None]:
node_repr1 = torch.randn(32, 192)
node_repr2 = torch.randn(32, 192)
triangle_repr1 = torch.randn(32, 192)
triangle_repr2 = torch.randn(32, 192)

# Concatenate the node representations and reshape into a 4-channel input for the CNN
input_data = torch.stack([node_repr1, node_repr2, triangle_repr1, triangle_repr2], dim=1)
input_data = input_data.view(-1, 4*3, 8, 8)

input_data.shape

In [11]:
features = torch.Tensor([
    [1, 2, 3, 4],
    [1, 1, 1, 1],
    [1, 2, 2, 1]
])

weight0 = torch.Tensor([
    [1, 1, 1, 1, 1],
    [1, 2, 3, 4, 5],
    [2, 2, 2, 2, 2],
    [5, 4, 3, 2, 1]
])

weight00 = torch.randn(4, 3)

weight1 = torch.Tensor([
    [1, 1, 1, 1, 1],
    [0, 0, 0, 0, 0],
    [2, 2, 2, 2, 2],
    [0, 0, 0, 0, 0]
])

edge_features = torch.Tensor([
    [
        [1, 2, 3],
        [3, 2, 1],
        [2, 2, 2]
    ],
    [
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]
    ]
])

In [12]:
x0 = torch.matmul(features, weight0)
x1 = torch.matmul(features, weight1)
x2 = torch.matmul(edge_features, x0)
output = torch.cat([xi for xi in x2], dim=1)

In [24]:
x00 = torch.matmul(features, weight00)
x10 = torch.matmul(features, weight00)
x20 = torch.matmul(edge_features, x00)
output0 = torch.cat([xi for xi in x20], dim=0)


In [7]:
output

tensor([[ 83.,  84.,  85.,  86.,  87.,  50.,  49.,  48.,  47.,  46.],
        [117., 112., 107., 102.,  97., 100.,  98.,  96.,  94.,  92.],
        [100.,  98.,  96.,  94.,  92., 150., 147., 144., 141., 138.]])

In [8]:
x0

tensor([[29., 27., 25., 23., 21.],
        [ 9.,  9.,  9.,  9.,  9.],
        [12., 13., 14., 15., 16.]])

In [9]:
x1

tensor([[7., 7., 7., 7., 7.],
        [3., 3., 3., 3., 3.],
        [5., 5., 5., 5., 5.]])

In [10]:
x2

tensor([[[ 83.,  84.,  85.,  86.,  87.],
         [117., 112., 107., 102.,  97.],
         [100.,  98.,  96.,  94.,  92.]],

        [[ 50.,  49.,  48.,  47.,  46.],
         [100.,  98.,  96.,  94.,  92.],
         [150., 147., 144., 141., 138.]]])

In [16]:
x2 + x1

tensor([[[ 90.,  91.,  92.,  93.,  94.],
         [120., 115., 110., 105., 100.],
         [105., 103., 101.,  99.,  97.]],

        [[ 57.,  56.,  55.,  54.,  53.],
         [103., 101.,  99.,  97.,  95.],
         [155., 152., 149., 146., 143.]]])

In [None]:
p = utils.parse_args()

In [17]:
a = torch.Tensor([
    [1, 2, 3],
    [2, 3, 4],
    [3, 4, 5]
])

In [18]:
a.reshape(-1, 3, 3)

tensor([[[1., 2., 3.],
         [2., 3., 4.],
         [3., 4., 5.]]])