In [36]:
# Run cluster_experiments.py with the 'seal' model on the Planetoid 'cora' dataset for 10 epochs.
!python cluster_experiments.py --model_name 'seal' --tgm_type 'Planetoid' --name 'cora' --epochs 10

Output: AUC
Epoch: 00, Loss: 0.6941, Valid: 58.81%, Test: 54.64%
AUC
Epoch: 01, Loss: 0.6928, Valid: 63.00%, Test: 59.05%
AUC
Epoch: 02, Loss: 0.6904, Valid: 65.45%, Test: 63.14%
AUC
Epoch: 03, Loss: 0.6858, Valid: 66.56%, Test: 65.85%
AUC
Epoch: 04, Loss: 0.6712, Valid: 67.09%, Test: 67.34%
AUC
Epoch: 05, Loss: 0.6530, Valid: 67.64%, Test: 68.08%
AUC
Epoch: 06, Loss: 0.6415, Valid: 67.99%, Test: 68.30%
AUC
Epoch: 07, Loss: 0.6332, Valid: 68.30%, Test: 68.61%
AUC
Epoch: 08, Loss: 0.6220, Valid: 68.57%, Test: 68.92%
AUC
Epoch: 09, Loss: 0.6128, Valid: 69.10%, Test: 69.27%

  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):
  return torch.load(f, map_location)
  self.data, self.slices = torch.load(self.processed_paths[0])



In [5]:
import os.path as osp
import os
import pickle
import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
import argparse
import scipy.sparse as ssp
from tqdm import tqdm
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import DataLoader
from models.train_gnn import train_and_predict
from torch_geometric import seed_everything
from utils.seal_utils import *
from utils.data_utils import data_loader
from utils.seal_datasets import SEALDataset
from models.gcn import GCN, GCN_woBatch
from torch_geometric.utils.convert import to_networkx

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

from scipy.sparse import SparseEfficiencyWarning
warnings.simplefilter('ignore', SparseEfficiencyWarning)

def train_wBatch():
    """Run one mini-batch training epoch and return the average loss.

    Reads the notebook-level globals ``model``, ``optimizer``,
    ``train_loader``, ``train_dataset``, ``device`` and ``emb``.

    Returns:
        float: per-batch losses weighted by ``num_graphs``, summed and divided
        by ``len(train_dataset)``.
    """
    model.train()

    criterion = BCEWithLogitsLoss()
    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        feats = batch.x
        # Node ids are only forwarded to the model when `emb` is truthy.
        ids = batch.node_id if emb else None
        # The fifth argument (edge weights) is always None here.
        out = model(batch.z, batch.edge_index, batch.batch, feats, None, ids)
        batch_loss = criterion(out.view(-1), batch.y.to(torch.float))
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item() * batch.num_graphs

    return running_loss / len(train_dataset)

def train():
    """Perform a single full-batch optimisation step on ``train_dataset``.

    Reads the notebook-level globals ``model``, ``optimizer``,
    ``train_dataset`` and ``emb``.

    Returns:
        The loss tensor computed by ``BCEWithLogitsLoss`` for this step (it is
        returned as a tensor, not converted to a Python float).
    """
    model.train()
    optimizer.zero_grad()

    feats = train_dataset.x
    # Node ids are only forwarded to the model when `emb` is truthy.
    ids = train_dataset.node_id if emb else None
    out = model(
        train_dataset.z,
        train_dataset.data.edge_label_index,
        feats,
        None,  # edge weights are not used
        ids,
    )
    step_loss = BCEWithLogitsLoss()(out.view(-1), train_dataset.edge_label.to(torch.float))
    step_loss.backward()
    optimizer.step()

    return step_loss


def _predict_wBatch(loader):
    """Score every batch of ``loader`` and return ``(logits, labels)``.

    Both results are flattened, moved to the CPU and concatenated over all
    batches; labels are cast to float. Intended to be called from
    ``test_wBatch`` (which disables gradient tracking and sets eval mode).
    """
    y_pred, y_true = [], []
    for data in loader:
        data = data.to(device)
        # Node ids are only forwarded to the model when `emb` is truthy.
        node_id = data.node_id if emb else None
        # The fifth argument (edge weights) is always None here.
        logits = model(data.z, data.edge_index, data.batch, data.x, None, node_id)
        y_pred.append(logits.view(-1).cpu())
        y_true.append(data.y.view(-1).cpu().to(torch.float))
    return torch.cat(y_pred), torch.cat(y_true)


@torch.no_grad()
def test_wBatch():
    """Evaluate the model on ``val_loader`` and ``test_loader``.

    Reads the notebook-level globals ``model``, ``val_loader``,
    ``test_loader``, ``device`` and ``emb``.

    Returns:
        Whatever ``evaluate_auc`` returns for the validation and test
        predictions/labels; the training loop consumes it as a mapping from
        metric name to a ``(valid, test)`` pair.
    """
    model.eval()

    val_pred, val_true = _predict_wBatch(val_loader)
    test_pred, test_true = _predict_wBatch(test_loader)

    return evaluate_auc(val_pred, val_true, test_pred, test_true)

def _predict_full(dataset):
    """Score ``dataset`` in one forward pass; return flattened CPU ``(logits, labels)``.

    Intended to be called from ``test`` (which disables gradient tracking and
    sets eval mode).
    """
    x = dataset.x
    # Node ids are only forwarded to the model when `emb` is truthy.
    node_id = dataset.node_id if emb else None
    # The fourth argument (edge weights) is always None here.
    logits = model(dataset.z, dataset.data.edge_label_index, x, None, node_id)
    # NOTE(review): train() pairs `edge_label_index` with `edge_label`, while the
    # labels here come from `y` - confirm both have the same length as the logits.
    return logits.view(-1).cpu(), dataset.y.view(-1).cpu().to(torch.float)


@torch.no_grad()
def test():
    """Evaluate the model on ``val_dataset`` and ``test_dataset`` without batching.

    Reads the notebook-level globals ``model``, ``val_dataset``,
    ``test_dataset`` and ``emb``.

    Returns:
        Whatever ``evaluate_auc`` returns for the validation and test
        predictions/labels.
    """
    model.eval()

    val_pred, val_true = _predict_full(val_dataset)
    test_pred, test_true = _predict_full(test_dataset)

    return evaluate_auc(val_pred, val_true, test_pred, test_true)

# Pick the compute device: CUDA first, then Apple MPS (when this torch build
# exposes it), otherwise fall back to the CPU.
_mps_backend = getattr(torch.backends, 'mps', None)
if torch.cuda.is_available():
    _device_name = 'cuda'
elif _mps_backend is not None and _mps_backend.is_available():
    _device_name = 'mps'
else:
    _device_name = 'cpu'
device = torch.device(_device_name)

# set seed
seed_everything(60)

# Load dataset
dataset = data_loader(tgm_type="Planetoid", name="cora", transform=None)
split_edge = do_edge_split(dataset, True)
data = dataset[0]
# Replace the graph's edges with the training edges from the split.
data.edge_index = split_edge['train']['edge'].t()
directed = False

# convert the data in seal_datasets, i.e. with the subgraphs done
path = "data/Planetoid/cora/SEAL"
# Retained for any code that still looks the dataset class up by name.
dataset_class = 'SEALDataset'
# Reference the imported class directly instead of eval()-ing its name.
train_dataset = SEALDataset(
    path,
    data,
    split_edge,
    num_hops=1,
    percent=100,
    split='train')
if False:  # visualize some graphs (disabled debugging aid)
    # Everything this block needs is imported here so that enabling it does not
    # depend on names leaking in from other cells or star-imports.
    import pdb
    import networkx as nx
    from torch_geometric.utils import to_networkx
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
    for g in loader:
        f = plt.figure(figsize=(20, 20))
        plt.axis('off')
        g = g.to(device)
        node_size = 100
        with_labels = True
        G = to_networkx(g, node_attrs=['z'])
        # Label every node with its `z` attribute.
        labels = {i: G.nodes[i]['z'] for i in range(len(G))}
        nx.draw(G, node_size=node_size, arrows=True, with_labels=with_labels,
                labels=labels)
        f.savefig('tmp_vis.png')
        # Close the figure so repeated iterations do not accumulate open canvases.
        plt.close(f)
        # Pause after each graph: the next iteration overwrites tmp_vis.png.
        pdb.set_trace()

# Build the validation and test datasets.
# The imported class is referenced directly instead of eval()-ing its name.
val_dataset = SEALDataset(
    path,
    data,
    split_edge,
    num_hops=1,
    percent=100,
    split='valid')

test_dataset = SEALDataset(
    path,
    data,
    split_edge,
    num_hops=1,
    percent=100,
    split='test')

# Only the training loader shuffles; the evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=32,
                          shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

# define the training process
emb = None  # falsy, so the train/test helpers pass node_id=None to the model
max_z = 1000  # set a large max_z so that every z has embeddings to look up
model = GCN(32, 2, max_z, train_dataset, True)
model = model.to(device)
parameters = list(model.parameters())
optimizer = torch.optim.Adam(parameters, lr=1e-4)

# Train for the configured number of epochs (currently one) and report every
# metric returned by test_wBatch() as a (valid, test) pair.
for epoch in range(1):
    loss = train_wBatch()
    results = test_wBatch()
    for key, (valid_res, test_res) in results.items():
        summary = (f'Epoch: {epoch:02d}, '
                   f'Loss: {loss:.4f}, Valid: {100 * valid_res:.2f}%, '
                   f'Test: {100 * test_res:.2f}%')
        print(key)
        print(summary)

# get the graph results
# Each split is collated into a single batch (batch_size == dataset size) and
# moved to the same device as the model.
train_data = next(iter(DataLoader(train_dataset, batch_size=len(train_dataset)))).to(device)
val_data = next(iter(DataLoader(val_dataset, batch_size=len(val_dataset)))).to(device)
test_data = next(iter(DataLoader(test_dataset, batch_size=len(test_dataset)))).to(device)

G = to_networkx(train_data, to_undirected=True)

# Same rule as the train/test helpers: node ids only reach the model when `emb` is set.
val_node_id = val_data.node_id if emb else None
test_node_id = test_data.node_id if emb else None

# Inference only: eval mode, and no autograd graph over the full splits.
model.eval()
with torch.no_grad():
    val_logits = model(val_data.z, val_data.edge_index, val_data.batch, val_data.x, None, val_node_id)
    test_logits = model(test_data.z, test_data.edge_index, test_data.batch, test_data.x, None, test_node_id)

val_predictions = val_logits.view(-1).sigmoid().detach().cpu().numpy()
val_label = val_data.y.view(-1).cpu().numpy().astype("float32")
pos_val_edges, neg_val_edges = get_pos_neg_edges('valid', split_edge, val_data.edge_index, val_data.num_nodes, percent=100)
# .cpu() is a no-op for CPU tensors and keeps .numpy() valid for accelerator tensors.
val_edges = torch.cat((pos_val_edges, neg_val_edges), dim=1).cpu().numpy()

test_predictions = test_logits.view(-1).sigmoid().detach().cpu().numpy()
test_label = test_data.y.view(-1).cpu().numpy().astype("float32")
pos_test_edges, neg_test_edges = get_pos_neg_edges('test', split_edge, test_data.edge_index, test_data.num_nodes, percent=100)
test_edges = torch.cat((pos_test_edges, neg_test_edges), dim=1).cpu().numpy()


# Bundle everything later cells read: the training graph plus per-split
# predictions, labels and edge lists.
results = {"train_graph": G,
        "test_predictions": test_predictions,
        "test_labels": test_label,
        "val_predictions": val_predictions,
        "val_labels": val_label,
        "val_edges": val_edges,
        "test_edges": test_edges}


  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):


AUC
Epoch: 00, Loss: 0.6941, Valid: 58.81%, Test: 54.64%
AUC
Epoch: 01, Loss: 0.6928, Valid: 63.00%, Test: 59.05%
AUC
Epoch: 02, Loss: 0.6904, Valid: 65.45%, Test: 63.14%
AUC
Epoch: 03, Loss: 0.6858, Valid: 66.56%, Test: 65.85%
AUC
Epoch: 04, Loss: 0.6712, Valid: 67.09%, Test: 67.34%


In [8]:
import networkx as nx

# Summarise the exported training graph: node count, edge count, directedness.
train_graph = results["train_graph"]

num_nodes, num_edges = train_graph.number_of_nodes(), train_graph.number_of_edges()
is_directed = nx.is_directed(train_graph)

print(
    f"Number of nodes: {num_nodes}",
    f"Number of edges: {num_edges}",
    f"Is the graph directed? {is_directed}",
    sep="\n",
)

Number of nodes: 70628
Number of edges: 70707
Is the graph directed? False


In [15]:
# Display the collated training batch (built with batch_size=len(train_dataset)).
train_data

DataBatch(x=[70628, 1433], edge_index=[2, 70707], y=[8976], edge_weight=[70707], z=[70628], node_id=[70628], num_nodes=70628, batch=[70628], ptr=[8977])

In [34]:
# Number of test labels stored in `results`.
len(results["test_labels"])

1054

In [35]:
# Number of validation predictions stored in `results`.
len(results["val_predictions"])

526

In [5]:
# Inspect the validation labels stored in `results`.
# NOTE(review): this displays the whole array; consider showing a slice instead.
results["val_labels"]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [41]:
# Shape of the validation edge array; uses the array's own attribute so the
# cell does not rely on `np` having been imported elsewhere.
results["val_edges"].shape

(2, 526)

In [44]:
# Shape of the test edge array; uses the array's own attribute so the cell
# does not rely on `np` having been imported elsewhere.
results["test_edges"].shape

(2, 1054)

In [16]:
# Rebuild the test loader so the whole test dataset is returned as one batch.
# NOTE(review): this rebinds the global `test_loader` that test_wBatch() iterates
# over - confirm that is intended.
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

# `daat` stays bound to the last batch once the loop finishes.
for daat in test_loader:
    # Print the `batch` attribute of the collated batch.
    print(daat.batch)

tensor([   0,    0,    0,  ..., 1053, 1053, 1053])


In [17]:
# Length of the `batch` attribute of `daat`, the variable left over from iterating `test_loader`.
print(len(daat.batch))

7939


In [19]:
# Display the data object stored on the test dataset.
test_dataset.data

Data(x=[7939, 1433], edge_index=[2, 7786], y=[1054], edge_weight=[7786], z=[7939], node_id=[7939], num_nodes=7939, edge_label_index=[2, 527], edge_label=[527])

In [32]:
# get the graph results
# Move each collated split to `device`: the model was placed there when it was
# built, so CPU-only batches would fail on an accelerator.
train_data = next(iter(DataLoader(train_dataset, batch_size=len(train_dataset)))).to(device)
val_data = next(iter(DataLoader(val_dataset, batch_size=len(val_dataset)))).to(device)
test_data = next(iter(DataLoader(test_dataset, batch_size=len(test_dataset)))).to(device)

# Same rule as the train/test helpers: node ids only reach the model when `emb` is set.
test_node_id = test_data.node_id if emb else None
val_node_id = val_data.node_id if emb else None

# Inference only: eval mode, and no autograd graph over the full splits.
model.eval()
with torch.no_grad():
    test_logits = model(test_data.z, test_data.edge_index, test_data.batch, test_data.x, None, test_node_id)
    val_logits = model(val_data.z, val_data.edge_index, val_data.batch, val_data.x, None, val_node_id)

test_predictions = test_logits.view(-1).sigmoid().detach().cpu().numpy()
test_label = test_data.y.view(-1).cpu().to(torch.float)

val_predictions = val_logits.view(-1).sigmoid().detach().cpu().numpy()
val_label = val_data.y.view(-1).cpu().to(torch.float)

G = to_networkx(train_data, to_undirected=True)
# Edge indices of the collated batches, as NumPy arrays.
val_edges = val_data.edge_index.cpu().numpy()

all_test_edges = test_data.edge_index.cpu().numpy()

In [34]:
# Display the test labels (a float tensor on the CPU).
test_label

tensor([1., 1., 1.,  ..., 0., 0., 0.])