In [1]:
import torch
from nets import CTNet, GAPNet

import torch.nn.functional as F
from torch_geometric.loader import DataLoader
from transform_features import FeatureDegree
from torch_geometric.datasets import TUDataset

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [2]:
@torch.no_grad()
def test(modelo, loader, device):
    modelo.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        pred, mc_loss, o_loss = modelo(data.x, data.edge_index, data.batch)
        loss = F.nll_loss(pred, data.y.view(-1)) + mc_loss + o_loss
        correct += pred.max(dim=1)[1].eq(data.y.view(-1)).sum().item()

    return loss, correct / len(loader.dataset)


## Import dataset

In [209]:
# Choose exactly one dataset per run. Previously REDDIT-BINARY was loaded
# (including its pre_transform) and then immediately replaced by MUTAG, so the
# first load was pure wasted work. The default keeps the effective result: MUTAG.
# NOTE(review): the checkpoints loaded later in this notebook are named
# "REDDIT-BINARY_*"; switch DATASET_NAME before running those cells.
DATASET_NAME = "MUTAG"  # or "REDDIT-BINARY"

if DATASET_NAME == "REDDIT-BINARY":
    dataset = TUDataset(root='data_colab/TUDataset', name=DATASET_NAME,
                        pre_transform=FeatureDegree(), use_node_attr=True)
    BATCH_SIZE = 64
    num_of_centers = 420
elif DATASET_NAME == "MUTAG":
    dataset = TUDataset(root='data_colab/TUDataset', name=DATASET_NAME)
    BATCH_SIZE = 32
    num_of_centers = 17
else:
    raise ValueError(f"Unsupported dataset: {DATASET_NAME!r}")

In [188]:
# Dataset-level summary: size, feature dimensionality and number of classes.
print(
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}, {dataset.num_node_features}, {dataset.num_node_attributes}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

print()
datum = dataset[0]  # first graph of the dataset
print(datum)
# Structural statistics of that first graph.
print(
    '=============================================================',
    f'Number of nodes: {datum.num_nodes}',
    f'Number of edges: {datum.num_edges}',
    f'Average node degree: {datum.num_edges / datum.num_nodes:.2f}',
    f'Has isolated nodes: {datum.has_isolated_nodes()}',
    f'Has self-loops: {datum.has_self_loops()}',
    f'Is undirected: {datum.is_undirected()}',
    sep='\n',
)


print(dataset)

Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7, 7, 0
Number of classes: 2

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])
Number of nodes: 17
Number of edges: 38
Average node degree: 2.24
Has isolated nodes: False
Has self-loops: False
Is undirected: True
MUTAG(188)


## Stratify

600

(-0.36352052393546647, 0.1747312785712479)

## Embeddings CTNet

In [None]:
# Evaluation loader: keep dataset order so repeated runs see identical batches
# (shuffling adds nothing at evaluation time and makes per-batch values vary).
test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)

In [17]:
# Logged training line kept for reference:
# Epoch: 059, Train Loss: 165.455, Train Acc: 0.711, Test Loss: 150.841, Test Acc: 0.750
model = CTNet(
    dataset.num_features,
    dataset.num_classes,
    k_centers=num_of_centers,
)
model = model.to(device)
# Restore the saved weights, mapped onto the active device.
model.load_state_dict(
    torch.load(
        "models/REDDIT-BINARY_CTNet_iter0.pth",
        map_location=torch.device(device),
    )
)
model.eval()

CTNet(
  (conv1): DenseGraphConv(32, 32)
  (conv2): DenseGraphConv(32, 32)
  (pool1): Linear(in_features=32, out_features=420, bias=True)
  (pool2): Linear(in_features=32, out_features=16, bias=True)
  (lin1): Linear(in_features=1, out_features=32, bias=True)
  (lin2): Linear(in_features=32, out_features=32, bias=True)
  (lin3): Linear(in_features=32, out_features=2, bias=True)
)

In [15]:
# test() takes the target device as its third argument; omitting it raised a TypeError.
loss, acc = test(model, test_loader, device)

In [16]:
# Display the evaluation loss and accuracy as a pair.
(loss, acc)

(tensor(1.0622), 0.794)

## Embeddings GAPNet Laplacian

In [None]:
# Evaluation loader: keep dataset order so repeated runs see identical batches.
test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Evaluation loader: no shuffling, so batches are identical across runs.
test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
# GAPNet configured with the "laplacian" derivative; the target device is passed to the constructor.
model = GAPNet(dataset.num_features, dataset.num_classes, derivative="laplacian", device=device)
# Weights are deserialised on the CPU and then copied into the model's parameters.
model.load_state_dict(torch.load("models/REDDIT-BINARY_GAPNet_laplacian_iter0.pth", map_location=torch.device('cpu')))
model.eval()

# Degree assortativity

In [None]:
import networkx as nx
from torch_geometric import utils
import numpy as np


# One degree-assortativity coefficient per graph, computed on the undirected
# NetworkX view of each graph in the dataset.
d_assortativities = np.fromiter(
    (
        nx.degree_assortativity_coefficient(
            utils.to_networkx(graph, to_undirected=True)
        )
        for graph in dataset
    ),
    dtype=float,
    count=len(dataset),
)
# Mean and standard deviation across all graphs.
d_assortativities.mean(), d_assortativities.std()

# Stratify

In [210]:
from sklearn.model_selection import train_test_split

def class_distrib(dataset):
    """Count how many items of ``dataset`` carry each class label.

    Parameters
    ----------
    dataset : iterable
        Items exposing ``y``, a single-element tensor holding the label.

    Returns
    -------
    dict
        Maps ``int`` label -> number of items, in first-seen order.
    """
    d = dict()
    for data in dataset:
        # .item() works for any single-element tensor on any device; the old
        # int(tensor.numpy()) path is deprecated for shape-[1] arrays in recent
        # NumPy and fails for CUDA tensors.
        label = int(data.y.item())
        d[label] = d.get(label, 0) + 1
    return d

def batch_class_distrib(batch):
    """Count how many labels of each class appear in ``batch.y``.

    Parameters
    ----------
    batch : object
        Exposes ``y``, a tensor of labels (flattened before counting).

    Returns
    -------
    dict
        Maps ``int`` label -> number of occurrences, in first-seen order.
    """
    d = dict()
    # tolist() copies the labels to host Python scalars in one step, so this
    # also works for CUDA tensors and for labels stored with shape [N, 1].
    for label in batch.y.reshape(-1).tolist():
        label = int(label)
        d[label] = d.get(label, 0) + 1
    return d

# Number of labels stored for the whole dataset.
dataset.data.y.shape[0]

600

In [211]:
# Stratified 80/20 split over graph indices. The seed is fixed so that
# re-running the notebook reproduces the same train/validation partition.
train_indices, val_indices = train_test_split(
    list(range(len(dataset.data.y))),
    test_size=0.2,
    stratify=dataset.data.y,
    random_state=42,
)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)

In [212]:
# Label counts for the full dataset, the training split and the validation split.
tuple(class_distrib(split) for split in (dataset, train_dataset, val_dataset))

({5: 100, 4: 100, 0: 100, 1: 100, 2: 100, 3: 100},
 {0: 80, 5: 80, 4: 80, 1: 80, 2: 80, 3: 80},
 {0: 20, 1: 20, 3: 20, 5: 20, 4: 20, 2: 20})

**Optional:** stratified sampling of batches

In [213]:
from torch.utils.data import SequentialSampler

In [214]:
class StratifiedSampler():
    """Index sampler built from one stratified 50/50 shuffle split of the labels.

    ``gen_sample_array`` draws a single ``StratifiedShuffleSplit`` with
    ``test_size=0.5`` and concatenates its two index halves. Each half follows
    the overall class proportions of ``class_vector``; batches cut from the
    resulting order are not otherwise forced to be class-balanced.
    """
    def __init__(self, class_vector, batch_size, random_state=None):
        """
        Arguments
        ---------
        class_vector : torch tensor
            a vector of class labels
        batch_size : integer
            batch_size
        random_state : int, RandomState instance or None, optional
            forwarded to ``StratifiedShuffleSplit``. With an int, every call to
            ``gen_sample_array`` returns the same order; with ``None`` (the
            default) each call draws a fresh order.
        """
        # Keep at least one split: a dataset smaller than one batch used to
        # give n_splits == 0, leaving nothing to draw from.
        self.n_splits = max(1, class_vector.size(0) // batch_size)
        self.class_vector = class_vector
        self.random_state = random_state

    def gen_sample_array(self):
        """Return a NumPy array of dataset indices in stratified-shuffled order.

        Raises
        ------
        ImportError
            If scikit-learn is not installed.
        """
        try:
            from sklearn.model_selection import StratifiedShuffleSplit
        except ImportError as exc:
            # Fail here with a clear message instead of printing and then
            # hitting an unbound name a few lines later.
            raise ImportError('Need scikit-learn for this functionality') from exc
        import numpy as np

        # Labels must live on the host before conversion to NumPy.
        y = self.class_vector.detach().cpu().numpy()
        # The split is driven by y only; X is a shape placeholder. Zeros avoid
        # consuming torch's global random number stream.
        X = np.zeros((y.shape[0], 2))
        s = StratifiedShuffleSplit(n_splits=self.n_splits, test_size=0.5,
                                   random_state=self.random_state)

        # Only the first generated split is used.
        train_index, test_index = next(s.split(X, y))
        return np.hstack([train_index, test_index])

    def __iter__(self):
        return iter(self.gen_sample_array())

    def __len__(self):
        return len(self.class_vector)

In [215]:
# Sampler built from the labels of the whole dataset, sized by the batch size.
sampler = StratifiedSampler(dataset.data.y, BATCH_SIZE)


In [227]:
# Batches follow the index order emitted by `sampler`, so shuffling stays off.
loader = DataLoader(
    dataset,  # train_dataset
    batch_size=BATCH_SIZE,
    shuffle=False,
    sampler=sampler,
)

{0: 9, 1: 19}

In [230]:
# NOTE(review): these look like ten per-run accuracy scores for two
# configurations - confirm what `npocha` and `n` stand for.
npocha = [
    0.638, 0.788, 0.65, 0.82, 0.816,
    0.83, 0.772, 0.704, 0.8, 0.696,
]
n = [
    0.76, 0.84, 0.624, 0.614, 0.768,
    0.776, 0.836, 0.776, 0.706, 0.71,
]

In [231]:
# Mean and (population) standard deviation of the `npocha` scores.
tuple(stat(npocha) for stat in (np.mean, np.std))

(0.7514, 0.06902202547013525)

In [232]:
# Mean and (population) standard deviation of the `n` scores.
tuple(stat(n) for stat in (np.mean, np.std))

(0.7409999999999999, 0.0737360156233031)