In [1]:
import os
import json
import pickle
import torch
import glob
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split
import time

import torchvision
import torchvision.transforms as transforms
import scipy.io as sio
import argparse
from modules.models import Model

ModuleNotFoundError: No module named 'scipy'

In [2]:
# ! conda install -y pytorch==1.5.0 torchvision==0.6.0 cudatoolkit=10.1 -c pytorch

In [3]:
# ! pip3 install torch-scatter==2.0.4 -f https://pytorch-geometric.com/whl/torch-1.5.0+cu101.html
# ! pip3 install torch-sparse==0.6.4 -f https://pytorch-geometric.com/whl/torch-1.5.0+cu101.html
# ! pip3 install torch-cluster==1.5.4 -f https://pytorch-geometric.com/whl/torch-1.5.0+cu101.html
# ! pip3 install torch-spline-conv==1.2.0 -f https://pytorch-geometric.com/whl/torch-1.5.0+cu101.html
# ! pip3 install torch-geometric==1.5.0

In [4]:
# ! python -V

In [1]:
import torch_geometric as geo

import random
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
from matplotlib import pyplot as plt
%matplotlib inline
os.environ["CUDA_VISIBLE_DEVICES"] = "0"#, 1, 2"

%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(out, color):
    """Show a 2-D t-SNE scatter plot of `out`, with points coloured by `color`.

    `out` is detached from autograd, moved to the CPU and converted to a
    NumPy array before being projected to two components.
    """
    embedding = out.detach().cpu().numpy()
    projected = TSNE(n_components=2).fit_transform(embedding)
    xs, ys = projected[:, 0], projected[:, 1]

    plt.figure(figsize=(10,10))
    # Hide the tick marks on both axes.
    plt.xticks([])
    plt.yticks([])

    plt.scatter(xs, ys, s=70, c=color, cmap="Set2")
    plt.show()

OSError: /home/twp/anaconda3/envs/guaa/lib/python3.6/site-packages/torch_sparse/_version.so: undefined symbol: _ZN3c105ErrorC1ENS_14SourceLocationERKSs

In [6]:
# torch.distributed.init_process_group(backend='nccl', init_method='tcp://localhost:23456', rank=0, world_size=1)

In [7]:
dataset_name = 'PROTEINS'

In [8]:
def getDataset(root, name, transform):
    """Load a torch_geometric dataset selected by (case-insensitive) name.

    Args:
        root: directory passed to the dataset constructor as `root`.
        name: dataset name; matched case-insensitively, forwarded unchanged.
        transform: transform forwarded to the dataset constructor.

    Returns:
        A `Planetoid` dataset for cora / pubmed / citeseer, or a `TUDataset`
        (with `use_node_attr=True`) for mutag / imdb-binary / ethanol / proteins.

    Raises:
        NotImplementedError: if `name` is not one of the names listed above.
    """
    key = name.lower()
    if key in ('cora', 'pubmed', 'citeseer'):
        return geo.datasets.Planetoid(root=root, name=name, transform=transform)
    if key in ('mutag', 'imdb-binary', 'ethanol', 'proteins'):
        return geo.datasets.TUDataset(root=root, name=name, transform=transform, use_node_attr=True)
    raise NotImplementedError("{} not supported!".format(name))

In [9]:
dataset = getDataset('data', dataset_name, None)

In [10]:
# Dataset-level summary, one item per output line.
print(
    '',
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

data = dataset[0]  # First graph object in the dataset.

print()
print(data)
print('===========================================================================================================')

# Structural statistics of that first graph.
print(
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Contains isolated nodes: {data.contains_isolated_nodes()}',
    f'Contains self-loops: {data.contains_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)


Dataset: PROTEINS(1113):
Number of graphs: 1113
Number of features: 4
Number of classes: 2

Data(edge_index=[2, 162], x=[42, 4], y=[1])
Number of nodes: 42
Number of edges: 162
Average node degree: 3.86
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [11]:
if dataset_name == 'PROTEINS':
    # PROTEINS branch: the victim is the `Model` imported from modules.models,
    # configured from a pickled argparse-style Namespace.
    print("USING Hierarchical Graph Pooling with Structure Learning")
    # SECURITY: pickle.load can execute arbitrary code -- only load a config
    # file that you produced yourself.
    with open('config-{}.pickle'.format(dataset_name), 'rb') as handle:
        args = pickle.load(handle)
    args.device='cpu'
    print(args)
    # 80% / 10% / remainder split into train / validation / test.
    # NOTE(review): random_split draws from the unseeded global RNG, so the
    # split changes on every kernel restart; evaluating a previously saved
    # checkpoint on a fresh split can overlap with its training data.
    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - (num_training + num_val)
    training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])

    train_loader = geo.data.DataLoader(training_set, batch_size=args.batch_size, shuffle=True)
    val_loader = geo.data.DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)
    test_loader = geo.data.DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    try:
        model = Model(args).to(args.device)
    except RuntimeError:
        # Fall back to the CPU if the configured device cannot be used.
        args.device = 'cpu'
        model = Model(args).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    def _checkpoint_epoch(path):
        """Return the epoch number encoded in a checkpoint file name such as '12.pth'."""
        return int(os.path.basename(path).split('.')[0])

    def train():
        """Train `model` with early stopping on the validation loss.

        A checkpoint is written to models/PROTEINS/<epoch>.pth after every
        epoch; checkpoints older than the best epoch are pruned while training
        and checkpoints newer than it are pruned at the end.

        Returns:
            The (0-based) epoch index with the lowest validation loss.
        """
        min_loss = 1e10
        patience_cnt = 0
        val_loss_values = []
        best_epoch = 0

        # torch.save does not create missing directories.
        os.makedirs('models/PROTEINS', exist_ok=True)

        t = time.time()
        for epoch in range(args.epochs):
            # compute_test() leaves the model in eval mode, so training mode
            # has to be re-enabled at the start of every epoch.
            model.train()
            loss_train = 0.0
            correct = 0
            for i, data in enumerate(train_loader):
                optimizer.zero_grad()
                data = data.to(args.device)
                out = model(data)
                loss = F.nll_loss(out, data.y)
                loss.backward()
                optimizer.step()
                loss_train += loss.item()
                pred = out.max(dim=1)[1]
                correct += pred.eq(data.y).sum().item()
            acc_train = correct / len(train_loader.dataset)
            acc_val, loss_val = compute_test(val_loader)
            print('Epoch: {:04d}'.format(epoch + 1), 'loss_train: {:.6f}'.format(loss_train),
                  'acc_train: {:.6f}'.format(acc_train), 'loss_val: {:.6f}'.format(loss_val),
                  'acc_val: {:.6f}'.format(acc_val), 'time: {:.6f}s'.format(time.time() - t))

            val_loss_values.append(loss_val)
            torch.save(model.state_dict(), 'models/PROTEINS/{}.pth'.format(epoch))
            if val_loss_values[-1] < min_loss:
                min_loss = val_loss_values[-1]
                best_epoch = epoch
                patience_cnt = 0
            else:
                patience_cnt += 1

            if patience_cnt == args.patience:
                break

            # Drop checkpoints that precede the current best epoch.
            for f in glob.glob('models/PROTEINS/*.pth'):
                if _checkpoint_epoch(f) < best_epoch:
                    os.remove(f)

        # Drop checkpoints written after the best epoch.
        for f in glob.glob('models/PROTEINS/*.pth'):
            if _checkpoint_epoch(f) > best_epoch:
                os.remove(f)
        print('Optimization Finished! Total time elapsed: {:.6f}'.format(time.time() - t))

        return best_epoch


    def compute_test(loader):
        """Evaluate `model` on `loader`.

        Switches the model to eval mode (and leaves it there).

        Returns:
            (accuracy over the loader's dataset, sum of per-batch NLL losses).
        """
        model.eval()
        correct = 0.0
        loss_test = 0.0
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data in loader:
                data = data.to(args.device)
                out = model(data)
                pred = out.max(dim=1)[1]
                correct += pred.eq(data.y).sum().item()
                loss_test += F.nll_loss(out, data.y).item()
        return correct / len(loader.dataset), loss_test

else:
    class GCN_node(torch.nn.Module):
        """Two-layer GCN producing one output vector per node."""

        def __init__(self, hidden_channels):
            super(GCN_node, self).__init__()
            torch.manual_seed(0)
            self.conv1 = geo.nn.GCNConv(dataset.num_features, hidden_channels)
            self.conv2 = geo.nn.GCNConv(hidden_channels, dataset.num_classes)

        def forward(self, x, edge_index):
            """Return per-node class scores (no softmax applied)."""
            x = self.conv1(x, edge_index)
            x = x.relu()
            x = nn.functional.dropout(x, p=0.5, training=self.training)
            x = self.conv2(x, edge_index)
            return x

    class GCN_graph(torch.nn.Module):
        """Three GCN layers, mean-pool readout and a linear classifier per graph."""

        def __init__(self, hidden_channels):
            super(GCN_graph, self).__init__()
            torch.manual_seed(0)
            self.conv1 = geo.nn.GCNConv(dataset.num_node_features, hidden_channels)
            self.conv2 = geo.nn.GCNConv(hidden_channels, hidden_channels)
            self.conv3 = geo.nn.GCNConv(hidden_channels, hidden_channels)
            self.lin = nn.Linear(hidden_channels, dataset.num_classes)

        def forward(self, x, edge_index, batch):
            """Return per-graph class scores (no softmax applied)."""
            # not using edge attributes as it's hard to fake
            # 1. Obtain node embeddings
            x = self.conv1(x, edge_index)
            x = x.relu()
            x = self.conv2(x, edge_index)
            x = x.relu()
            x = self.conv3(x, edge_index)

            # 2. Readout layer
            x = geo.nn.global_mean_pool(x, batch)  # [batch_size, hidden_channels]

            # 3. Apply a final classifier
            x = nn.functional.dropout(x, p=0.5, training=self.training)
            x = self.lin(x)

            return x

    # Use the GPU when one is available instead of assuming CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # train()/test() below iterate over these loaders, so they must exist in
    # this branch too. Same 80% / 10% / remainder split as the PROTEINS branch.
    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - (num_training + num_val)
    training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
    batch_size = 64  # assumption: this branch has no config file -- tune as needed
    train_loader = geo.data.DataLoader(training_set, batch_size=batch_size, shuffle=True)
    val_loader = geo.data.DataLoader(validation_set, batch_size=batch_size, shuffle=False)
    test_loader = geo.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    victim_model = GCN_graph(hidden_channels=512).to(device)
    optimizer = torch.optim.Adam(victim_model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss()

    def train():
        """Run one training epoch of `victim_model` over `train_loader`."""
        victim_model.train()

        for data in train_loader:  # Iterate in batches over the training dataset.
            data = data.to(device)
            out = victim_model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
            loss = criterion(out, data.y)  # Compute the loss.
            loss.backward()  # Derive gradients.
            optimizer.step()  # Update parameters based on gradients.
            optimizer.zero_grad()  # Clear gradients.

    def test(loader):
        """Return the accuracy of `victim_model` over `loader` (leaves it in eval mode)."""
        victim_model.eval()
        correct = 0
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data in loader:  # Iterate in batches over the training/test dataset.
                data = data.to(device)
                out = victim_model(data.x, data.edge_index, data.batch)
                pred = out.argmax(dim=1)  # Use the class with highest score.
                correct += int((pred == data.y).sum())  # Check against ground-truth labels.
        return correct / len(loader.dataset)  # Derive ratio of correct predictions.

USING Hierarchical Graph Pooling with Structure Learning
Namespace(batch_size=512, dataset='PROTEINS', device='cpu', dropout_ratio=0.0, epochs=1000, lamb=1.0, lr=0.001, nhid=128, num_classes=2, num_features=4, patience=100, pooling_ratio=0.5, sample_neighbor=True, seed=777, sparse_attention=True, structure_learning=True, weight_decay=0.001)


In [12]:
trained = True

In [13]:
args#.lr = 5e-5

Namespace(batch_size=512, dataset='PROTEINS', device='cpu', dropout_ratio=0.0, epochs=1000, lamb=1.0, lr=0.001, nhid=128, num_classes=2, num_features=4, patience=100, pooling_ratio=0.5, sample_neighbor=True, seed=777, sparse_attention=True, structure_learning=True, weight_decay=0.001)

In [14]:
# Either train the victim model from scratch or load a saved checkpoint,
# then report loss/accuracy on the test loader.
if not trained:
    if dataset_name != "PROTEINS":
        for epoch in range(1, 501):
            train()
            if epoch % 20 == 0:
                train_acc = test(train_loader)
                test_acc = test(test_loader)
                print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
    else:
        # Model training; train() returns the index of the best epoch.
        best_model = train()
        # Restore best model for test set. map_location keeps the load working
        # when the checkpoint was written on a different device.
        model.load_state_dict(torch.load('models/PROTEINS/{}.pth'.format(best_model),
                                         map_location=args.device))
        test_acc, test_loss = compute_test(test_loader)
        print('Test set results, loss = {:.6f}, accuracy = {:.6f}'.format(test_loss, test_acc))
else:
    # Load the previously trained PROTEINS checkpoint (epoch 220).
    # NOTE(review): this branch uses `model`/`compute_test`, which only exist
    # when dataset_name == 'PROTEINS'.
    model.load_state_dict(torch.load('models/PROTEINS/220.pth', map_location=args.device))
    model.eval()
    test_acc, test_loss = compute_test(test_loader)
    print('Test set results, loss = {:.6f}, accuracy = {:.6f}'.format(test_loss, test_acc))

Test set results, loss = 0.392510, accuracy = 0.830357


In [15]:
test_acc, _ = compute_test(test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Test Accuracy: 0.8304


In [16]:
def idx2adj(data):
    """Convert a graph's COO edge list into a dense adjacency matrix.

    Args:
        data: object exposing `edge_index` (integer tensor of shape [2, E],
            row 0 = source nodes, row 1 = target nodes) and `num_nodes`.

    Returns:
        Float tensor of shape [num_nodes, num_nodes] on the same device as
        `data.edge_index`, holding 1 at every (source, target) pair and 0
        elsewhere. Duplicate edges collapse to a single 1.
    """
    device = data.edge_index.device
    adj = torch.zeros(size=(data.num_nodes, data.num_nodes), device=device)
    src, dst = data.edge_index[0].long(), data.edge_index[1].long()
    # One vectorized scatter instead of a Python loop over every edge.
    adj[src, dst] = 1
    return adj

In [17]:
def adj2idx(edge_index):
    """Convert a dense square adjacency matrix into a COO edge list.

    Args:
        edge_index: square tensor; an entry equal to 1 marks an edge from the
            row node to the column node. Any other value is treated as no edge.

    Returns:
        Float tensor of shape [2, E] on the same device as the input, listing
        edges in row-major order (row 0 = source, row 1 = target). An
        adjacency without edges yields an empty [2, 0] tensor. Callers that
        need integer indices convert with `.long()`.
    """
    assert edge_index.shape[0] == edge_index.shape[1]
    # nonzero() walks the matrix in row-major order, matching a nested i/j scan.
    pairs = torch.nonzero(edge_index == 1, as_tuple=False)  # [E, 2]
    return pairs.t().float().to(edge_index.device)

## Randomly initialize an edge index and random node features

In [18]:
def getNodes(n):
    """Draw random features for `n` nodes on `args.device`.

    Returns a float tensor of shape [n, 4]: column 0 is an integer drawn
    uniformly from [-500, 800); columns 1-3 are a one-hot encoding of a
    category drawn uniformly from {0, 1, 2}.
    (Author's note: column 0 stands for the van der Waals force -- not
    verifiable from this notebook.)
    """
    # Draw the scalar column before the categories so the RNG is consumed
    # in a fixed order.
    scalar_col = torch.randint(-500, 800, (n,1), device=args.device)
    categories = torch.randint(0, 3, (n,), device=args.device)
    one_hot_cols = torch.nn.functional.one_hot(categories, num_classes=3)
    features = torch.cat((scalar_col, one_hot_cols), dim=1)
    return features.float()

## explore the data

In [19]:
# Histogram of the values taken by each of the 4 node-feature columns:
# cnt[<column index as str>][<value as str>] -> number of occurrences.
cnt = {str(col): {} for col in range(4)}
for i in range(len(dataset)):
    # Fetch the graph once per graph; indexing the dataset inside the inner
    # loops rebuilds the graph object for every single feature entry.
    node_feats = dataset[i].x
    for j in range(node_feats.shape[0]):
        for k in range(4):
            value_key = str(node_feats[j, k].item())
            column_hist = cnt[str(k)]
            column_hist[value_key] = column_hist.get(value_key, 0) + 1
#             print(sum([dataset[i].x[j, idx].item() for idx in range(1,4)]))

In [20]:
dataset[i].x[j, k].item()

1.0

In [21]:
max([float(i) for i in list(cnt['0'].keys())])

798.0

In [22]:
min([float(i) for i in list(cnt['0'].keys())])

-538.0

## Load generated surrogate data to torch geometric dataset (not used)

In [23]:
class surrogateData(geo.data.Dataset):
    """List-backed dataset that serves already-built graph objects by position."""

    def __init__(self, dataList):
        """Store `dataList`, the sequence of graph objects to serve."""
        super().__init__()
        # Graph objects served by this dataset, indexed positionally.
        self.data = dataList

    def __len__(self):
        """Return the number of wrapped graph objects."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the graph stored at `index`, unmodified."""
        # A bare `.x.long` attribute access used to sit here; it neither
        # called the method nor stored a result, so it has been dropped.
        return self.data[index]

## freeze weights in victim model, the model will return node embedding after freezing

In [24]:
# Switch the victim model to inference mode and stop gradient tracking on
# every one of its parameters; the flag is echoed per parameter as a check.
model.eval()
for param in model.parameters():
    param.requires_grad_(False)
    print(param.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [25]:
def getOutput(embed):
    """Run the victim model's classifier head on `embed`.

    Applies `model.lin1` and `model.lin2`, each followed by ReLU and dropout
    (rate `model.dropout_ratio`, active only when `model.training`), then
    `model.lin3` and a log-softmax over the last dimension.

    Returns:
        Log-probabilities over classes.
    """
    hidden = embed
    for layer in (model.lin1, model.lin2):
        hidden = F.relu(layer(hidden))
        hidden = F.dropout(hidden, p=model.dropout_ratio, training=model.training)
    return F.log_softmax(model.lin3(hidden), dim=-1)

## Aggregate previous algos together

In [None]:
# For every class, generate a fixed number of class impressions by optimizing
# the node features of a random graph until the frozen victim model assigns
# the target class a probability of at least `cur_tar`.
for cur_class in range(dataset.num_classes):
    # torch.save does not create missing directories.
    save_dir = os.path.join('data', dataset_name, 'classImpression', str(cur_class))
    os.makedirs(save_dir, exist_ok=True)
    target = torch.tensor([cur_class], dtype=torch.long, device=args.device)

    # Update class impressions individually.
    for idx in range(500):

        # Draw a random symmetric adjacency matrix (self-loops allowed).
        # Redraw until it has at least one edge: decrementing `idx` inside a
        # `for` loop does not repeat the iteration, it just loses the sample.
        while True:
            num_nodes = random.randint(1, 600)
            upper = torch.triu(torch.randint(0, 2, (num_nodes, num_nodes), device=args.device))
            adv_adj = ((upper + upper.t()) > 0).int()
            if adv_adj.sum().item() > 0:
                break

        sample = geo.data.Batch()
        sample.edge_index = adj2idx(adv_adj).long()
        sample.x = getNodes(num_nodes)
        sample.x.requires_grad_()

        cl_optim = torch.optim.Adam([sample.x], lr=0.1)
        cur_pred = getOutput(model(sample))
        cur_tar = random.uniform(0.55, 0.99)
        # Named `step` so the feature histogram dict `cnt` is not overwritten.
        step = 0
        # getOutput returns log-probabilities; softmax over the class
        # dimension turns them back into probabilities.
        while F.softmax(cur_pred, dim=-1)[:, cur_class].item() < cur_tar and step < 5000:
            cl_optim.zero_grad()
            cur_pred = getOutput(model(sample))
            loss = F.nll_loss(cur_pred, target)
            loss.backward()
            cl_optim.step()
            if step % 500 == 0:
                print(sample.x.grad.sum())
                print('{} |　Epoch {} | Target Class {} | Current Logits for target class {}'.format(
                    idx, step, cur_class, F.softmax(cur_pred, dim=-1)[0,cur_class].item()))
            step += 1
            # Keep the optimized features inside a fixed range.
            # NOTE(review): the clamp applies to all four columns and its upper
            # bound (600) is below the initial sampling range -- confirm intent.
            with torch.no_grad():
                sample.x.clamp_(-500, 600)
        torch.save(sample, os.path.join(save_dir, '{}.pt'.format(idx)))
        print('Epoch {} | Target Class {} | Current Logits for target class {}'.format(
                    step, cur_class, F.softmax(cur_pred, dim=-1)[0,cur_class].item()))


Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
tensor(-7.0770)
3 |　Epoch 0 | Target Class 0 | Current Logits for target class 0.00018038782582152635
Epoch 14 | Target Class 0 | Current Logits for target class 0.8888623118400574
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
Epoch 0 | Target Class 0 | Current Logits for target class 1.0
tensor(-2.9774)
13 |　Epoch 0 | Target Class 0 | Current Logits 

In [43]:
print("Class impression generation finished!")

Class impression generation finished!


## New idea: complete graph with edge feature as probability of edge

In [None]:
# For every class, generate a fixed number of class impressions on a complete
# graph, optimizing one weight per edge (clamped to [0, 1]) until the frozen
# victim model assigns the target class a probability of at least `cur_tar`.
# NOTE(review): this only optimizes anything if `model` actually reads
# `edge_attr` from its input -- confirm against modules.models.Model.
for cur_class in range(dataset.num_classes):
    # torch.save does not create missing directories.
    save_dir = os.path.join('data', dataset_name, 'classImpression', str(cur_class)+'_tropology')
    os.makedirs(save_dir, exist_ok=True)
    target = torch.tensor([cur_class], dtype=torch.long, device=args.device)

    # Update class impressions individually.
    for idx in range(500):

        # Complete graph without self-loops. At least two nodes are needed,
        # otherwise there is no edge weight to optimize.
        num_nodes = random.randint(2, 600)
        sample = geo.data.Batch()
        adv_adj = torch.ones(size=(num_nodes, num_nodes), device=args.device).bool()
        sample.edge_index = geo.utils.remove_self_loops(adj2idx(adv_adj).long())[0]
        sample.x = getNodes(num_nodes)
        sample.edge_attr = torch.rand(sample.edge_index.shape[1], ).to(args.device)
        sample.edge_attr.requires_grad_()

        cl_optim = torch.optim.Adam([sample.edge_attr], lr=0.1)
        cur_pred = getOutput(model(sample))
        cur_tar = random.uniform(0.55, 0.99)
        # Named `step` so the feature histogram dict `cnt` is not overwritten.
        step = 0
        # getOutput returns log-probabilities; softmax over the class
        # dimension turns them back into probabilities.
        while F.softmax(cur_pred, dim=-1)[:, cur_class].item() < cur_tar and step < 5000:
            cl_optim.zero_grad()
            cur_pred = getOutput(model(sample))
            loss = F.nll_loss(cur_pred, target)
            loss.backward()
            cl_optim.step()
            step += 1
            # Edge weights are kept inside [0, 1].
            with torch.no_grad():
                sample.edge_attr.clamp_(0, 1)
        # Store the result as a plain Data object labelled with the target class.
        sample = geo.data.Data(x=sample.x, edge_attr=sample.edge_attr, edge_index=sample.edge_index, y=torch.Tensor([cur_class]).long())
        torch.save(sample, os.path.join(save_dir, '{}.pt'.format(idx)))


In [None]:
sample