In [3]:
# Install required packages.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric


In [5]:
import pandas as pd
import numpy as np
import torch
from torch.nn import functional as F
from sklearn.metrics import roc_auc_score
from torch_geometric.data import Data, DataLoader
import torch.nn as nn
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch
from torch.nn import Linear
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj, dense_to_sparse, add_self_loops, degree
from torch_geometric.nn import MessagePassing

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
print(torch.cuda.get_device_name(0))

def visualize(h, color):
    z = TSNE(n_components=2).fit_transform(out.detach().cpu().numpy())

    plt.figure(figsize=(10,10))
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.show()

from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')



class FGCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(FGCNConv, self).__init__(aggr='add')  # "Add" aggregation (Step 5).
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index, edge_attr):
        # x has shape [N, in_channels]
        # edge_index has shape [2, E]

        # Step 1: Add self-loops to the adjacency matrix.
        
        # Step 2: Linearly transform node feature matrix.
        x = self.lin(x)

        # Step 3: Compute normalization.
        row, col = edge_index
        deg_row = degree(row, x.size(0), dtype=x.dtype)
        deg_col = degree(col, x.size(0), dtype=x.dtype)
        deg_row_inv_sqrt = deg_row.pow(-0.5)
        deg_col_inv_sqrt = deg_col.pow(-0.5)
        norm = deg_row_inv_sqrt[row] * deg_col_inv_sqrt[col]
        norm = norm * edge_attr

        # Step 4-5: Start propagating messages.
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        # x_j has shape [E, out_channels]

        # Step 4: Normalize node features.
        return norm.view(-1, 1) * x_j

beta = 1
def PowerInd(edge_index,beta):
    # returns the edge list in COO format with shape [2,num_edges,num_2_hop neighbors]
    # the edge weighted is stored in the edge_attr, 
    row, col = edge_index
    edge_attr = torch.ones(edge_index.size(1))
    edge_attr = torch.mul(edge_attr,-2*beta)
    adj = to_dense_adj(edge_index)
    adj = torch.matmul(adj,adj)
    adj = torch.reshape(adj,(adj.size(1),adj.size(2)))
    adj = adj+torch.mul(torch.eye(adj.size(0),dtype=torch.float),beta*beta)
    edge_index_f, edge_attr_f = dense_to_sparse(adj)
    print(edge_attr_f)
    edge_index = torch.cat((edge_index,edge_index_f),dim=1)
    edge_attr = torch.cat((edge_attr,edge_attr_f),dim=0)
    return edge_index, edge_attr


edge_index_f, edge_attr_f = PowerInd(data.edge_index,beta)

data_f = Data(edge_index=edge_index_f,x= data.x,edge_attr=edge_attr_f, test_mask=data.test_mask,train_mask=data.train_mask,y=data.y)



class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

        self.fconv1 = FGCNConv(dataset.num_features, hidden_channels)       
        self.fconv2 = FGCNConv(hidden_channels, dataset.num_classes)
        self.w = nn.Parameter(torch.rand(1,1,requires_grad=True))
        #self.lin1 = Linear(dataset.num_classes,dataset.num_classes)

    def forward(self, x, edge_index, edge_index_f, edge_attr_f):
        uf = self.conv1(x, edge_index)
        uf = uf.relu()
        uf = F.dropout(uf, p=0.5, training=self.training)
        uf = self.conv2(uf, edge_index)
        

        f = self.fconv1(x, edge_index_f, edge_attr_f)
        f = f.relu()
        f = F.dropout(f, p=0.5, training=self.training)
        f = self.fconv2(f, edge_index_f, edge_attr_f)
        
        uf = f +torch.mul(uf,self.w)
        print(self.w)
        #uf = torch.cat((uf,f),dim=1)
        #uf = uf.relu()
        #uf = self.lin1(uf)

        return uf

#out = model(data.x, data.edge_index)
#visualize(out, color=data.y)

#from IPython.display import Javascript  # Restrict height of output cell.
#display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN(hidden_channels=16).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
      model.train()
      optimizer.zero_grad()  # Clear gradients.
      out = model(data.x, data.edge_index, data_f.edge_index, data_f.edge_attr)  # Perform a single forward pass.
      loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
      loss.backward()  # Derive gradients.
      optimizer.step()  # Update parameters based on gradients.
      return loss

def test():
      model.eval()
      out = model(data.x, data.edge_index, data_f.edge_index, data_f.edge_attr)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc

data = data.to(device)
data_f = data_f.to(device)
for epoch in range(1, 800):
    loss = train()
    test_acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Test Acc:{test_acc:.4f}')


test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

In [None]:
fname = '/content/drive/My Drive/pan/USAir'

In [None]:

GCN_hidden1 = 400;
GCN_hidden2 = 200;
GCN_hidden3 = 100;
GCN_hidden4 = 50;

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
print(torch.cuda.get_device_name(0))

def non_ptb_data(index, n, x, y):
    # prepare the non-perturbed data
    edge_index = np.concatenate((index,index[:,[1,0]]),axis=0)
    edge_index = torch.tensor(edge_index,dtype=torch.long)
    x = torch.as_tensor(x,dtype=torch.float)
    y = torch.as_tensor(y,dtype=torch.float)
    data = Data(x=x,edge_index=edge_index.t().contiguous(),num_nodes=n, y=y)
    return data

def ptb_data_train(index, n, x, label):
    # prepare the perturbed data with the edge (s,t) added or removed
    # if label == 1 then remove the edge, and add the edge otherwise
    labels = np.zeros((1,2))
    labels[0,0] = label[2]
    labels[0,1] = label[2]
    if label[2] == 0:
        edge = np.zeros((1,2))
        edge[0,0] = label[0]
        edge[0,1] = label[1]
        edge_index = np.concatenate((index,edge),axis=0)
        data = non_ptb_data(edge_index,n,x,labels)
    else:
        ind = np.where((index == (label[0], label[1])).all(axis=1))
        edge_index = np.delete(index,ind[0],0)
        data = non_ptb_data(edge_index,n,x,labels)
    return data

def ptb_data_test(index, n, x, label):
    # prepare the perturbed data with the edge (s,t) added or removed
    # if label == 1 then remove the edge, and add the edge otherwise
    labels = np.zeros((1,2))
    labels[0,0] = 0
    labels[0,1] = label[2]

    edge = np.zeros((1,2))
    edge[0,0] = label[0]
    edge[0,1] = label[1]
    edge_index = np.concatenate((index,edge),axis=0)
    data = non_ptb_data(edge_index,n,x,labels)
    return data





class MLP(torch.nn.Module):
    def __init__(self,input_size):
        super(MLP, self).__init__()
        self.linear1 = torch.nn.Linear(input_size,200)
        self.linear2 = torch.nn.Linear(200,100)
        self.linear3 = torch.nn.Linear(100,80)
        self.linear4 = torch.nn.Linear(80,20)
        self.linear5 = torch.nn.Linear(20,1)
        self.act1= nn.ReLU()
        #self.act2= nn.ReLU()
        #self.act3= nn.ReLU()
        self.act2= torch.sin
        self.act3= torch.sin
        self.act4= nn.ReLU()

    def forward(self, x):
        out= self.linear1(x)
        out = self.act1(out)
        out= self.linear2(out)
        out = self.act2(out)
        out = self.linear3(out)
        out = self.act3(out)
        out = self.linear4(out)
        out = self.act4(out)
        out = self.linear5(out)
        out = torch.sigmoid(out)
        return out

class Net(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(Net, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(non_perturb_train.num_node_features, GCN_hidden1)
        self.conv2 = GCNConv(GCN_hidden1, GCN_hidden2)
        self.conv3 = GCNConv(GCN_hidden2, GCN_hidden3)
        self.conv4 = GCNConv(GCN_hidden3, GCN_hidden4)
        self.mlp = MLP((GCN_hidden1+GCN_hidden2+GCN_hidden3+GCN_hidden4)*n)

    def forward(self, x_p, x_np, y, edge_index_p,edge_index_np):
        o_p_1 = self.conv1(x_p, edge_index_p)
        o_p_1 = o_p_1.relu()
        #o_p_1 = torch.sin(o_p_1)
        #o_p_1 = F.dropout(o_p_1, p=0.1, training=self.training)
    
        o_p_2 = self.conv2(o_p_1, edge_index_p)
        o_p_2 = o_p_2.relu()
        #o_p_2 = torch.sin(o_p_2)
        #o_p_2 = F.dropout(o_p_2, p=0.5, training=self.training)

        o_p_3 = self.conv3(o_p_2, edge_index_p)
        o_p_3 = o_p_3.relu()

        o_p_4 = self.conv4(o_p_3, edge_index_p)
       
        o_np_1 = self.conv1(x_np, edge_index_np)
        #o_np_1 = torch.sin(o_np_1)
        o_np_1 = o_np_1.relu()
        #o_np_1 = F.dropout(o_np_1, p=0.5, training=self.training)
    
        o_np_2 = self.conv2(o_np_1, edge_index_np)
        o_np_2 = o_np_2.relu()
        #o_np_2 = torch.sin(o_np_2)
        #o_np_2 = F.dropout(o_np_2, p=0.5, training=self.training)

        o_np_3 = self.conv3(o_np_2, edge_index_np)
        o_np_3 = o_np_3.relu()

        o_np_4 = self.conv4(o_np_3, edge_index_np)


        o_p = torch.cat((o_p_1,o_p_2,o_p_3,o_p_4),dim= 1)
        o_np = torch.cat((o_np_1,o_np_2,o_np_3,o_np_4),dim= 1)
        
        factor = y.size(0)
        z = o_np.repeat(factor,1)
        z = z- o_p
        out = torch.reshape(torch.mul(z[0:n,:],(y[0,0]-0.5)*2),(1,n*z.size(1)))
        for i in range(1,factor):
            tmp = torch.reshape(torch.mul(z[i*n:i*n+n,:],(y[i,0]-0.5)*2),(1,n*z.size(1)))
            out = torch.cat((out,tmp),dim = 0)
        z = self.mlp(out)

        return z

cuda
Tesla P100-PCIE-16GB


In [None]:
for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()

NameError: ignored

In [None]:
n = 332
batch_size = 64
x = torch.eye(n,dtype=torch.float)
#x = torch.ones((n,1))
train_data = pd.read_csv(fname+'_train_el.csv',header=None)
train_edge_index = np.array(train_data.iloc[:,0:])
non_perturb_train = non_ptb_data(train_edge_index,n,x,np.zeros((1,2))).to(device)

train_labels = pd.read_csv(fname+'_train_labels.csv',header=None)
train_labels = np.array(train_labels.iloc[:,0:])
test_labels = pd.read_csv(fname+'_test_labels.csv',header=None)
test_labels = np.array(test_labels.iloc[:,0:])

train_data_list = [ptb_data_train(train_edge_index,n,x,train_labels[i,0:3]) for i in range(0,len(train_labels))]
train_loader = DataLoader(train_data_list,batch_size=batch_size,shuffle=True)
test_data_list = [ptb_data_test(train_edge_index,n,x,test_labels[i,0:3]) for i in range(0,len(test_labels))]
test_loader = DataLoader(test_data_list,batch_size=int(len(test_data_list)),shuffle=True)

In [None]:
# def labelx(x,non_adj,edge_index):
#     with torch.no_grad():
#         x_new=x.clone().detach()
        
#         per_adj=torch.zeros_like(non_adj)
#         per_small_adj=to_dense_adj(edge_index)
#         a,b=per_small_adj[0].shape
#         per_adj[0][:a,:b]=per_small_adj[0]
#         diffadj=non_adj-per_adj
#         diffnode=torch.where(diffadj[0]!=0)
#         for nodes in diffnode:
#             x_new[nodes[0]][nodes[1]]=-diffadj[0][nodes[0]][nodes[1]]
#             #x_new[nodes[0]][nodes[0]]=1
#             #x_new[nodes[1]][nodes[1]]=1
#     return x_new

        


# n = 332
# batch_size = 32
# x = torch.eye(n,dtype=torch.float)
# #x = torch.ones((n,1))
# train_data = pd.read_csv(fname+'_train_el.csv',header=None)
# train_edge_index = np.array(train_data.iloc[:,0:])
# non_perturb_train = non_ptb_data(train_edge_index,n,x,np.zeros((1,2))).to(device)

# train_labels = pd.read_csv(fname+'_train_labels.csv',header=None)
# train_labels = np.array(train_labels.iloc[:,0:])
# test_labels = pd.read_csv(fname+'_test_labels.csv',header=None)
# test_labels = np.array(test_labels.iloc[:,0:])


# non_adj=to_dense_adj(non_perturb_train.edge_index).cpu()
# train_data_list = [ptb_data_train(train_edge_index,n,x,train_labels[i,0:3]) for i in range(0,len(train_labels))]
# train_data_list_label=[]
# x = torch.eye(n,dtype=torch.float)
# for data in train_data_list:
#     data.x=labelx(x,non_adj,data.edge_index)
#     train_data_list_label.append(data)
# train_loader = DataLoader(train_data_list_label,batch_size=batch_size,shuffle=True)



# test_data_list = [ptb_data_test(train_edge_index,n,x,test_labels[i,0:3]) for i in range(0,len(test_labels))]
# test_data_list_label=[]
# x = torch.eye(n,dtype=torch.float)
# for data in test_data_list:
#     data.x=labelx(x,non_adj,data.edge_index)
#     test_data_list_label.append(data)
# test_loader = DataLoader(test_data_list_label,batch_size=len(test_data_list),shuffle=False)

In [None]:
data.x

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])

In [None]:
model = Net(hidden_channels=12).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#criterion = torch.nn.BCEWithLogitsLoss()
criterion = torch.nn.BCELoss()
def train(train_loader):
    model.train()
    loss_eposch=0
    for step, data in enumerate(train_loader):# Iterate in batches over the training dataset.
        data = data.to(device)
        out = model(data.x, non_perturb_train.x, data.y, data.edge_index, non_perturb_train.edge_index) 
        out = out.reshape(-1).to(device) # Perform a single forward pass.
        #print('aa',out)
        #print('bb',data.y[:,0])
        loss = criterion(out, data.y[:,0])  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
        #print(f"Step:{step},Loss:{loss.item()}")
        loss_eposch=loss_eposch+loss.item()
    return loss_eposch/(step+1.0)

def test(loader):
     model.eval()

     correct = 0
     for step, data in enumerate(loader):  # Iterate in batches over the training/test dataset.
         #print(f'Num of graphs in the current batch: {data.num_graphs}')
         data = data.to(device)
         out = model(data.x, non_perturb_train.x, data.y, data.edge_index, non_perturb_train.edge_index) 
         scores = out.cpu().detach().numpy()
         #print(scores)
         labels = data.y[:,1].cpu().detach().numpy()
         break
     return roc_auc_score(labels, scores)  # Derive ratio of correct predictions.

num_epochs = 20
for epoch in range(0, num_epochs):
    loss_step = train(train_loader)
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print('Epoch [{}/{}], Loss: {:.4f}, Train AUC: {:.4f}, Test AUC: {:.4f}'\
                .format(epoch+1, num_epochs, loss_step, train_acc, test_acc))

Epoch [1/20], Loss: 0.5286, Train AUC: 0.9650, Test AUC: 0.9309
Epoch [2/20], Loss: 0.1636, Train AUC: 1.0000, Test AUC: 0.9543
Epoch [3/20], Loss: 0.0570, Train AUC: 1.0000, Test AUC: 0.9543
Epoch [4/20], Loss: 0.0269, Train AUC: 1.0000, Test AUC: 0.9566
Epoch [5/20], Loss: 0.0155, Train AUC: 1.0000, Test AUC: 0.9534
Epoch [6/20], Loss: 0.0053, Train AUC: 1.0000, Test AUC: 0.9518
Epoch [7/20], Loss: 0.0070, Train AUC: 1.0000, Test AUC: 0.9568
Epoch [8/20], Loss: 0.0029, Train AUC: 1.0000, Test AUC: 0.9546
Epoch [9/20], Loss: 0.0007, Train AUC: 1.0000, Test AUC: 0.9561
Epoch [10/20], Loss: 0.0004, Train AUC: 1.0000, Test AUC: 0.9569
Epoch [11/20], Loss: 0.0002, Train AUC: 1.0000, Test AUC: 0.9576
Epoch [12/20], Loss: 0.0001, Train AUC: 1.0000, Test AUC: 0.9577
Epoch [13/20], Loss: 0.0001, Train AUC: 1.0000, Test AUC: 0.9579
Epoch [14/20], Loss: 0.0001, Train AUC: 1.0000, Test AUC: 0.9579
Epoch [15/20], Loss: 0.0001, Train AUC: 1.0000, Test AUC: 0.9581
Epoch [16/20], Loss: 0.0001, Train