In [11]:
import time
import os
import pandas as pd
import numpy as np
import torch
import gc
import dgl
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv
from torch.autograd import Variable
from dgl.data import DGLDataset
from sklearn.utils import shuffle
from gnn_train.pyimport GCN

  1%|          | 6/600 [00:00<00:11, 52.01it/s]

dataset length: 14996


 74%|███████▎  | 442/600 [00:07<00:02, 56.20it/s]


KeyboardInterrupt: 

In [9]:
my_batch_size = 30

class MyDataset(DGLDataset):
    """
    Parameters
    -------------------------
    raw_dir: str
        Specifying the directory that already stores the input data.
    
    """
    def __init__(self, 
                 url=None,
                 raw_dir=None,
                 save_dir=None,
                 force_reload=False,
                 verbose=False):
        super(MyDataset, self).__init__(name='docking_classify',
                                        url=url,
                                        raw_dir=raw_dir,
                                        save_dir=save_dir,
                                        force_reload=force_reload,
                                        verbose=verbose)
    def download(self):
        pass

    #must be implemented
    def process(self):
        df_pos = pd.read_csv('./positive_test_dataset.csv')
        df_neg = pd.read_csv('./negative_test_dataset.csv')
        pos_graphs = df_pos['file_name']
        pos_labels = df_pos['label']
        neg_graphs = df_neg['file_name']
        neg_labels = df_neg['label']

        #half_batch = int(my_batch_size/2)
        self.graph_dataset = []
        self.graph_labels = []
        #negative graphs are more
        for i in range(len(neg_graphs)):
            self.graph_dataset.append(pos_graphs[i%len(pos_graphs)])
            self.graph_dataset.append(neg_graphs[i])
            self.graph_labels.append(torch.Tensor([1])) #positive
            self.graph_labels.append(torch.Tensor([0])) #negative
            
        self.df_dataset = pd.DataFrame({'file_name':self.graph_dataset, 'label':self.graph_labels})
        self.df_dataset = shuffle(self.df_dataset)
        #for i in range(len())

    
    #must be implemented
    def __getitem__(self, idx):
        """get one item by index
        
        Parameters
        ---------------
        idx: int
            Item index

        Returns
        ---------------
        (dgl.DGLGraph, Tensor)
        """
        graph = dgl.load_graphs(self.df_dataset['file_name'][idx.item()])[0] #idx.item():convert torch.Tensor to int
        #print(self.df_dataset['file_name'][idx.item()])
        label = self.df_dataset['label'][idx.item()]
        return graph[0], label[0].float()

    #must be implemented
    def __len__(self):
        #number of data examples
        return self.df_dataset.shape[0]
        

    def save(self):
        pass

    def load(self):
        pass

    def has_cache(self):
        pass

my_dataset = MyDataset()

from dgl.dataloading.pytorch import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(my_dataset)
print("dataset length:", num_examples)

test_sampler = SubsetRandomSampler(torch.arange(num_examples))
test_dataloader = GraphDataLoader(my_dataset, sampler=test_sampler, batch_size=my_batch_size, drop_last=False)

dataset length: 4648


In [13]:
class GCN(nn.Module):
    def __init__(self, in_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, 80, allow_zero_in_degree=True)
        self.conv2 = GraphConv(80, 160, allow_zero_in_degree=True)
        self.conv3 = GraphConv(160, 112, allow_zero_in_degree=True)
        self.conv4 = GraphConv(112, 160, allow_zero_in_degree=True)
        self.conv5 = GraphConv(160, 176, allow_zero_in_degree=True)
        self.conv6 = GraphConv(176, 96, allow_zero_in_degree=True)
        self.conv7 = GraphConv(96, 144, allow_zero_in_degree=True)
        self.conv8 = GraphConv(144, 96, allow_zero_in_degree=True)
        self.conv9 = GraphConv(96, 128, allow_zero_in_degree=True)
        self.conv10 = GraphConv(128, 96, allow_zero_in_degree=True)
        self.conv11 = GraphConv(96, 160, allow_zero_in_degree=True)
        self.dnn1 = torch.nn.Linear(160, 140)
        self.dnn2  = torch.nn.Linear(140, num_classes)
        param_mu = torch.tensor(0.0)
        param_sigma = torch.tensor(1.0)
        self.param_mu = nn.Parameter(param_mu)
        self.param_sigma = nn.Parameter(param_sigma)

    def forward(self, g, inputs):
        pow_param = torch.mul(g.edata['h'] - self.param_mu, g.edata['h'] - self.param_mu)/(-self.param_sigma)
        efeat = torch.log(pow_param)
        g.edata['h'] = efeat
        h = self.conv1(g, inputs)
        h = F.leaky_relu(h)
        h = self.conv2(g, h)
        h = F.leaky_relu(h)
        h = self.conv3(g, h)
        h = F.leaky_relu(h)
        h = self.conv4(g, h)
        h = F.leaky_relu(h)
        h = self.conv5(g, h)
        h = F.leaky_relu(h)
        h = self.conv6(g, h)
        h = F.leaky_relu(h)
        h = self.conv7(g, h)
        h = F.leaky_relu(h)
        h = self.conv8(g, h)
        h = F.leaky_relu(h)
        h = self.conv9(g, h)
        h = F.leaky_relu(h)
        h = self.conv10(g, h)
        h = F.leaky_relu(h)
        h = self.conv11(g, h)
        h = F.leaky_relu(h)
        g.ndata['h'] = h
        h = dgl.mean_nodes(g, 'h')
        h = F.leaky_relu(h)
        h = self.dnn1(h)
        h = F.dropout(h, p=0.3)
        h = F.leaky_relu(h)
        h = self.dnn2(h)
        h = F.dropout(h, p=0.2)
        h = torch.sigmoid(h)
        return h

In [19]:
modelPath = '../models/gcn1636930820.pkl'
model = torch.load(modelPath)
model

GCN(
  (conv1): GraphConv(in=44, out=80, normalization=both, activation=None)
  (conv2): GraphConv(in=80, out=160, normalization=both, activation=None)
  (conv3): GraphConv(in=160, out=112, normalization=both, activation=None)
  (conv4): GraphConv(in=112, out=160, normalization=both, activation=None)
  (conv5): GraphConv(in=160, out=176, normalization=both, activation=None)
  (conv6): GraphConv(in=176, out=96, normalization=both, activation=None)
  (conv7): GraphConv(in=96, out=144, normalization=both, activation=None)
  (conv8): GraphConv(in=144, out=96, normalization=both, activation=None)
  (conv9): GraphConv(in=96, out=128, normalization=both, activation=None)
  (conv10): GraphConv(in=128, out=96, normalization=both, activation=None)
  (conv11): GraphConv(in=96, out=160, normalization=both, activation=None)
  (dnn1): Linear(in_features=160, out_features=140, bias=True)
  (dnn2): Linear(in_features=140, out_features=1, bias=True)
)

In [20]:
num_correct = 0
num_tests = 0
FP = 0
FN = 0
device = torch.device("cuda:0")
for batched_graph, labels in test_dataloader:
    batched_graph, labels = batched_graph.to(device), labels.to(device)
    pred = model(batched_graph, batched_graph.ndata['h'].float()).squeeze(1).squeeze(1)
    # print(pred, labels)
    for i, p in enumerate(pred.round()):
        if p != labels[i]:
            FP += 1 if p == torch.tensor(1.0) else 0
            FN += 1 if p == torch.tensor(0.0) else 0

    num_correct += (pred.round() == labels).sum().item() # TP+TN
    num_tests += len(labels) # TP+TN+FP+FN
num_correct/num_tests

0.6034853700516352