In [None]:
# Import package
import numpy as np
import random
from numpy.core.numeric import ones_like
from torch_geometric.data import Data, DataLoader
import torch, os
from tqdm import tqdm
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score
from torch_geometric.data import DataLoader

In [2]:
TF = 'ctcf'
DATAPATH = '/DATA/yogesh/encodeDream/%s/'%(TF)
POSITIVE = DATAPATH+'processed_out/positive/'
NEGATIVE = DATAPATH+'processed_out/negative/'

In [6]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,GENConv
from torch_geometric.nn import global_mean_pool

class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(5)
        # self.conv1 = GCNConv(train_loader.dataset[0].x.shape[1], hidden_channels)
        # self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.conv1 = GENConv(8, hidden_channels)
        self.conv2 = GENConv(hidden_channels, hidden_channels)
        self.conv3 = GENConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, 2)
        
    def forward(self, x, edge_index, batch):
        # Get node embeeding
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        
        # Readout alayer
        x = global_mean_pool(x, batch)
        
        # Out layer
        # x = F.dropout(x, p=DROPOUT, training=self.training)
        x = self.lin(x)
        return x

In [8]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.set_device(1)
print('Current cuda device ID:',torch.cuda.current_device())
print('Current cuda device name:', torch.cuda.get_device_name())

# Train/test
model = GCN(hidden_channels=64)
model.to(device)
print(model)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
loss_func = torch.nn.CrossEntropyLoss()

def to_device(data, device):
    return data.to(device, non_blocking=True)

def train():
    model.train()
    for data in train_loader:
        a = to_device(data.x, device)
        b = to_device(data.edge_index, device)
        c = to_device(data.batch, device)
        d = to_device(data.y, device)
        
        out = model(a, b, c)
        #print(out)
        
        loss = loss_func(out, d)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
def test(loader):
    model.eval()
    correct = 0
    aupr, auroc, loss=[], [], []
    for data in loader:
        a = to_device(data.x, device)
        b = to_device(data.edge_index, device)
        c = to_device(data.batch, device)
        d = to_device(data.y, device)
        
        out = model(a, b, c)
        prob = F.softmax(out, dim=1)
        
        # AUPR
        precision, recall, thresholds = precision_recall_curve(d.detach().cpu().clone().numpy(), prob[:,1].detach().cpu().clone().numpy())
        aupr.append(auc(recall, precision))
        
        # AUROC
        try:
            auroc.append(roc_auc_score(d.detach().cpu().clone().numpy(), prob[:,1].detach().cpu().clone().numpy()))
        except:
            pass
        
        # LOSS
        loss_ = loss_func(out, d)
        loss.append(loss_.detach().cpu().clone().numpy())
        
        # print(prob)
        
        pred = out.argmax(dim=1)
        correct += int((pred==d).sum())
        
    return correct/len(loader.dataset), np.nanmean(aupr), np.nanmean(auroc), np.nanmean(loss)

cuda:1
Current cuda device ID: 1
Current cuda device name: Tesla V100-PCIE-32GB
GCN(
  (conv1): GENConv(8, 64, aggr=softmax)
  (conv2): GENConv(64, 64, aggr=softmax)
  (conv3): GENConv(64, 64, aggr=softmax)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [None]:
# Import positeve dataset
dum2 = torch.load(POSITIVE+os.listdir(POSITIVE)[0])
np.seterr(divide='ignore', invalid='ignore')

# Training
train_acc_, test_acc_, train_aupr_, test_aupr_, train_auroc_, test_auroc_ = [], [], [], [], [], []
for epoch in range(1, len(os.listdir(NEGATIVE))):
    # Get data
    dum1 = torch.load(NEGATIVE+os.listdir(NEGATIVE)[epoch])
    DATALIST = dum1+dum2

    # Split data
    torch.manual_seed(11)
    random.shuffle(DATALIST)

    split_n = int(0.8*(len(DATALIST)))

    train_dataset = DATALIST[:split_n]
    test_dataset = DATALIST[split_n:]
    
    # Prepare dataloader
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=True)
    
    # Start training
    train()
    train_acc, train_aupr, train_auroc, train_loss = test(train_loader)
    test_acc, test_aupr, test_auroc, test_loss = test(test_loader)
    
    train_acc_.append(train_acc)
    test_acc_.append(test_acc)
    train_aupr_.append(train_aupr)
    test_aupr_.append(test_aupr)
    train_auroc_.append(train_auroc)
    test_auroc_.append(test_auroc)
    
    print(f'Epoch:{epoch:03d}')
    print(f'Train::ACC:{train_acc} AUPR:{train_aupr} AUROC:{train_auroc} LOSS:{train_loss}')
    print(f'Test ::ACC:{test_acc} AUPR:{test_aupr} AUROC:{test_auroc} LOSS:{test_loss}')
    print()
    
    # print(f'Epoch:{epoch:03d},Train acc:{train_acc:.4f},Train aupr:{train_aupr:.4f},Train auroc:{train_auroc:.4f},Test acc:{test_acc:.4f},Test aupr:{test_aupr:.4f},Test auroc:{test_auroc:.4f}')

Epoch:001
Train::ACC:0.6686409723281447 AUPR:0.7343817641450936 AUROC:0.7343336758309548 LOSS:0.6028475761413574
Test ::ACC:0.6677456023736585 AUPR:0.7351739265459599 AUROC:0.7332531018809447 LOSS:0.6036773920059204

Epoch:002
Train::ACC:0.6733227901450625 AUPR:0.7386024878334434 AUROC:0.7408843569228478 LOSS:0.5972746014595032
Test ::ACC:0.6718044557475804 AUPR:0.7358877390732771 AUROC:0.7382350877822781 LOSS:0.5995912551879883

Epoch:003
Train::ACC:0.6744563166809829 AUPR:0.7389475306947504 AUROC:0.741745697827416 LOSS:0.5970451235771179
Test ::ACC:0.6721384120378399 AUPR:0.7351387072121461 AUROC:0.7390835576409297 LOSS:0.5989888906478882



In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['axes.linewidth'] = 1.2

fig, ax = plt.subplots(1,3, figsize=(9,3))
ax[0].plot(train_acc_, color='red', marker='o', linewidth=3, markersize=3, label='train')
ax[0].plot(test_acc_, color='black', marker='o', linewidth=3, markersize=3, label='test')
ax[0].legend()
ax[0].set_ylim([0.5,1])
ax[0].set_ylabel('Accuracy', fontsize='14')
ax[0].set_xlabel('Epoch', fontsize='14')
#ax[0].grid()

ax[1].plot(train_aupr_, color='red', marker='o', linewidth=3, markersize=3, label='train')
ax[1].plot(test_aupr_, color='black', marker='o', linewidth=3, markersize=3, label='test')
ax[1].set_ylim([0,1])
ax[1].legend()
ax[1].set_ylabel('AUPR', fontsize='14')
ax[1].set_xlabel('Epoch', fontsize='14')
#ax[1].grid()

ax[2].plot(train_auroc_, color='red', marker='o', linewidth=3, markersize=3, label='train')
ax[2].plot(test_auroc_, color='black', marker='o', linewidth=3, markersize=3, label='test')
ax[2].set_ylim([0,1])
ax[2].legend()
ax[2].set_ylabel('AUROC', fontsize='14')
ax[2].set_xlabel('Epoch', fontsize='14')

#ax[2].grid()

fig.suptitle(TF.upper()+': Performance (Internal validation)', fontsize=14)
plt.tight_layout()

# plt.savefig(PATH_+'processed_out/'+'10lac_'+TF+'_'+str(kmer)+'_'+str(DROPOUT)+'.png', bbbox_inches ="tight", dpi=600)
plt.show()