In [25]:
import os.path as osp
from tqdm.auto import tqdm
import numpy as np
import wandb

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, VGAE, ResGatedGraphConv, GCN
from torch_geometric.utils import negative_sampling
from torch_geometric.loader import DataLoader

from dataset_processing import RNADataset


In [41]:
dataset = RNADataset(root="./data/")
dataset = dataset.shuffle()
train_data, val_data, test_data = dataset[0:655], dataset[655:873], dataset[873:]

train_dataloader = DataLoader(train_data, batch_size=1, shuffle=False)
val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False)

In [42]:
train_data[0].edge_index

tensor([[ 0,  1,  0, 29,  1,  2,  1, 28,  2,  3,  2, 27,  3,  4,  3, 26,  4,  5,
          5,  6,  6,  7,  7,  8,  8,  9,  8, 19,  9, 10,  9, 18, 10, 11, 10, 17,
         11, 12, 11, 16, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19,
         19, 20, 20, 21, 21, 22, 22, 23, 22, 29, 23, 24, 23, 28, 24, 25, 24, 27,
         25, 26, 26, 27, 27, 28, 28, 29],
        [ 1,  0, 29,  0,  2,  1, 28,  1,  3,  2, 27,  2,  4,  3, 26,  3,  5,  4,
          6,  5,  7,  6,  8,  7,  9,  8, 19,  8, 10,  9, 18,  9, 11, 10, 17, 10,
         12, 11, 16, 11, 13, 12, 14, 13, 15, 14, 16, 15, 17, 16, 18, 17, 19, 18,
         20, 19, 21, 20, 22, 21, 23, 22, 29, 22, 24, 23, 28, 23, 25, 24, 27, 24,
         26, 25, 27, 26, 28, 27, 29, 28]])

In [27]:
def precision(y_pred, y_true):
    y_pred[(y_pred > 0.5)] = 1
    y_pred[(y_pred <= 0.5)] = 0 
    
    tp = torch.sum(y_pred * y_true)
    fp = torch.sum((1 - y_true) * y_pred)
    
    return tp / (tp + fp + epsilon)

In [28]:
def recall(y_pred, y_true):
    y_pred[(y_pred > 0.5)] = 1
    y_pred[(y_pred <= 0.5)] = 0
    
    tp = torch.sum(y_pred * y_true)
    fn = torch.sum(y_true * (1 - y_pred))
    
    return tp / (tp + fn + epsilon)

In [29]:
def f1_loss(y_pred, y_true):
    tp = torch.sum(y_pred * y_true)
    fn = torch.sum(y_true * (1 - y_pred))
    fp = torch.sum((1 - y_true) * y_pred)
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)

#     k1 = 1 - torch.abs(precision - recall)
#     k2 = 1 - torch.abs(K.mean(precision) - K.mean(recall))
    #calculate upgraded f1 score
    f1 = 2 * precision * recall / (precision + recall + epsilon)
#     tw = K.sum(K.cast(y_true * y_pred, ’float32’), axis=[1, 2, 3])
#     fw = K.sum(K.cast((1 - y_true) * y_pred, ’float32’), axis=[1, 2, 3])
#     fb = K.sum(K.cast(y_true * (1 - y_pred), ’float32’), axis=[1, 2, 3])
    return 1 - f1


In [63]:
class GCNModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, num_layers, out_channels, dropout):
        super(GCNModel, self).__init__()
        self.gcn = GCN(in_channels, hidden_channels, num_layers, out_channels, dropout=dropout)

    def forward(self, x, edge_index):
        x = self.gcn(x.to(torch.float), edge_index)
        prob_adj = (x @ x.t()).sigmoid()
#         return (prob_adj > 0).nonzero(as_tuple=False).t()
        return prob_adj


In [64]:
hidden_channels = 500
out_channels = 500
num_layers = 4
num_features = dataset.num_features


dr = 0.7
lr = 0.00005
epochs = 180

# model = GCNModel(num_features, hidden_channels, num_layers, out_channels, dr)
model = torch.load("./models/GatedGCN_6_180_7e-05_Adam_300.pt")


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
train_data = list(map(lambda x: x.to(device), train_data))
val_data = list(map(lambda x: x.to(device), val_data))
print(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
# criterion = torch.nn.BCEWithLogitsLoss()

# def RMSELoss(y_pred, y_true):
#     return torch.sqrt(torch.mean((y_pred - y_true) ** 2))

criterion = f1_loss



cuda


In [65]:
run_name = "GCN_" + str(num_layers) + "_" + str(epochs) + "_" + str(lr) + "_" + \
str(hidden_channels) + "_" + str(out_channels) + "_" + str(dr)


wandb.init(
    # set the wandb project where this run will be logged
    project="secondary_structure_prediction1",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": lr,
    "architecture": "GCN",
    "epochs": epochs,
    "optimizer": "Adam",
    "hidden_channels": hidden_channels,
    "out_channels": out_channels,
    "loss": "f1_loss",
    "dropout": dr,
    "train:val:test": "800:218:73"
    },
    name=run_name
)

epsilon = 1e-10

0,1
train_f1,▁▇▇▇▇███▇███████████████
train_loss,█▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_precision,▁▇▇▇▇███▇███████████████
train_recall,█▃▂▂▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁
val_f1,▁▄▄▄▆▆█▅▅▅▆▆▇▆▆▇▇▆▅█▆▇▆█
val_loss,█▄▄▄▃▃▂▃▃▃▂▃▂▃▂▂▁▂▃▁▂▁▂▁
val_precision,▁▄▄▄▆▆█▅▅▅▆▆▇▆▆▇▇▆▅█▆▇▆█
val_recall,█▆▅▅▄▄▄▄▅▆▄▅▄▅▄▄▃▃▅▃▂▄▁▃

0,1
train_f1,0.04837
train_loss,0.9527
train_precision,0.02489
train_recall,0.85701
val_f1,0.04818
val_loss,0.9529
val_precision,0.0248
val_recall,0.84434


In [66]:
def train():
    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = []
        train_recall = []
        train_precision = []
        for g in tqdm(train_data, ncols=100):
            g.to(device)
            optimizer.zero_grad()

            out = model(g.x, g.edge_index)
            y_true = g.adj_mat
            loss = criterion(out, y_true)            
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
            train_precision.append(precision(out, y_true).item())
            train_recall.append(recall(out, y_true).item())
            
        train_prec = np.mean(train_precision)
        train_rec = np.mean(train_recall)
        train_f1 = (2 * train_prec * train_rec) / (train_prec + train_rec)
        print(f'Epoch: {epoch:03d}, loss: {np.mean(train_loss)}, f1: {train_f1}, precision: {train_prec}, recall: {train_rec}')
        
        val_loss = []
        val_recall = []
        val_precision = []
        with torch.no_grad():
            for g in tqdm(val_data, ncols=100):
                g.to(device)
                out = model(g.x, g.edge_index)
        
                y_true = g.adj_mat
                loss = criterion(out, y_true)
                                
                val_loss.append(loss.item())
                val_precision.append(precision(out, y_true).item())
                val_recall.append(recall(out, y_true).item())
 
            
            prec = np.mean(val_precision)
            rec = np.mean(val_recall)
            f1 = (2 * prec * rec) / (prec + rec)
            print(f'val_loss: {np.mean(val_loss)}, val_f1: {f1}, val_precision: {prec}, val_recall: {rec}')
              
            wandb.log({"train_loss": np.mean(train_loss), "train_f1": train_f1, "train_precision": train_prec, 
                       "train_recall": train_rec,
                       "val_loss": np.mean(val_loss), "val_f1": f1, "val_precision": prec, "val_recall": rec})
#     wandb.finish()

In [67]:
train()
torch.save(model, "./models/" + run_name + ".pt")

100%|████████████████████████████████████████████████████████████| 655/655 [00:04<00:00, 147.01it/s]


Epoch: 001, loss: 0.9621720981051903, f1: 0.03888005775610475, precision: 0.019845927459585928, recall: 0.950497046499762


100%|████████████████████████████████████████████████████████████| 218/218 [00:00<00:00, 274.73it/s]


val_loss: 0.9562473012766707, val_f1: 0.04576248244823012, val_precision: 0.023503772792654164, val_recall: 0.8638846961730117


100%|████████████████████████████████████████████████████████████| 655/655 [00:04<00:00, 147.17it/s]


Epoch: 002, loss: 0.9545014344098913, f1: 0.046944972087589514, precision: 0.02411751105798451, recall: 0.8776371912191843


100%|████████████████████████████████████████████████████████████| 218/218 [00:00<00:00, 270.94it/s]


val_loss: 0.9545831907232967, val_f1: 0.046851134450436996, val_precision: 0.024085769248237715, val_recall: 0.8546214446835562


100%|████████████████████████████████████████████████████████████| 655/655 [00:04<00:00, 141.58it/s]


Epoch: 003, loss: 0.9536179557101417, f1: 0.0477023001300342, precision: 0.024523487117653356, recall: 0.8699704546509808


100%|████████████████████████████████████████████████████████████| 218/218 [00:00<00:00, 274.02it/s]


val_loss: 0.9537485168067688, val_f1: 0.047464464650928094, val_precision: 0.02440905674689646, val_recall: 0.8558813589155128


 40%|███████████████████████▋                                    | 259/655 [00:01<00:02, 135.08it/s]


KeyboardInterrupt: 

In [15]:
model = torch.load("./models/" + "GCN_4_180_0.0001_Adam_0.2" + ".pt")


In [11]:
!wandb login ddbabdb4aeb6b610863acd0e17dda52c85c03fb6


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/vdshk/.netrc


In [26]:
wandb.finish()