In [1]:
import os.path as osp
from tqdm.auto import tqdm
import numpy as np
import wandb

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, VGAE, ResGatedGraphConv
from torch_geometric.utils import negative_sampling
from torch_geometric.loader import DataLoader

from dataset_processing import RNADataset


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset = RNADataset(root="./data/")
dataset = dataset.shuffle()
train_data, val_data, test_data = dataset[0:655], dataset[655:873], dataset[873:]

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=32, shuffle=False)


In [13]:
def precision(y_pred, y_true):
    y_pred[(y_pred > 0.5)] = 1
    y_pred[(y_pred <= 0.5)] = 0 
    
    tp = torch.sum(y_pred * y_true)
    fp = torch.sum((1 - y_true) * y_pred)
    
    return tp / (tp + fp + epsilon)

In [14]:
def recall(y_pred, y_true):
    y_pred[(y_pred > 0.5)] = 1
    y_pred[(y_pred <= 0.5)] = 0
    
    tp = torch.sum(y_pred * y_true)
    fn = torch.sum(y_true * (1 - y_pred))
    
    return tp / (tp + fn + epsilon)

In [15]:
def f1_loss(y_pred, y_true):
    tp = torch.sum(y_pred * y_true)
    fn = torch.sum(y_true * (1 - y_pred))
    fp = torch.sum((1 - y_true) * y_pred)
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)

#     k1 = 1 - torch.abs(precision - recall)
#     k2 = 1 - torch.abs(K.mean(precision) - K.mean(recall))
    #calculate upgraded f1 score
    f1 = 2 * precision * recall / (precision + recall + epsilon)
#     tw = K.sum(K.cast(y_true * y_pred, ’float32’), axis=[1, 2, 3])
#     fw = K.sum(K.cast((1 - y_true) * y_pred, ’float32’), axis=[1, 2, 3])
#     fb = K.sum(K.cast(y_true * (1 - y_pred), ’float32’), axis=[1, 2, 3])
    return 1 - f1


In [27]:
class GCNModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCNModel, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, out_channels)
#         self.conv4 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()
#         x = self.conv4(x, edge_index).relu()
        prob_adj = (x @ x.t()).sigmoid()
#         return (prob_adj > 0).nonzero(as_tuple=False).t()
        return prob_adj


In [28]:
out_channels = 50
num_features = dataset.num_features
epochs = 2

model = GCNModel(num_features, 128, 64)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# optimizer = torch.optim.Adagrad(model.parameters(), lr=0.001, weight_decay=0.9)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
# criterion = torch.nn.BCEWithLogitsLoss()

def RMSELoss(y_pred, y_true):
    return torch.sqrt(torch.mean((y_pred - y_true) ** 2))

criterion = f1_loss

In [22]:
wandb.init(
    # set the wandb project where this run will be logged
    project="secondary_structure_prediction",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 0.001,
    "architecture": "GCN",
    "epochs": 180,
    "optimizer": "Adam",
    "out_channels": 64,
    "loss": "f1_loss",
    },
    name="Net_3conv_60_1e-3_Adam_64"
)

epsilon = 1e-10

0,1
train_f1,▂▄▂▁▆▅█▇▅
train_loss,█▂▁▂▂▁▂▁▁
train_precision,▁▄▂▁▆▅█▇▅
train_recall,█▆▃▃▁▆▃▄▆
val_f1,▂▄▂▁▆▅█▇▅
val_loss,▃▄▇█▃▂▁▂▂
val_precision,▁▄▂▁▆▅█▇▅
val_recall,█▆▃▃▁▆▃▄▆

0,1
train_f1,0.16036
train_loss,0.87981
train_precision,0.0886
train_recall,0.84312
val_f1,0.16036
val_loss,0.86636
val_precision,0.0886
val_recall,0.84312


In [23]:
def train():
    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = []
        train_recall = []
        train_precision = []
        for g in tqdm(train_data):
            optimizer.zero_grad()

            out = model(g.x, g.edge_index)
            y_true = g.edge_label_index
            loss = criterion(out, y_true)            
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
            train_precision.append(precision(out, y_true).item())
            train_recall.append(recall(out, y_true).item())
            
        prec = np.mean(train_precision)
        rec = np.mean(train_recall)
        f1 = (2 * prec * rec) / (prec + rec)
        print(f'Epoch: {epoch:03d}, loss: {np.mean(train_loss)}, f1: {f1}, precision: {prec}, recall: {rec}')
        
        val_loss = []
        val_recall = []
        val_precision = []
        with torch.no_grad():
            for g in tqdm(val_data):
                out = model(g.x, g.edge_index)
        
                y_true = g.edge_label_index
                loss = criterion(out, y_true)
                                
                val_loss.append(loss.numpy())
                val_precision.append(precision(out, y_true).numpy())
                val_recall.append(recall(out, y_true).numpy())
 
            
            prec = np.mean(val_precision)
            rec = np.mean(val_recall)
            f1 = (2 * prec * rec) / (prec + rec)
            print(f'val_loss: {np.mean(val_loss)}, val_f1: {f1}, val_precision: {prec}, val_recall: {rec}')
            wandb.log({"train_loss": np.mean(train_loss), "train_f1": f1, "train_precision": prec, "train_recall": rec,
                       "val_loss": np.mean(val_loss), "val_f1": f1, "val_precision": prec, "val_recall": rec})
#     wandb.finish()

In [24]:
train()
torch.save(model, "./models/test.pt")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 655/655 [00:05<00:00, 112.97it/s]


Epoch: 001, loss: 0.8914583833163021, f1: 0.11201562722045515, precision: 0.05933080211651462, recall: 1.0


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 218/218 [00:01<00:00, 197.07it/s]


val_loss: 0.8791870474815369, val_f1: 0.12485086011312893, val_precision: 0.0665818303823471, val_recall: 1.0


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 655/655 [00:05<00:00, 121.06it/s]


Epoch: 002, loss: 0.8914213925827551, f1: 0.11201562722045515, precision: 0.05933080211651462, recall: 1.0


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 218/218 [00:00<00:00, 235.64it/s]

val_loss: 0.8791870474815369, val_f1: 0.12485086011312893, val_precision: 0.0665818303823471, val_recall: 1.0





In [26]:
model = torch.load("./models/test.pt")
model.train()

GCNModel(
  (conv1): GCNConv(4, 128)
  (conv2): GCNConv(128, 128)
  (conv3): GCNConv(128, 128)
)

In [76]:
!wandb login ddbabdb4aeb6b610863acd0e17dda52c85c03fb6


wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\chivi/.netrc
