In [None]:
import os.path as osp
from tqdm.auto import tqdm
import numpy as np
import wandb

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, VGAE, ResGatedGraphConv
from torch_geometric.utils import negative_sampling
from torch_geometric.loader import DataLoader

from dataset_creating import MyDataset

In [None]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the graphs from ./data/ and put them in a random order before splitting.
dataset = MyDataset(root="./data/").shuffle()

# Contiguous slices of the shuffled dataset:
# [0, 655) -> train, [655, 873) -> validation, [873, end) -> test.
TRAIN_END = 655
VAL_END = 873
train_data = dataset[:TRAIN_END]
val_data = dataset[TRAIN_END:VAL_END]
test_data = dataset[VAL_END:]

# Mini-batch loaders; only the training loader reshuffles between epochs.
BATCH_SIZE = 32
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class VariationalGCNEncoder(torch.nn.Module):
    """Graph-convolutional encoder with two output heads.

    Four GCN layers of width ``2 * out_channels`` (each followed by ReLU)
    feed two separate GCN heads, ``conv_mu`` and ``conv_logstd``, each of
    width ``out_channels``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each head's output per node.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden = 2 * out_channels

        # Shared trunk.
        self.conv1 = GCNConv(in_channels, hidden)
        self.conv2 = GCNConv(hidden, hidden)
        self.conv3 = GCNConv(hidden, hidden)
        self.conv4 = GCNConv(hidden, hidden)

        # Output heads.
        self.conv_mu = GCNConv(hidden, out_channels)
        self.conv_logstd = GCNConv(hidden, out_channels)

    def forward(self, x, edge_index):
        """Return the pair ``(conv_mu output, conv_logstd output)`` for the graph."""
        h = x
        for layer in (self.conv1, self.conv2, self.conv3, self.conv4):
            h = torch.relu(layer(h, edge_index))

        mu = self.conv_mu(h, edge_index)
        logstd = self.conv_logstd(h, edge_index)
        return mu, logstd


class VariationalGCNDecoder(torch.nn.Module):
    """Parameter-free inner-product decoder.

    Maps node embeddings ``z`` to a dense square matrix whose entry
    ``(i, j)`` is ``sigmoid(z_i . z_j)``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, z):
        """Return ``sigmoid(z @ z^T)`` for a 2-D embedding matrix ``z``."""
        pairwise_scores = torch.matmul(z, z.t())
        return torch.sigmoid(pairwise_scores)

In [None]:
def precision(y_pred, y_true, eps=1e-10):
    """Precision of thresholded predictions: ``tp / (tp + fp + eps)``.

    Predictions strictly greater than 0.5 count as positive, everything else
    (including NaN) as negative. The thresholding is done on a fresh tensor,
    so the caller's ``y_pred`` is left untouched.

    Args:
        y_pred: Tensor of predicted scores.
        y_true: Tensor of 0/1 labels, broadcastable against ``y_pred``.
        eps: Small constant added to the denominator so that an all-negative
            prediction yields 0 instead of a division by zero.

    Returns:
        A 0-dim tensor holding the precision.
    """
    # Binarize into a new tensor rather than overwriting the caller's scores.
    hard_pred = (y_pred > 0.5).to(y_pred.dtype)

    tp = torch.sum(hard_pred * y_true)
    fp = torch.sum((1 - y_true) * hard_pred)

    return tp / (tp + fp + eps)

In [None]:
def recall(y_pred, y_true, eps=1e-10):
    """Recall of thresholded predictions: ``tp / (tp + fn + eps)``.

    Predictions strictly greater than 0.5 count as positive, everything else
    (including NaN) as negative. The thresholding is done on a fresh tensor,
    so the caller's ``y_pred`` is left untouched.

    Args:
        y_pred: Tensor of predicted scores.
        y_true: Tensor of 0/1 labels, broadcastable against ``y_pred``.
        eps: Small constant added to the denominator so that a label tensor
            with no positives yields 0 instead of a division by zero.

    Returns:
        A 0-dim tensor holding the recall.
    """
    # Binarize into a new tensor rather than overwriting the caller's scores.
    hard_pred = (y_pred > 0.5).to(y_pred.dtype)

    tp = torch.sum(hard_pred * y_true)
    fn = torch.sum(y_true * (1 - hard_pred))

    return tp / (tp + fn + eps)

In [None]:
def f1_loss(y_pred, y_true, eps=1e-10):
    """Differentiable ("soft") F1 loss: ``1 - F1``.

    The counts are computed directly from the raw scores in ``y_pred``
    (no thresholding), so the result stays differentiable with respect to
    ``y_pred``.

    Args:
        y_pred: Tensor of predicted scores.
        y_true: Tensor of 0/1 labels, broadcastable against ``y_pred``.
        eps: Small constant added to every denominator to avoid division
            by zero.

    Returns:
        A 0-dim tensor equal to ``1 - F1``.
    """
    # Soft confusion-matrix counts.
    tp = torch.sum(y_pred * y_true)
    fn = torch.sum(y_true * (1 - y_pred))
    fp = torch.sum((1 - y_true) * y_pred)

    # Named *_soft so they do not shadow the module-level precision()/recall().
    precision_soft = tp / (tp + fp + eps)
    recall_soft = tp / (tp + fn + eps)

    f1 = 2 * precision_soft * recall_soft / (precision_soft + recall_soft + eps)
    return 1 - f1

In [None]:
def adj_mat(edge_index, num_nodes):
    """Build a dense, symmetric 0/1 adjacency matrix from an edge list.

    Args:
        edge_index: Integer tensor of shape ``(2, num_edges)``; column ``k``
            holds the two endpoints of edge ``k``.
        num_nodes: Number of rows/columns of the resulting matrix.

    Returns:
        A ``(num_nodes, num_nodes)`` float64 tensor with ``1`` at ``(i, j)``
        and ``(j, i)`` for every listed edge and ``0`` elsewhere. The matrix
        is a constant label and does not require gradients.
    """
    mat = torch.zeros((num_nodes, num_nodes), dtype=torch.float64)

    # Index tensors must be int64 and live on the same device as `mat`.
    idx = edge_index.to(device=mat.device, dtype=torch.long)
    src, dst = idx[0], idx[1]

    # Vectorized fill in both directions instead of one Python-level write per edge.
    mat[src, dst] = 1
    mat[dst, src] = 1
    return mat

In [None]:
# --- Hyperparameters ---
out_channels = 50                    # width of each encoder output head
num_features = dataset.num_features  # node feature size reported by the dataset
epochs = 180                         # number of passes over the training split

# --- Model: VGAE wrapper around the GCN encoder and inner-product decoder ---
encoder = VariationalGCNEncoder(num_features, out_channels)
decoder = VariationalGCNDecoder()
model = VGAE(encoder, decoder)

# --- Optimisation ---
# Alternatives that were tried and left disabled: Adagrad (lr=0.001,
# weight_decay=0.9), plain SGD (lr=0.001), and BCEWithLogitsLoss as criterion.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = f1_loss

In [None]:
# Describe the run from the live objects so the logged configuration cannot
# drift from what is actually being trained.
_lr = optimizer.param_groups[0]["lr"]
_optimizer_name = type(optimizer).__name__
_loss_name = getattr(criterion, "__name__", type(criterion).__name__)

wandb.init(
    # wandb project where this run will be logged
    project="secondary_structure_prediction",

    # tracked hyperparameters and run metadata
    config={
        "learning_rate": _lr,
        "architecture": "VGAE",
        "epochs": epochs,
        "optimizer": _optimizer_name,
        "out_channels": out_channels,
        "loss": _loss_name,
    },
    # "4conv" = the four hidden GCN layers of the encoder trunk.
    name=f"VGAE_4conv_{epochs}_{_lr:g}_{_optimizer_name}_{out_channels}",
)

# Small constant added to denominators to avoid division by zero; kept as a
# module-level global for any cell that reads it.
epsilon = 1e-10

In [None]:
def _f1_from_means(prec, rec):
    """Harmonic mean of precision and recall; 0.0 when it is undefined."""
    denom = prec + rec
    # `denom > 0` is also False for NaN (e.g. an empty split), so that case yields 0.0 too.
    return (2 * prec * rec) / denom if denom > 0 else 0.0


def train():
    """Train the global ``model`` for ``epochs`` epochs and log to wandb.

    Each epoch:
      1. Iterates over ``train_data`` one graph at a time, optimising
         ``criterion(decoded, adjacency) + kl_loss / num_nodes``.
      2. Evaluates the same objective on ``val_data`` with the model in
         eval mode and gradients disabled.
      3. Prints both summaries and logs train and validation metrics to
         wandb under their own keys.

    Per-epoch precision and recall are the means of the per-graph values;
    F1 is derived from those two means.

    Relies on the notebook globals ``model``, ``optimizer``, ``criterion``,
    ``epochs``, ``train_data``, ``val_data``, ``adj_mat``, ``precision``
    and ``recall``.
    """
    for epoch in range(1, epochs + 1):
        # ---------------- training pass ----------------
        model.train()
        train_loss = []
        train_recall = []
        train_precision = []
        for g in tqdm(train_data):
            optimizer.zero_grad()

            z = model.encode(g.x, g.edge_index)
            out = model.decode(z)

            y_true = adj_mat(g.edge_label_index, g.x.size(0))
            loss = criterion(out, y_true)
            # KL term scaled by graph size.
            loss = loss + (1 / g.num_nodes) * model.kl_loss()

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())
            train_precision.append(precision(out, y_true).item())
            train_recall.append(recall(out, y_true).item())

        train_loss_mean = np.mean(train_loss)
        train_prec = np.mean(train_precision)
        train_rec = np.mean(train_recall)
        train_f1 = _f1_from_means(train_prec, train_rec)
        print(f'Epoch: {epoch:03d}, loss: {train_loss_mean}, f1: {train_f1}, precision: {train_prec}, recall: {train_rec}')

        # ---------------- validation pass ----------------
        # Switch to eval mode so module behaviour that depends on
        # `self.training` is disabled during validation.
        model.eval()
        val_loss = []
        val_recall = []
        val_precision = []
        with torch.no_grad():
            for g in tqdm(val_data):
                z = model.encode(g.x, g.edge_index)
                out = model.decode(z)

                y_true = adj_mat(g.edge_label_index, g.x.size(0))
                loss = criterion(out, y_true)
                loss = loss + (1 / g.num_nodes) * model.kl_loss()

                val_loss.append(loss.item())
                val_precision.append(precision(out, y_true).item())
                val_recall.append(recall(out, y_true).item())

        val_loss_mean = np.mean(val_loss)
        val_prec = np.mean(val_precision)
        val_rec = np.mean(val_recall)
        val_f1 = _f1_from_means(val_prec, val_rec)
        print(f'val_loss: {val_loss_mean}, val_f1: {val_f1}, val_precision: {val_prec}, val_recall: {val_rec}')

        # Train and validation metrics are kept in separate variables so each
        # wandb key receives the value of its own split.
        wandb.log({
            "train_loss": train_loss_mean,
            "train_f1": train_f1,
            "train_precision": train_prec,
            "train_recall": train_rec,
            "val_loss": val_loss_mean,
            "val_f1": val_f1,
            "val_precision": val_prec,
            "val_recall": val_rec,
        })

In [None]:
# Run the full training/validation loop defined by train().
train()

In [None]:
# Mark the Weights & Biases run started by wandb.init() as finished.
wandb.finish()