$$\textbf{Proyecto de Verano: PLN aplicado a la Bioinformática}$$
$$\textit{Y. Sarahi García Gozález}$$

In [1]:
import numpy as np
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import numpy as np
import torch
import urllib.request
import sys
sys.path.append('/kaggle/input/proyecto-archivos/')
from transformers import BertModel, BertConfig, logging
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import wandb
from datetime import datetime
import yaml
import os
import shutil
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [2]:
pip install torchdrug 

Collecting torchdrug
  Downloading torchdrug-0.2.1-py3-none-any.whl.metadata (7.5 kB)
Collecting torch-scatter>=2.0.8 (from torchdrug)
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting torch-cluster>=1.5.9 (from torchdrug)
  Downloading torch_cluster-1.6.3.tar.gz (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting rdkit-pypi>=2020.9 (from torchdrug)
  Downloading rdkit_pypi-2022.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting lmdb (from torchdrug)
  Downloading lmdb-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting fair-esm (from torchdrug)
  Downloading fair_esm-2.0.0-py3-n

In [3]:
import torchdrug
from torchdrug.datasets import Fluorescence

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}\n')

Device: cuda



Descargar los datos

In [5]:
class PeptideBERTDataset(torch.utils.data.Dataset):
    def __init__(self, input_ids, attention_masks, labels):
        self.input_ids = input_ids
        self.attention_masks = attention_masks
        self.labels = labels

        self.length = len(self.input_ids)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        input_id = self.input_ids[idx]
        attention_mask = self.attention_masks[idx]
        label = self.labels[idx]

        return {
            'input_ids': torch.tensor(input_id, dtype=torch.long),
            'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
            'labels': torch.tensor(label, dtype=torch.float)
        }

In [6]:
logging.set_verbosity_error()
import torch.nn as nn
# Definimos la clase PeptideBERT, que hereda de torch.nn.Module (la clase base para todas las redes neuronales en PyTorch)
class PeptideBERT(nn.Module):
    def __init__(self, bert_config):
        super(PeptideBERT, self).__init__()

        # Cargamos el modelo preentrenado
        self.protbert = BertModel.from_pretrained(
            'Rostlab/prot_bert_bfd',
            config=bert_config,
            ignore_mismatched_sizes=True
        )
        
        # Clasificación con capas adicionales
        self.head = nn.Sequential(
            nn.Linear(bert_config.hidden_size, bert_config.hidden_size),
            nn.ReLU(),  # Función de activación ReLU
            nn.Dropout(p=0.15),  # Dropout para evitar sobreajuste
            nn.Linear(bert_config.hidden_size , 1)
        )
        
    def forward(self, inputs, attention_mask):
        # Pasamos las entradas a través de ProtBert
        output = self.protbert(inputs, attention_mask=attention_mask)
        # Usamos la salida de ProtBert como entrada a la capa de clasificación
        return self.head(output.pooler_output)
 


In [7]:
#criterio de pérdida,optimizador y el planificador de learning rate  para el entrenamiento del modelo

def cri_opt_sch(config, model):
    ##criterio de pérdida:MSE
    criterion = torch.nn.MSELoss()
    #optimizador AmadW
    optimizer = torch.optim.AdamW(model.parameters(), lr=config['optim']['lr'])
    #Scheduler
    if config['sch']['name'] == 'onecycle':
        ## Durante el entrenamiento, el learning-rate empieza en un valor inicial, aumenta hasta el valor máximo especificado (max_lr), y luego disminuye nuevamente hacia el final del entrenamiento.
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=config['optim']['lr'],
            epochs=config['epochs'],
            steps_per_epoch=config['sch']['steps']
        ) #Ajusta el learning-rate utilizando un ciclo de una sola pasada
    elif config['sch']['name'] == 'lronplateau':
        ## ajusta el learning-rate basándose en el rendimiento del modelo. Específicamente, reduce la tasa de aprendizaje cuando una métrica de rendimiento ha dejado de mejorar.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=config['sch']['factor'],
            patience=config['sch']['patience']
        )# Reduce lr cuando la métrica especificada ha dejado de mejorar.

    return criterion, optimizer, scheduler


In [8]:
###Función que se encarga del proceso de entrenamiento
def train(model, dataloader, optimizer, criterion, scheduler, device):
    model.train()  # Pone el modelo en modo de entrenamiento
    total_loss = 0.0

    for batch in tqdm(dataloader):  # Itera sobre los lotes de datos en el dataloader
        inputs = batch['input_ids'].to(device)  # Mueve las entradas al dispositivo (CPU o GPU)
        attention_mask = batch['attention_mask'].to(device)  # Mueve la máscara de atención al dispositivo
        labels = batch['labels'].to(device)  # Mueve las etiquetas al dispositivo

        optimizer.zero_grad()  # Resetea los gradientes del optimizador

        logits = model(inputs, attention_mask).squeeze(1)  # Pasa las entradas a través del modelo y ajusta las dimensiones
        loss = criterion(logits, labels)  # Calcula la pérdida

        loss.backward()  # Calcula los gradientes
        optimizer.step()  # Actualiza los parámetros del modelo
        # scheduler.step()  # Si el scheduler es OneCycleLR, ajusta la tasa de aprendizaje en cada paso

        total_loss += loss.item()  # Acumula la pérdida total

    return total_loss / len(dataloader)  # Retorna la pérdida promedio por lote

In [9]:
from scipy.stats import spearmanr
def validate(model, dataloader, criterion, device):
    model.eval()  # Pone el modelo en modo de evaluación
    total_loss = 0.0

    ground_truth = []
    predictions = []

    for batch in tqdm(dataloader):  # Itera sobre los lotes de datos en el dataloader
        inputs = batch['input_ids'].to(device)  # Mueve las entradas al dispositivo
        attention_mask = batch['attention_mask'].to(device)  # Mueve la máscara de atención al dispositivo
        labels = batch['labels'].to(device)  # Mueve las etiquetas al dispositivo

        with torch.inference_mode():  # Desactiva el cálculo de gradientes
            logits = model(inputs, attention_mask).squeeze(1)  # Pasa las entradas a través del modelo
            loss = criterion(logits, labels)  # Calcula la pérdida

        total_loss += loss.item()  # Acumula la pérdida total
        predictions.extend(logits.cpu().tolist())  # Añade las predicciones a la lista
        ground_truth.extend(labels.cpu().tolist())  # Añade las etiquetas reales a la lista

    total_loss = total_loss / len(dataloader)  # Calcula la pérdida promedio
    # Calcula el coeficiente de Pearson
    spearmanr_corr, _ = spearmanr(ground_truth, predictions)

    return total_loss,spearmanr_corr  # Retorna la pérdida promedio

In [10]:
from scipy.stats import spearmanr
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm

def test(model, dataloader, device):
    model.eval()  # Pone el modelo en modo de evaluación

    ground_truth = []
    predictions = []

    for batch in tqdm(dataloader):  # Itera sobre los lotes de datos en el dataloader
        inputs = batch['input_ids'].to(device)  # Mueve las entradas al dispositivo
        attention_mask = batch['attention_mask'].to(device)  # Mueve la máscara de atención al dispositivo
        labels = batch['labels']  # Las etiquetas permanecen en la CPU

        with torch.inference_mode():  # Desactiva el cálculo de gradientes
            logits = model(inputs, attention_mask).squeeze(1)  # Pasa las entradas a través del modelo

        predictions.extend(logits.cpu().tolist())  # Añade las predicciones a la lista
        ground_truth.extend(labels.tolist())  # Añade las etiquetas reales a la lista

    spearman_corr, _ = spearmanr(ground_truth, predictions)

    return spearman_corr  # Retorna las métricas de regresión y la correlación de Spearman


In [35]:
def train_model(model):
    print(f'{"="*30}{"TRAINING":^20}{"="*30}')

    best_val_spearman = 0.0  # Inicializa la mejor pérdida de validación en infinito

    # Iteramos cada época
    for epoch in range(config['epochs']):

        # Llamamos a la función de entrenamiento
        train_loss = train(model, train_data_loader, optimizer, criterion, scheduler, device)
        # Obtenemos learning rate
        curr_lr = optimizer.param_groups[0]['lr']
        # Imprimimos loss de entrenamiento y learning rate
        print(f'Epoch {epoch+1}/{config["epochs"]} - Train Loss: {train_loss}\tLR: {curr_lr}')
        # Imprimimos loss de validación
        val_loss,val_spearman = validate(model, val_data_loader, criterion, device)
        print(f'Epoch {epoch+1}/{config["epochs"]} - Validation Loss: {val_loss}\t Validation spearman: {val_spearman}\n')
        # Actualizar el Scheduler:
        scheduler.step(val_loss)

        # Registrar Métricas con wandb
        if not config['debug']:
            wandb.log({
                'train_loss': train_loss, 
                'val_loss': val_loss, 
                'val_spearman': val_spearman,
                'lr': curr_lr
            })
        # Guardamos mejor modelo
        if val_spearman >= best_val_spearman and not config['debug']:
            best_val_spearman = val_spearman
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'train_loss': train_loss,
                'val_loss': val_loss,
                'spearman': val_spearman,
                'lr': curr_lr
            }, f'{save_dir}/model.pt')
            print('Model Saved\n')
    wandb.finish()


Solubilidad

In [12]:
ds=Fluorescence("f'/kaggle/working/",lazy=True)

02:57:43   Downloading http://s3.amazonaws.com/songlabdata/proteindata/data_pytorch/fluorescence.tar.gz to f'/kaggle/working/fluorescence.tar.gz
02:57:44   Extracting f'/kaggle/working/fluorescence.tar.gz to f'/kaggle/working


Constructing proteins from sequences: 100%|██████████| 54025/54025 [00:00<00:00, 144534.49it/s]


In [13]:
sequences=ds.sequences
targets=ds.targets['log_fluorescence']

In [14]:
aminoacidos = ['G', 'A', 'S', 'P', 'V', 'T', 'C', 'I', 'L', 'N', 'D', 'Q', 'K', 'E', 'M', 'H', 'F', 'R', 'Y', 'W']
#diccionario de mapeo
letter_to_number = {letter: index  for index, letter in enumerate(aminoacidos)}
# Función para convertir secuencias de letras a secuencias de números
def convert_sequences_to_numbers(sequences, mapping):
    return [[mapping[letter] for letter in seq] for seq in sequences]

sequences_number=convert_sequences_to_numbers(sequences, letter_to_number)

In [15]:
#longitud del string más larga
max_length = max(len(s) for s in sequences)

print("La longitud del string más largo es:", max_length)

La longitud del string más largo es: 237


In [16]:
ds.num_samples #train,valid,test

[21446, 5362, 27217]

In [17]:
from torch.utils.data import DataLoader
import numpy as np
from keras.preprocessing.sequence import pad_sequences

def atention_mask(array_sequences, max_length):
    m = len(array_sequences)
    atention_mask_sequence = np.zeros((m, max_length), dtype=np.float64)

    for i, seq in enumerate(array_sequences):
        seq_len = min(len(seq), max_length)
        atention_mask_sequence[i, :seq_len] = 1

    return atention_mask_sequence


def load_data_torchdrug(sequences, targets, ds, max_length,truncate=True):
    print(f'{"="*30}{"DATA":^20}{"="*30}')
    
    n0=ds.num_samples[0] #lista que contiene el número de muestras por set:train,val,test
    n1=ds.num_samples[0]+ds.num_samples[1]
   
    
    train_sequences=[seq for seq in sequences[0:n0] if len(seq)<500]
    train_targets=np.array([target for seq,target in zip(sequences[0:n0],targets[0:n0]) if len(seq)<500])
  
    val_sequences=[seq for seq in sequences[n0:n1] if len(seq)<500]
    val_targets=np.array([target for seq,target in zip(sequences[n0:n1],targets[n0:n1]) if len(seq)<500])

    test_sequences=[seq for seq in sequences[n1:] if len(seq)<500]
    test_targets=np.array([target for seq,target in zip(sequences[n1:],targets[n1:]) if len(seq)<500])

    #cnvertir a array
    # Padear las secuencias para que todas tengan la misma longitud
    max_len = 237
    train_sequences = pad_sequences(train_sequences, maxlen=max_len, padding='post')
    val_sequences= pad_sequences(val_sequences, maxlen=max_len, padding='post')
    test_sequences = pad_sequences(test_sequences, maxlen=max_len, padding='post')
    
    
    # Crear las máscaras de atención
    attention_mask_train = (train_sequences > 0).astype(np.float64)
    attention_mask_val = (val_sequences > 0).astype(np.float64)
    attention_mask_test = (test_sequences > 0).astype(np.float64)
    
    
    
    train_dataset = PeptideBERTDataset(input_ids=train_sequences, attention_masks=attention_mask_train, labels=train_targets)
    val_dataset = PeptideBERTDataset(input_ids=val_sequences, attention_masks=attention_mask_val, labels=val_targets)
    test_dataset = PeptideBERTDataset(input_ids=test_sequences, attention_masks=attention_mask_test, labels=test_targets)

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=16,
        shuffle=True
    )

    val_data_loader = DataLoader(
        val_dataset,
        batch_size=16,
        shuffle=False
    )

    test_data_loader = DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False
    )

    print('Batch size: ', 16)

    print('Train dataset samples: ', len(train_dataset))
    print('Validation dataset samples: ', len(val_dataset))
    print('Test dataset samples: ', len(test_dataset))

    print('Train dataset batches: ', len(train_data_loader))
    print('Validation dataset batches: ', len(val_data_loader))
    print('Test dataset batches: ', len(test_data_loader))

    print()

    return train_data_loader, val_data_loader, test_data_loader

2024-08-02 02:57:50.871300: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-02 02:57:50.871405: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-02 02:57:51.000370: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [18]:
train_data_loader, val_data_loader, test_data_loader = load_data_torchdrug(sequences_number,targets,ds,max_length)

Batch size:  16
Train dataset samples:  21446
Validation dataset samples:  5362
Test dataset samples:  27217
Train dataset batches:  1341
Validation dataset batches:  336
Test dataset batches:  1702



In [19]:
train_data_loader.dataset[0]

{'input_ids': tensor([ 2, 12,  0, 13, 13,  8, 16,  5,  0,  4,  4,  3,  7,  8,  4, 13,  8, 10,
          0, 10,  4,  9,  0, 15, 12, 16,  2,  4,  2,  0, 13,  0, 13,  0, 10,  1,
          5, 18,  0, 12,  8,  5,  8, 12, 16,  7,  6,  5,  5,  0, 12,  8,  3,  4,
          3, 19,  3,  5,  8,  4,  5,  5,  8,  2, 18,  0,  4, 11,  6, 16,  2, 17,
         18,  3, 10, 15, 14, 12, 11, 15, 10, 16, 16, 12,  2,  1, 14,  3, 13,  0,
         18,  4, 11, 13, 17,  5,  7, 16, 16, 12, 10, 10,  0,  9, 18, 12,  5, 17,
          1, 13,  4, 12, 16, 13,  0, 10,  5,  8,  4,  9, 17,  7, 13,  8, 12,  0,
          7, 10, 16, 12, 13, 10,  0,  9,  7,  8,  0, 15, 12,  8, 13, 18,  9, 18,
          9,  2, 15,  9,  4, 18,  7, 14,  1, 10, 12, 11, 12,  9,  0,  7, 12,  4,
          9, 16, 12,  7, 17, 15, 12,  7, 13, 10,  0,  2,  4, 11,  8,  1, 10, 15,
         18, 11, 11,  9,  5,  3,  7,  0, 10,  0,  3,  4,  8,  8,  3, 10,  9, 15,
         18,  8,  2,  5, 11,  2,  1,  8,  2, 12, 10,  3,  9, 13, 12, 17, 10, 15,
         14,  4

In [20]:
train_data_loader.dataset[0]['input_ids']

tensor([ 2, 12,  0, 13, 13,  8, 16,  5,  0,  4,  4,  3,  7,  8,  4, 13,  8, 10,
         0, 10,  4,  9,  0, 15, 12, 16,  2,  4,  2,  0, 13,  0, 13,  0, 10,  1,
         5, 18,  0, 12,  8,  5,  8, 12, 16,  7,  6,  5,  5,  0, 12,  8,  3,  4,
         3, 19,  3,  5,  8,  4,  5,  5,  8,  2, 18,  0,  4, 11,  6, 16,  2, 17,
        18,  3, 10, 15, 14, 12, 11, 15, 10, 16, 16, 12,  2,  1, 14,  3, 13,  0,
        18,  4, 11, 13, 17,  5,  7, 16, 16, 12, 10, 10,  0,  9, 18, 12,  5, 17,
         1, 13,  4, 12, 16, 13,  0, 10,  5,  8,  4,  9, 17,  7, 13,  8, 12,  0,
         7, 10, 16, 12, 13, 10,  0,  9,  7,  8,  0, 15, 12,  8, 13, 18,  9, 18,
         9,  2, 15,  9,  4, 18,  7, 14,  1, 10, 12, 11, 12,  9,  0,  7, 12,  4,
         9, 16, 12,  7, 17, 15, 12,  7, 13, 10,  0,  2,  4, 11,  8,  1, 10, 15,
        18, 11, 11,  9,  5,  3,  7,  0, 10,  0,  3,  4,  8,  8,  3, 10,  9, 15,
        18,  8,  2,  5, 11,  2,  1,  8,  2, 12, 10,  3,  9, 13, 12, 17, 10, 15,
        14,  4,  8,  8, 13, 16,  4,  5, 

In [21]:
train_data_loader.dataset[7410]['input_ids'].size()

torch.Size([237])

In [22]:
##Configuración y Preparación###
#llamamos al archivo donde se guarda la config del modelo peptidebert
config = yaml.load(open('/kaggle/input/proyecto-archivos/config.yaml', 'r'), Loader=yaml.FullLoader)
config['task'] = 'fluoresencia'
config['batch_size'] = 16
config['epochs'] = 30
config['optim']['lr'] = 1.0e-5
config['sch']['steps'] = len(train_data_loader)

In [23]:
def create_model_torchdrug(config):
    bert_config = BertConfig(
        vocab_size=25,
        hidden_size=512,
        num_hidden_layers=16,
        num_attention_heads=16,
        hidden_dropout_prob=0.15,
        max_position_embeddings= 256 #maximo len de preentrenamiento HF
    )
    #creamos una istancia de PeptideBERT utilizando la configuración de BERT definida
    model = PeptideBERT(bert_config).to(device)
    #regresamos el modelo
    return model

In [31]:

#creamos el modelo
model_torchdrug = create_model_torchdrug(config)

#configuramos criterio de pérdida, optimizador y scheduler
criterion, optimizer, scheduler = cri_opt_sch(config, model_torchdrug)


#Configuración de Weights & Biases (WandB)
if not config['debug']:
    run_name = f'{config["task"]}-{datetime.now().strftime("%m%d_%H%M")}'
    wandb.init(project='PeptideBERT', name=run_name)

    save_dir = f'/kaggle/working/checkpoints/{run_name}'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    else:
        print('ya existe')
    shutil.copy('/kaggle/input/proyecto-archivos/config.yaml', f'{save_dir}/config.yaml')
    #shutil.copy('/kaggle/input/model-peptidos/network.py', f'{save_dir}/network.py')

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
lr,▁
train_loss,▁
val_loss,▁
val_spearman,▁

0,1
lr,1e-05
train_loss,0.78892
val_loss,0.71501
val_spearman,0.10353


In [36]:
#Entrenamiento del Modelo
train_model(model_torchdrug)
if not config['debug']:
    model_torchdrug.load_state_dict(torch.load(f'{save_dir}/model.pt')['model_state_dict'], strict=False)




100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 1/30 - Train Loss: 0.7102146951244181	LR: 1e-05


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 1/30 - Validation Loss: 0.701082642101461	 Validation spearman: 0.09626263599120631

Model Saved



100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 2/30 - Train Loss: 0.7097604602941997	LR: 1e-05


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 2/30 - Validation Loss: 0.7004825546450558	 Validation spearman: 0.24070103174442548

Model Saved



100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 3/30 - Train Loss: 0.7081815564559076	LR: 1e-05


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 3/30 - Validation Loss: 0.7075522171466478	 Validation spearman: 0.22581115338103414



100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 4/30 - Train Loss: 0.7089868290634586	LR: 1e-05


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 4/30 - Validation Loss: 0.7229855139961555	 Validation spearman: 0.10076756797194697



100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 5/30 - Train Loss: 0.7063267248877831	LR: 1.0000000000000002e-06


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 5/30 - Validation Loss: 0.7032019827248794	 Validation spearman: 0.12327946448778014



100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 6/30 - Train Loss: 0.7063942842079933	LR: 1.0000000000000002e-06


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 6/30 - Validation Loss: 0.7044989633640009	 Validation spearman: 0.15567046915848656



100%|██████████| 1341/1341 [07:48<00:00,  2.86it/s]


Epoch 7/30 - Train Loss: 0.7070801406543171	LR: 1.0000000000000002e-06


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 7/30 - Validation Loss: 0.7017252922191152	 Validation spearman: 0.17082044763159762



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 8/30 - Train Loss: 0.7060433746850997	LR: 1.0000000000000002e-06


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 8/30 - Validation Loss: 0.6996578043991966	 Validation spearman: 0.2130033362142388



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 9/30 - Train Loss: 0.7069640752319788	LR: 1.0000000000000002e-06


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 9/30 - Validation Loss: 0.7049830453026862	 Validation spearman: 0.20661102412607643



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 10/30 - Train Loss: 0.7064919169994986	LR: 1.0000000000000002e-07


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 10/30 - Validation Loss: 0.7037663140688979	 Validation spearman: 0.21985307594146863



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 11/30 - Train Loss: 0.7075876970674723	LR: 1.0000000000000002e-07


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 11/30 - Validation Loss: 0.703661255560638	 Validation spearman: 0.24095289809927192

Model Saved



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 12/30 - Train Loss: 0.707435827660614	LR: 1.0000000000000002e-07


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 12/30 - Validation Loss: 0.7029348810735557	 Validation spearman: 0.22160414135480097



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 13/30 - Train Loss: 0.7061256672867549	LR: 1.0000000000000002e-07


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 13/30 - Validation Loss: 0.7028784810432366	 Validation spearman: 0.23042790453099976



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 14/30 - Train Loss: 0.7066634077853804	LR: 1.0000000000000002e-07


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 14/30 - Validation Loss: 0.7035509960592857	 Validation spearman: 0.23366330660365456



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 15/30 - Train Loss: 0.7080022019072312	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 15/30 - Validation Loss: 0.7034720762943228	 Validation spearman: 0.2307565770090365



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 16/30 - Train Loss: 0.7041708030678904	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.44it/s]


Epoch 16/30 - Validation Loss: 0.7034848143036166	 Validation spearman: 0.24270869507660983

Model Saved



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 17/30 - Train Loss: 0.7077989210385071	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.44it/s]


Epoch 17/30 - Validation Loss: 0.7034345087595284	 Validation spearman: 0.2451943574865917

Model Saved



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 18/30 - Train Loss: 0.7073965717904342	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.44it/s]


Epoch 18/30 - Validation Loss: 0.7034141504471856	 Validation spearman: 0.24048930067816604



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 19/30 - Train Loss: 0.7080928650222813	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.44it/s]


Epoch 19/30 - Validation Loss: 0.7033784652261862	 Validation spearman: 0.25182226008585656

Model Saved



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 20/30 - Train Loss: 0.7076198213152437	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 20/30 - Validation Loss: 0.7033570002808812	 Validation spearman: 0.24737080590523078



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 21/30 - Train Loss: 0.7061562572205182	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 21/30 - Validation Loss: 0.7033415256910736	 Validation spearman: 0.24387881913623788



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 22/30 - Train Loss: 0.7057479697208099	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 22/30 - Validation Loss: 0.7033251475409737	 Validation spearman: 0.24909161415913025



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 23/30 - Train Loss: 0.7063505232867512	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 23/30 - Validation Loss: 0.7033452050139507	 Validation spearman: 0.23297010865299236



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 24/30 - Train Loss: 0.7066311105241267	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 24/30 - Validation Loss: 0.703316040226214	 Validation spearman: 0.2445137740727179



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 25/30 - Train Loss: 0.7063132333364351	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 25/30 - Validation Loss: 0.7033215663174078	 Validation spearman: 0.2390703095500761



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 26/30 - Train Loss: 0.7057580755027615	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.43it/s]


Epoch 26/30 - Validation Loss: 0.7033179445147869	 Validation spearman: 0.21581569918328175



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 27/30 - Train Loss: 0.707142449004227	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.42it/s]


Epoch 27/30 - Validation Loss: 0.703313705378345	 Validation spearman: 0.2443833231693246



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 28/30 - Train Loss: 0.7074430815147695	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.47it/s]


Epoch 28/30 - Validation Loss: 0.7032878540145854	 Validation spearman: 0.23651992556619128



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 29/30 - Train Loss: 0.7075561036999594	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.44it/s]


Epoch 29/30 - Validation Loss: 0.7032708614798528	 Validation spearman: 0.24438732950996928



100%|██████████| 1341/1341 [07:47<00:00,  2.87it/s]


Epoch 30/30 - Train Loss: 0.7067271697819011	LR: 1.0000000000000004e-08


100%|██████████| 336/336 [00:35<00:00,  9.46it/s]


Epoch 30/30 - Validation Loss: 0.7032723975633937	 Validation spearman: 0.24297107155280848



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
lr,█████████▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▁▄▁█▆▂▂▃▆▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
val_spearman,▅▄▄██▁▇▇▁▂▄▄▆▆▆▇▆▇▇▇▇█▇██▇█▇▇▇▆▇▇▇▇

0,1
lr,0.0
train_loss,0.70673
val_loss,0.70327
val_spearman,0.24297


In [37]:
#test
test_sperman = test(model_torchdrug, test_data_loader, device)
print(f'Test Accuracy: {test_sperman}%')

100%|██████████| 1702/1702 [02:59<00:00,  9.46it/s]

Test Accuracy: 0.23889437267209304%



