In [1]:
import optuna as opt
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

import sys
sys.path.append('/home/ubuntu/nk-paper-2024-1/pscapes')
sys.path.append('/home/ubuntu/nk-paper-2024-1/nk-ml-2024')

from torch.utils.data import DataLoader

from pscapes.landscape_class import ProteinLandscape
from pscapes.utils import dict_to_np_array, np_array_to_dict

from src.architectures.architectures import SequenceRegressionCNN, SequenceRegressionLSTM, SequenceRegressionMLP, SequenceRegressionLinear, SequenceRegressionTransformer
from src.architectures.ml_utils import train_val_test_split_ohe, train_model
import pickle

from sklearn.metrics import r2_score

from hyperopt import optimise_hparams


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
SEQ_LEN = 6
AA_ALPHABET = 'ACDEFG'

In [3]:
#Load NK landscapes -- only a single replicate for hparam tuning 

LANDSCAPES = []
for k in range(6): 
    for r in range(1): 
        landscape = ProteinLandscape(csv_path='../data/nk_landscapes/k{0}_r{1}.csv'.format(k,r), amino_acids=AA_ALPHABET)
        LANDSCAPES.append(landscape)

In [4]:
LANDSCAPES = [i.fit_OHE() for i in LANDSCAPES]

In [5]:
landscapes_ohe, xy_train, xy_val, xy_test, x_test, y_test = train_val_test_split_ohe(LANDSCAPES)

In [6]:
landscape0_xy_train = xy_train[0]
landscape0_xy_val   = xy_val[0]

In [7]:
def cnn_objective_NK(trial, train_data, val_data, seq_length, amino_acids, n_epochs=30,
                    patience=5, min_delta=1e-5):
    # Define the search space
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 2)
    
    num_kernels = [int(trial.suggest_discrete_uniform("n_kernels", 16, 128, 16))
                   for i in range(num_conv_layers)]  
    
    kernel_sizes = [int(trial.suggest_discrete_uniform("kernel_sizes", 2, 6, 1))
                   for i in range(num_conv_layers)]
    
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = trial.suggest_discrete_uniform('batch_size', 32, 128, 32)
    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionCNN(input_channels=len(AA_ALPHABET), sequence_length=SEQ_LEN, 
                                  num_conv_layers=num_conv_layers, n_kernels=num_kernels, kernel_sizes=kernel_sizes)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Dummy training and validation data loaders
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32)

    #run train/val
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader, 
                               val_loader, seq_length=seq_length, amino_acids=amino_acids,
                               n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss


In [8]:
SequenceRegressionMLP?

[0;31mInit signature:[0m
[0mSequenceRegressionMLP[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0malphabet_size[0m[0;34m=[0m[0;36m5[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msequence_length[0m[0;34m=[0m[0;36m10[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mhidden_sizes[0m[0;34m=[0m[0;34m[[0m[0;36m128[0m[0;34m,[0m [0;36m64[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing to nest them in
a tree structure. You can assign the submodules as regular attributes::

    import torch.nn as nn
    import torch.nn.functional as F

    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 20, 5)
            self.conv2 = nn.Conv2d(20, 20, 5)

        def forward(self, x):
            x = F.relu(self.conv1(x))

In [9]:
256*2

512

In [10]:
def mlp_objective_NK(trial, train_data, val_data, seq_length, amino_acids, n_epochs=30,
                    patience=5, min_delta=1e-5):
    # Define the search space
    n_hidden_layers = trial.suggest_int('num_hidden_layers', 1, 4)
    hidden_sizes    = [int(trial.suggest_categorical("hidden{}_size".format(i), [32,64, 96, 128, 256])) 
                       for i in range(n_hidden_layers)]
    print(hidden_sizes)
    
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 128, 32))
    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionMLP(alphabet_size=len(amino_acids), sequence_length=seq_length, hidden_sizes=hidden_sizes)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)

    #run train/val
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader, 
                               val_loader, seq_length=seq_length, amino_acids=amino_acids,
                               n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss

In [11]:
def unidirectional_lstm_objective_NK(trial, train_data, val_data, seq_length, amino_acids, n_epochs=30,
                    patience=5, min_delta=1e-5):
    # Define the search space

    
    num_layers = trial.suggest_int('num_layers', 1, 4)
    hidden_size    = trial.suggest_categorical("hidden_size", [32,64, 96, 128, 256])
                       
    
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 256, 32))
    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionLSTM(input_size=len(amino_acids), hidden_size=hidden_size, num_layers=num_layers, bidirectional=False)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)

    #run train/val
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader, 
                               val_loader, seq_length=seq_length, amino_acids=amino_acids,
                               n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss

In [12]:
def bidirectional_lstm_objective_NK(trial, train_data, val_data, seq_length, amino_acids, n_epochs=30,
                    patience=5, min_delta=1e-5):
    # Define the search space

    
    num_layers = trial.suggest_int('num_layers', 1, 4)
    hidden_size    = trial.suggest_categorical("hidden_size", [32,64, 96, 128, 256])
                       
    
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 256, 32))
    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionLSTM(input_size=len(amino_acids), hidden_size=hidden_size, num_layers=num_layers, bidirectional=True)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)

    #run train/val
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader, 
                               val_loader, seq_length=seq_length, amino_acids=amino_acids,
                               n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss

In [26]:
def generate_valid_combinations_transformer(embed_dim_options, max_heads):
    valid_combinations = []
    
    for embed_dim in embed_dim_options:
        for num_heads in range(1, max_heads + 1):
            if embed_dim % num_heads == 0:
                valid_combinations.append((embed_dim, num_heads))
    
    return valid_combinations
embed_dim_options = [16, 32, 64, 128]
max_heads = 8

valid_combinations = generate_valid_combinations(embed_dim_options, max_heads)

In [28]:
def transformer_objective_NK(trial, train_data, val_data, seq_length, amino_acids, n_epochs=30,
                    patience=5, min_delta=1e-5):
    # Define the search space
    input_dim = len(amino_acids)
    #d_model is the embedding 

    embed_dim_options = [16, 32, 64, 128]
    max_heads = 8
    valid_combinations = generate_valid_combinations(embed_dim_options, max_heads)

    d_model, nhead = trial.suggest_categorical("embed_dim_num_heads", valid_combinations)
        
    num_layers      = trial.suggest_int('num_layers', 1, 6)
    dim_feedforward = trial.suggest_categorical('dim_feedforward', [128, 256, 512]) 
    max_seq_length  = trial.suggest_categorical("max_seq_length", [6, 8, 10, 12])                  
    
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 256, 32))
    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionTransformer(input_dim=input_dim, d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
                                         max_seq_length=max_seq_length)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #train and val loaders 
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)

    #run train/val
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader, 
                               val_loader, seq_length=seq_length, amino_acids=amino_acids,
                               n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss

In [None]:
 SequenceRegressionLinear(alphabet_size=5, sequence_length=10)

In [33]:
def linear_objective_NK(trial, train_data, val_data, seq_length, amino_acids, n_epochs=30,
                    patience=5, min_delta=1e-5):
    # Define the search space

    
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 256, 32))
    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionLinear(alphabet_size=len(amino_acids), sequence_length=seq_length)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #train and val loaders 
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)

    #run train/val
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader, 
                               val_loader, seq_length=seq_length, amino_acids=amino_acids,
                               n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss

In [34]:
#cnn_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]
#mlp_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]
#lstm_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]
#transformer_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]
linear_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]


[I 2024-10-30 06:51:24,796] A new study created in memory with name: no-name-80664c61-0757-4a88-84a8-e0e124b059ea
[I 2024-10-30 06:51:24,797] A new study created in memory with name: no-name-3644e0d0-e967-4b9c-a6db-a56ca8250d6d
[I 2024-10-30 06:51:24,798] A new study created in memory with name: no-name-56ad802a-a4d7-4112-b72f-d32c5cdc0787
[I 2024-10-30 06:51:24,799] A new study created in memory with name: no-name-5a625edb-c9e0-4eb0-ac71-4ad7e2bd9ae6
[I 2024-10-30 06:51:24,799] A new study created in memory with name: no-name-537a34c0-aa29-4cb6-acdb-06cba6c79b9d
[I 2024-10-30 06:51:24,800] A new study created in memory with name: no-name-9a67bddf-149f-483a-9510-0b3cf2786b3e


In [35]:
for index, study in enumerate(linear_studies):
    study.optimize(lambda trial: linear_objective_NK(trial, train_data= xy_train[index], val_data=xy_val[index], seq_length=SEQ_LEN,
                                               amino_acids=AA_ALPHABET, n_epochs=50), n_trials=12)

  batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 256, 32))


Epoch: 0, Val loss: 1.2402881593693669e-15
Epoch: 1, Val loss: 1.3197617229092686e-15
Epoch: 2, Val loss: 1.828292697591813e-15
Epoch: 3, Val loss: 2.6505454891931755e-14
Epoch: 4, Val loss: 2.836986701585662e-05


[I 2024-10-30 06:52:12,775] Trial 0 finished with value: 3.3416055099907804e-05 and parameters: {'lr': 0.01, 'batch_size': 32.0}. Best is trial 0 with value: 3.3416055099907804e-05.


Epoch: 5, Val loss: 3.3416055099907804e-05
Early stopping at epoch 5
Best validation loss this trial: 3.3416055099907804e-05
Epoch: 0, Val loss: 0.5427766327674572
Epoch: 1, Val loss: 0.41609393251247895
Epoch: 2, Val loss: 0.31599770066065663
Epoch: 3, Val loss: 0.2385255109805327
Epoch: 4, Val loss: 0.17974791083580408
Epoch: 5, Val loss: 0.13632374295057395
Epoch: 6, Val loss: 0.10488661894431481
Epoch: 7, Val loss: 0.08247372355216588
Epoch: 8, Val loss: 0.06673415100727326
Epoch: 9, Val loss: 0.05559699351970966
Epoch: 10, Val loss: 0.04750126227736473
Epoch: 11, Val loss: 0.041304399665349566
Epoch: 12, Val loss: 0.03627334119608769
Epoch: 13, Val loss: 0.031975625321651116
Epoch: 14, Val loss: 0.02813585475087166
Epoch: 15, Val loss: 0.02464491420258314
Epoch: 16, Val loss: 0.021449160881531544
Epoch: 17, Val loss: 0.018525369751911897
Epoch: 18, Val loss: 0.01585659236671069
Epoch: 19, Val loss: 0.013443658844782757
Epoch: 20, Val loss: 0.011282187337294603
Epoch: 21, Val loss:

[I 2024-10-30 06:52:21,165] Trial 1 finished with value: 1.0280315493469644e-07 and parameters: {'lr': 0.0001, 'batch_size': 192.0}. Best is trial 1 with value: 1.0280315493469644e-07.


Epoch: 45, Val loss: 2.597567284003442e-07
Epoch: 46, Val loss: 1.0280315493469644e-07
Early stopping at epoch 46
Best validation loss this trial: 1.0280315493469644e-07
Epoch: 0, Val loss: 1.1686066424031757e-15
Epoch: 1, Val loss: 8.534083752269086e-16
Epoch: 2, Val loss: 1.3835006612488165e-15
Epoch: 3, Val loss: 3.4778827241108188e-15
Epoch: 4, Val loss: 1.6579740071181353e-13


[I 2024-10-30 06:52:25,983] Trial 2 finished with value: 5.789339382514129e-07 and parameters: {'lr': 0.01, 'batch_size': 32.0}. Best is trial 1 with value: 1.0280315493469644e-07.


Epoch: 5, Val loss: 5.789339382514129e-07
Early stopping at epoch 5
Best validation loss this trial: 5.789339382514129e-07
Epoch: 0, Val loss: 9.602559066874984e-12
Epoch: 1, Val loss: 3.861126931364208e-15
Epoch: 2, Val loss: 3.3969584681219523e-15
Epoch: 3, Val loss: 3.323207618069069e-15
Epoch: 4, Val loss: 1.9991530783922904e-15


[I 2024-10-30 06:52:27,374] Trial 3 finished with value: 2.1383718084064995e-15 and parameters: {'lr': 0.01, 'batch_size': 128.0}. Best is trial 3 with value: 2.1383718084064995e-15.


Epoch: 5, Val loss: 2.1383718084064995e-15
Early stopping at epoch 5
Best validation loss this trial: 2.1383718084064995e-15
Epoch: 0, Val loss: 1.7162506732401745e-11
Epoch: 1, Val loss: 2.385245493703519e-15
Epoch: 2, Val loss: 1.816726808318831e-15
Epoch: 3, Val loss: 1.3257842886287517e-15
Epoch: 4, Val loss: 1.2220400790797309e-15


[I 2024-10-30 06:52:28,789] Trial 4 finished with value: 9.055279846977788e-16 and parameters: {'lr': 0.01, 'batch_size': 128.0}. Best is trial 4 with value: 9.055279846977788e-16.
[I 2024-10-30 06:52:28,967] Trial 5 pruned. 


Epoch: 5, Val loss: 9.055279846977788e-16
Early stopping at epoch 5
Best validation loss this trial: 9.055279846977788e-16
Epoch: 0, Val loss: 3.7535180934921115e-08


[I 2024-10-30 06:52:29,272] Trial 6 pruned. 
[I 2024-10-30 06:52:29,451] Trial 7 pruned. 


Epoch: 0, Val loss: 0.005632241068479533
Epoch: 0, Val loss: 1.1533647085825986e-08


[I 2024-10-30 06:52:29,659] Trial 8 pruned. 


Epoch: 0, Val loss: 4.051625075629742e-10


[I 2024-10-30 06:52:29,866] Trial 9 pruned. 
[I 2024-10-30 06:52:30,019] Trial 10 pruned. 


Epoch: 0, Val loss: 0.03763038308379498
Epoch: 0, Val loss: 0.2619664028286934
Epoch: 0, Val loss: 2.706894636996159e-15
Epoch: 1, Val loss: 1.1132916213793695e-15
Epoch: 2, Val loss: 1.0640098259994128e-15
Epoch: 3, Val loss: 9.036677236618428e-16
Epoch: 4, Val loss: 1.0032199525453325e-15


[I 2024-10-30 06:52:31,866] Trial 11 finished with value: 9.132470893898361e-16 and parameters: {'lr': 0.01, 'batch_size': 96.0}. Best is trial 4 with value: 9.055279846977788e-16.


Epoch: 5, Val loss: 9.132470893898361e-16
Early stopping at epoch 5
Best validation loss this trial: 9.132470893898361e-16
Epoch: 0, Val loss: 0.01872760296250001
Epoch: 1, Val loss: 0.013238088621829566
Epoch: 2, Val loss: 0.013127447404444981
Epoch: 3, Val loss: 0.01316770919574759
Epoch: 4, Val loss: 0.013139345372716585
Epoch: 5, Val loss: 0.013149433637945315
Epoch: 6, Val loss: 0.013173393738002349


[I 2024-10-30 06:52:34,327] Trial 0 finished with value: 0.013194964112093052 and parameters: {'lr': 0.001, 'batch_size': 96.0}. Best is trial 0 with value: 0.013194964112093052.


Epoch: 7, Val loss: 0.013194964112093052
Early stopping at epoch 7
Best validation loss this trial: 0.013194964112093052
Epoch: 0, Val loss: 0.013426726435621579
Epoch: 1, Val loss: 0.013361607057352861
Epoch: 2, Val loss: 0.013364329561591148
Epoch: 3, Val loss: 0.013495281680176656
Epoch: 4, Val loss: 0.013549954661478599


[I 2024-10-30 06:52:35,355] Trial 1 finished with value: 0.013650519866496324 and parameters: {'lr': 0.01, 'batch_size': 256.0}. Best is trial 0 with value: 0.013194964112093052.


Epoch: 5, Val loss: 0.013469094689935446
Epoch: 6, Val loss: 0.013650519866496324
Early stopping at epoch 6
Best validation loss this trial: 0.013650519866496324
Epoch: 0, Val loss: 0.15713797280421624
Epoch: 1, Val loss: 0.08408040562883401
Epoch: 2, Val loss: 0.05444951470081623
Epoch: 3, Val loss: 0.0410430608317256
Epoch: 4, Val loss: 0.032807060971091956
Epoch: 5, Val loss: 0.026718489945125885
Epoch: 6, Val loss: 0.022184215306949157
Epoch: 7, Val loss: 0.018878418522385452
Epoch: 8, Val loss: 0.016593351637801297
Epoch: 9, Val loss: 0.015089014986864267
Epoch: 10, Val loss: 0.014170311964475192
Epoch: 11, Val loss: 0.013640978254186802
Epoch: 12, Val loss: 0.01335692268390304
Epoch: 13, Val loss: 0.013225386026673593
Epoch: 14, Val loss: 0.013158675963775469
Epoch: 15, Val loss: 0.013132614214928487
Epoch: 16, Val loss: 0.013125629844860388
Epoch: 17, Val loss: 0.013124644457816314
Epoch: 18, Val loss: 0.013123012386644498
Epoch: 19, Val loss: 0.013120713607909588
Epoch: 20, Val

[I 2024-10-30 06:52:43,064] Trial 2 finished with value: 0.013119734608783172 and parameters: {'lr': 0.0001, 'batch_size': 96.0}. Best is trial 2 with value: 0.013119734608783172.


Epoch: 24, Val loss: 0.013119734608783172
Early stopping at epoch 24
Best validation loss this trial: 0.013119734608783172
Epoch: 0, Val loss: 0.01327836134025238
Epoch: 1, Val loss: 0.013182869539237939
Epoch: 2, Val loss: 0.013335156829581939
Epoch: 3, Val loss: 0.013334817105792781
Epoch: 4, Val loss: 0.013320462553340018
Epoch: 5, Val loss: 0.013265026118367529


[I 2024-10-30 06:52:48,746] Trial 3 finished with value: 0.013296562073847804 and parameters: {'lr': 0.001, 'batch_size': 32.0}. Best is trial 2 with value: 0.013119734608783172.


Epoch: 6, Val loss: 0.013296562073847804
Early stopping at epoch 6
Best validation loss this trial: 0.013296562073847804
Epoch: 0, Val loss: 0.013379126668293426


[W 2024-10-30 06:52:49,279] Trial 4 failed with parameters: {'lr': 0.01, 'batch_size': 160.0} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/ubuntu/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3441/3185484137.py", line 2, in <lambda>
    study.optimize(lambda trial: linear_objective_NK(trial, train_data= xy_train[index], val_data=xy_val[index], seq_length=SEQ_LEN,
  File "/tmp/ipykernel_3441/689578029.py", line 24, in linear_objective_NK
    val_loss = optimise_hparams(trial, model, loss_fn, optimizer, train_loader,
  File "/home/ubuntu/nk-paper-2024-1/nk-ml-2024/hyperopt/hyperopt.py", line 74, in optimise_hparams
    predictions = model(x_batch)
  File "/usr/lib/python3/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/lib/python3/dist-packages/torch/

Epoch: 1, Val loss: 0.013429664372605211


KeyboardInterrupt: 

In [56]:
args = {'a':1, 'b':2, 'd':True}

In [57]:
args['d']

True

In [47]:
def test(a, b, c): 
    return a+b+c

In [12]:
import copy 

def generate_valid_combinations_transformer(embed_dim_options, max_heads):
    valid_combinations = []
    
    for embed_dim in embed_dim_options:
        for num_heads in range(1, max_heads + 1):
            if embed_dim % num_heads == 0:
                valid_combinations.append((embed_dim, num_heads))
    
    return valid_combinations

def objective_NK(trial, h_param_search_space, model, train_data, val_data, n_epochs=30, patience=5, min_delta=1e-5, device='cuda'):
    """
    High-level function to perform hyperparameter optimisation on models. Define the search space in h_param_search_space (dict) 
    by specifying model parameter names as keys, and values as optuna trial samplers. Please also specify model parameters needed 
    for instantiation but that are not being optimised (otherwise model will return error). 

    trial:                           optuna trial object
    h_param_search_space (dict):     dict of hyperparameters {hparam_name: hparam_value}. Specify search space with optuna.trial sampler as value if 
                                     wanting to optimise that hyperparameter. Example: {'learning_rate': trial.suggest_categorical('lr', [0.01, 0.001, 0.0001]), 'sequence_length':5 }
    model (nn.Module):               model to optimise. Do NOT instantiate model with model() on passing.
    train_data:                      train data
    val_data:                        val data
    n_epochs (int):                  number of epochs to train for 
    patience (int):                  patience for early stopping 
    mind_delta(float):               min_delta for early stopping   
    """           

    #define search spaces based on model
    hpss= h_param_search_space
    learning_rate = trial.suggest_categorical('lr', hpss['learning_rate'])
    batch_size    = trial.suggest_categorical('batch_size', hpss['batch_size'])
    
    if model==SequenceRegressionLinear:
        model_instance = model(alphabet_size=hpss['alphabet_size'], sequence_length=hpss['sequence_length'])
        
    elif model==SequenceRegressionMLP:
        n_hidden_layers = trial.suggest_int('n_hidden_layers',1, hpss['max_hidden_layers']) #max_hidden_sizes should be an int
        hidden_sizes    = [int(trial.suggest_categorical("hidden{}_size".format(i), hpss['hidden_sizes_categorical'])) #hidden_sizes_categorical should be a list of hidden sizes
                            for i in range(n_hidden_layers)]
        model_instance = model(alphabet_size=hpss['alphabet_size'], sequence_length=hpss['sequence_length'], hidden_sizes=hidden_sizes)

    elif model==SequenceRegressionCNN:
        num_conv_layers = trial.suggest_int('num_conv_layers', 1, hpss['max_conv_layers']) #max_conv_layers should be an int
        n_kernels = [int(trial.suggest_discrete_uniform("n_kernels", hpss['n_kernels_min'], hpss['n_kernels_max'] , hpss['n_kernels_step']))for i in range(num_conv_layers)]      
        kernel_sizes = [int(trial.suggest_discrete_uniform("kernel_sizes", hpss['kernel_sizes_min'], hpss['kernel_sizes_max'], 1))for i in range(num_conv_layers)]
        model_instance = model(input_channels=hpss['alphabet_size'], sequence_length=hpss['sequence_length'], num_conv_layers=num_conv_layers,
                              n_kernels=n_kernels, kernel_sizes=kernel_sizes)
    elif model==SequenceRegressionLSTM: 
        num_layers     = trial.suggest_int('num_layers', 1, hpss['max_lstm_layers']) #max_lstm_layers should be int
        hidden_size    = trial.suggest_categorical("hidden_size", hpss['hidden_sizes']) #hidden_sizes should be a list of possible hidden layuer sizes
        bidirectional  = hpss['bidirectional']               
        model_instance = model(input_size=hpss['alphabet_size'], hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional)
    
    elif model==SequenceRegressionTransformer:
        embed_dim_options = hpss['embed_dim_options']
        max_heads = hpss['max_heads']
        valid_combinations = generate_valid_combinations(embed_dim_options, max_heads)
    
        d_model, nhead = trial.suggest_categorical("embed_dim_num_heads", valid_combinations)
            
        num_layers      = trial.suggest_int('num_layers', 1, hpss['max_layers']) #should be int
        dim_feedforward = trial.suggest_categorical('dim_feedforward', hpss['feedforward_dims']) # should be list of ints possible dims 
        max_seq_length  = trial.suggest_categorical("max_seq_length", hpss['max_seq_lengths']) #shold be list of ints of possible max seq lengths                   
        model_instance  = model(input_dim=hpss['alphabet_size'], d_model=d_model, nhead=nhead,dim_feedforward=dim_feedforward,
                               max_seq_length=max_seq_length)
        
    # Initialize model with the trial’s hyperparameters
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model_instance.parameters(), lr=learning_rate)


    #train and val loaders 
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)

    #run train/val
    val_loss = optimise_hparams(trial, model_instance, loss_fn, optimizer, train_loader, 
                               val_loader, n_epochs=n_epochs, patience=patience, min_delta=min_delta, device=device)
    return val_loss

In [18]:
linear_hparam_space = {'learning_rate': [0.01, 0.001, 0.0001], 'batch_size': [32, 64, 128, 256], 
                       'alphabet_size':6, 'sequence_length':6} 
mlp_hparam_space   = {'learning_rate': [0.01, 0.001, 0.0001], 'batch_size': [32, 64, 128, 256], 
                       'alphabet_size':6, 'sequence_length':6, 'max_hidden_layers':3,
                     'hidden_sizes_categorical': [32,64, 128, 256]} 

#linear_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]
mlp_studies = [opt.create_study(direction='minimize') for i in LANDSCAPES]


[I 2024-10-30 08:13:32,862] A new study created in memory with name: no-name-25179e94-3cf6-45c8-a3e9-832b9d4f2c06
[I 2024-10-30 08:13:32,863] A new study created in memory with name: no-name-b5288090-9856-4902-892c-2480807deac1
[I 2024-10-30 08:13:32,864] A new study created in memory with name: no-name-b2a4c1da-9884-493d-a416-37539082c4ef
[I 2024-10-30 08:13:32,865] A new study created in memory with name: no-name-24721ba4-7e88-4387-b105-a402bb30fe7b
[I 2024-10-30 08:13:32,865] A new study created in memory with name: no-name-2e006a1b-300c-4d92-969f-bea2fa336508
[I 2024-10-30 08:13:32,866] A new study created in memory with name: no-name-6c005807-a339-47ff-bf38-de1c11b6b1a8


In [19]:
for index, study in enumerate(mlp_studies):
    study.optimize(lambda trial: objective_NK(trial, mlp_hparam_space, SequenceRegressionMLP, 
                                              train_data= xy_train[index], val_data=xy_val[index], n_epochs=50), n_trials=1)


Epoch: 0, Val loss: 0.00017392281043319567
Epoch: 1, Val loss: 5.126011955464985e-05
Epoch: 2, Val loss: 2.6944504097011334e-05
Epoch: 3, Val loss: 1.6409008498219735e-05
Epoch: 4, Val loss: 1.051667187173241e-05
Epoch: 5, Val loss: 7.364031477926015e-06
Epoch: 6, Val loss: 4.08083922023144e-06
Epoch: 7, Val loss: 3.482142265981519e-06
Epoch: 8, Val loss: 2.0205503842127224e-06
Epoch: 9, Val loss: 1.6925319740085659e-06
Epoch: 10, Val loss: 1.067970995856771e-06


[I 2024-10-30 08:13:55,466] Trial 0 finished with value: 9.093471661244466e-07 and parameters: {'lr': 0.01, 'batch_size': 128, 'n_hidden_layers': 3, 'hidden0_size': 128, 'hidden1_size': 256, 'hidden2_size': 32}. Best is trial 0 with value: 9.093471661244466e-07.


Epoch: 11, Val loss: 9.093471661244466e-07
Early stopping at epoch 11
Best validation loss this trial: 9.093471661244466e-07
Epoch: 0, Val loss: 0.0009504179281190548
Epoch: 1, Val loss: 0.00040564202463739895
Epoch: 2, Val loss: 0.0003051735728413949
Epoch: 3, Val loss: 0.00021636482633144842
Epoch: 4, Val loss: 0.00017115103401010856
Epoch: 5, Val loss: 0.0001525707238425429
Epoch: 6, Val loss: 0.00013904524909861735
Epoch: 7, Val loss: 7.99493770275273e-05
Epoch: 8, Val loss: 6.163378236676034e-05
Epoch: 9, Val loss: 6.351220561664065e-05
Epoch: 10, Val loss: 4.9304417779760705e-05
Epoch: 11, Val loss: 4.2378104831769655e-05
Epoch: 12, Val loss: 4.0326292744591934e-05
Epoch: 13, Val loss: 3.0692004952276264e-05
Epoch: 14, Val loss: 3.102085472949737e-05
Epoch: 15, Val loss: 3.8290762276096175e-05
Epoch: 16, Val loss: 2.2425908969320222e-05
Epoch: 17, Val loss: 2.862617916862369e-05


[I 2024-10-30 08:14:07,179] Trial 0 finished with value: 2.2894871372589743e-05 and parameters: {'lr': 0.001, 'batch_size': 64, 'n_hidden_layers': 2, 'hidden0_size': 128, 'hidden1_size': 64}. Best is trial 0 with value: 2.2894871372589743e-05.


Epoch: 18, Val loss: 2.2894871372589743e-05
Early stopping at epoch 18
Best validation loss this trial: 2.2894871372589743e-05
Epoch: 0, Val loss: 0.007956855015980447
Epoch: 1, Val loss: 0.005038755915016254
Epoch: 2, Val loss: 0.0039705223549141464
Epoch: 3, Val loss: 0.003580946085226332
Epoch: 4, Val loss: 0.0048224216448461525
Epoch: 5, Val loss: 0.003953304169122448
Epoch: 6, Val loss: 0.00341567943075624
Epoch: 7, Val loss: 0.003365459873810665
Epoch: 8, Val loss: 0.003135322858320756
Epoch: 9, Val loss: 0.0033804976334596355
Epoch: 10, Val loss: 0.0031459285707499543
Epoch: 11, Val loss: 0.0034445363726729574
Epoch: 12, Val loss: 0.002786370167413201
Epoch: 13, Val loss: 0.003170101470552767
Epoch: 14, Val loss: 0.0028750169380472447
Epoch: 15, Val loss: 0.002870042857606537
Epoch: 16, Val loss: 0.002973034053057846


[I 2024-10-30 08:14:25,179] Trial 0 finished with value: 0.0029827114498107415 and parameters: {'lr': 0.01, 'batch_size': 32, 'n_hidden_layers': 1, 'hidden0_size': 256}. Best is trial 0 with value: 0.0029827114498107415.


Epoch: 17, Val loss: 0.0029827114498107415
Early stopping at epoch 17
Best validation loss this trial: 0.0029827114498107415
Epoch: 0, Val loss: 0.017838632612306084
Epoch: 1, Val loss: 0.016260554705563392
Epoch: 2, Val loss: 0.014092595987101523
Epoch: 3, Val loss: 0.014102688216421211
Epoch: 4, Val loss: 0.012350788568823129
Epoch: 5, Val loss: 0.012262025993301446
Epoch: 6, Val loss: 0.010714968389823522
Epoch: 7, Val loss: 0.010431886441671314
Epoch: 8, Val loss: 0.009608377837854573
Epoch: 9, Val loss: 0.009360926826763103
Epoch: 10, Val loss: 0.008979608794339957
Epoch: 11, Val loss: 0.008701346516131591
Epoch: 12, Val loss: 0.00844527532159486
Epoch: 13, Val loss: 0.008192596477496192
Epoch: 14, Val loss: 0.008238772741264194
Epoch: 15, Val loss: 0.007836056532911383
Epoch: 16, Val loss: 0.007676330473648113
Epoch: 17, Val loss: 0.008153558713702373
Epoch: 18, Val loss: 0.007631317220835222
Epoch: 19, Val loss: 0.007745776819980616
Epoch: 20, Val loss: 0.007472964725059131
Epoc

[I 2024-10-30 08:15:28,981] Trial 0 finished with value: 0.006539100508452353 and parameters: {'lr': 0.001, 'batch_size': 32, 'n_hidden_layers': 3, 'hidden0_size': 32, 'hidden1_size': 256, 'hidden2_size': 256}. Best is trial 0 with value: 0.006539100508452353.


Epoch: 47, Val loss: 0.006539100508452353
Early stopping at epoch 47
Best validation loss this trial: 0.006539100508452353
Epoch: 0, Val loss: 0.01864490962276856
Epoch: 1, Val loss: 0.018579411755005517
Epoch: 2, Val loss: 0.01862091158206264
Epoch: 3, Val loss: 0.01873551824440559
Epoch: 4, Val loss: 0.01847749805698792
Epoch: 5, Val loss: 0.01838134943197171
Epoch: 6, Val loss: 0.0184380360879004
Epoch: 7, Val loss: 0.018319017812609674
Epoch: 8, Val loss: 0.01829816078146299
Epoch: 9, Val loss: 0.018225470433632533
Epoch: 10, Val loss: 0.018240938832362493
Epoch: 11, Val loss: 0.018282002490013837
Epoch: 12, Val loss: 0.018381074878076713
Epoch: 13, Val loss: 0.01828776200612386
Epoch: 14, Val loss: 0.018172867471973102
Epoch: 15, Val loss: 0.018188741182287534
Epoch: 16, Val loss: 0.018419481161981822
Epoch: 17, Val loss: 0.018176968737194935
Epoch: 18, Val loss: 0.018162983749061824
Epoch: 19, Val loss: 0.018124219651023548
Epoch: 20, Val loss: 0.01808136502901713
Epoch: 21, Val 

[I 2024-10-30 08:15:34,076] Trial 0 finished with value: 0.018230341374874115 and parameters: {'lr': 0.01, 'batch_size': 256, 'n_hidden_layers': 2, 'hidden0_size': 32, 'hidden1_size': 32}. Best is trial 0 with value: 0.018230341374874115.


Epoch: 27, Val loss: 0.018230341374874115
Early stopping at epoch 27
Best validation loss this trial: 0.018230341374874115
Epoch: 0, Val loss: 0.019459166150126193
Epoch: 1, Val loss: 0.01932551234992396
Epoch: 2, Val loss: 0.019207427628402017
Epoch: 3, Val loss: 0.019099191778427005
Epoch: 4, Val loss: 0.01930389311323818
Epoch: 5, Val loss: 0.01994352389731978
Epoch: 6, Val loss: 0.019216043603980642
Epoch: 7, Val loss: 0.01946127998172982


[I 2024-10-30 08:15:43,086] Trial 0 finished with value: 0.019110098513018373 and parameters: {'lr': 0.01, 'batch_size': 32, 'n_hidden_layers': 1, 'hidden0_size': 128}. Best is trial 0 with value: 0.019110098513018373.


Epoch: 8, Val loss: 0.019110098513018373
Early stopping at epoch 8
Best validation loss this trial: 0.019110098513018373


In [None]:
    learning_rate = trial.suggest_categorical('lr', [0.01, 0.001, 0.0001])

    batch_size    = int(trial.suggest_discrete_uniform('batch_size', 32, 256, 32))
    