In [3]:
import optuna as opt
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

import sys
sys.path.append('/home/mahakaran/NK-paper-12-5-24-version/nk-ml-paper2-2024/pscapes')
sys.path.append('/home/mahakaran/NK-paper-12-5-24-version/nk-ml-paper2-2024/nk-ml-2024')

from torch.utils.data import DataLoader

from pscapes.landscape_class import ProteinLandscape
from pscapes.utils import dict_to_np_array, np_array_to_dict

from src.architectures.architectures import SequenceRegressionCNN
from src.architectures.ml_utils import train_val_test_split_ohe


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
f = ProteinLandscape(csv_path='../data/nk_landscapes/k0_r0.csv')

In [5]:
SEQ_LEN = 6
AA_ALPHABET = 'ACDEFG'

In [6]:
#Load NK landscapes -- only a single replicate for hparam tuning 

LANDSCAPES = []
for k in range(6): 
    for r in range(1): 
        landscape = ProteinLandscape(csv_path='../data/nk_landscapes/k{0}_r{1}.csv'.format(k,r), amino_acids=AA_ALPHABET)
        LANDSCAPES.append(landscape)

In [7]:
LANDSCAPES = [i.fit_OHE() for i in LANDSCAPES]

In [8]:
landscapes_ohe, xy_train, xy_val, xy_test, x_test, y_test = train_val_test_split_ohe(LANDSCAPES)

In [9]:
len(xy_train)

6

In [10]:
landscape0_xy_train = xy_train[0]
landscape0_xy_val   = xy_val[0]

In [13]:
def cnn_objective(trial, train_data, val_data, epochs):
    # Define the search space
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 2)
    
    num_kernels = [int(trial.suggest_discrete_uniform("n_kernels", 16, 128, 16))
                   for i in range(num_conv_layers)]  
    
    kernel_sizes = [int(trial.suggest_discrete_uniform("kernel_sizes", 2, 6, 1))
                   for i in range(num_conv_layers)]
    
    learning_rate = trial.suggest_loguniform('lr', 1e-4, 1e-2)

    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionCNN(input_channels=len(AA_ALPHABET), sequence_length=SEQ_LEN, 
                                  num_conv_layers=num_conv_layers, n_kernels=num_kernels, kernel_sizes=kernel_sizes)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Dummy training and validation data loaders

    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32)

    # Training loop with validation loss calculation
    for epoch in range(epochs):
        model.train()
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            predictions = model(x_batch)
            loss = loss_fn(predictions, y_batch)
            loss.backward()
            optimizer.step()

        # Calculate validation loss`
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for x_batch, y_batch in val_loader:
                predictions = model(x_batch)
                val_loss += loss_fn(predictions, y_batch).item()

        val_loss /= len(val_loader)
        
        
        trial.report(val_loss, epoch)
        
        print('Epoch {0}: Val loss: {1}'.format(epoch, val_loss))
        if trial.should_prune():
            raise opt.TrialPruned()
    print('Best Val Loss this Trial: {}'.format(val_loss))
        

    return val_loss



In [15]:
# Running the study
cnn_study = opt.create_study(direction="minimize")


cnn_study.optimize(lambda trial: cnn_objective(trial, train_data=landscape0_xy_train, val_data=landscape0_xy_val, 
                                epochs=20), n_trials=50)

print("Best CNN hyperparameters:", cnn_study.best_params)
print("Best CNN validation loss:", cnn_study.best_value)

[I 2024-10-28 14:13:10,405] A new study created in memory with name: no-name-7788b05d-68b3-4d25-be6e-c7aa83b897c6
  num_kernels = [int(trial.suggest_discrete_uniform("n_kernels", 16, 128, 16))
  kernel_sizes = [int(trial.suggest_discrete_uniform("kernel_sizes", 2, 6, 1))
  learning_rate = trial.suggest_loguniform('lr', 1e-4, 1e-2)


Epoch 0: Val loss: 0.00020757825687825162
Epoch 1: Val loss: 0.0001091291334856407
Epoch 2: Val loss: 3.67508167376703e-05
Epoch 3: Val loss: 0.00014605295700287251
Epoch 4: Val loss: 2.5667289727195574e-05
Epoch 5: Val loss: 1.9624385940837354e-05
Epoch 6: Val loss: 1.4869934587210066e-05
Epoch 7: Val loss: 1.0646885514758582e-05
Epoch 8: Val loss: 2.6002847203838228e-06
Epoch 9: Val loss: 2.831873255839819e-06
Epoch 10: Val loss: 1.5863822552794259e-06
Epoch 11: Val loss: 2.089623247338439e-05
Epoch 12: Val loss: 2.2883838840391392e-05
Epoch 13: Val loss: 7.783788526614802e-05
Epoch 14: Val loss: 3.7240332689273205e-06
Epoch 15: Val loss: 4.301125529841984e-06
Epoch 16: Val loss: 1.3183699437886235e-05
Epoch 17: Val loss: 1.5598939268636611e-06
Epoch 18: Val loss: 8.111925925009135e-06


[I 2024-10-28 14:13:57,653] Trial 0 finished with value: 8.184931586513882e-06 and parameters: {'num_conv_layers': 2, 'n_kernels': 48.0, 'kernel_sizes': 5.0, 'lr': 0.009558495354767655}. Best is trial 0 with value: 8.184931586513882e-06.


Epoch 19: Val loss: 8.184931586513882e-06
Best Val Loss this Trial: 8.184931586513882e-06
Epoch 0: Val loss: 0.0012918205249708337
Epoch 1: Val loss: 0.00030248108781121957
Epoch 2: Val loss: 0.00013126290809267605
Epoch 3: Val loss: 6.403058811969474e-05
Epoch 4: Val loss: 3.203281647805324e-05
Epoch 5: Val loss: 1.7343183103587522e-05
Epoch 6: Val loss: 1.0398261740472382e-05
Epoch 7: Val loss: 6.587648413109973e-06
Epoch 8: Val loss: 1.8072523767515144e-05
Epoch 9: Val loss: 2.679496981855687e-06
Epoch 10: Val loss: 2.912931646593419e-06
Epoch 11: Val loss: 2.389892279972208e-06
Epoch 12: Val loss: 8.801633522770682e-06
Epoch 13: Val loss: 1.3685768098355797e-06
Epoch 14: Val loss: 1.1453108836343372e-05
Epoch 15: Val loss: 4.212091758185684e-06
Epoch 16: Val loss: 2.460928899779932e-06
Epoch 17: Val loss: 9.02681030064407e-07
Epoch 18: Val loss: 1.003514983156679e-06


[I 2024-10-28 14:14:26,726] Trial 1 finished with value: 1.79948728890282e-06 and parameters: {'num_conv_layers': 1, 'n_kernels': 16.0, 'kernel_sizes': 4.0, 'lr': 0.0007353201469156231}. Best is trial 1 with value: 1.79948728890282e-06.


Epoch 19: Val loss: 1.79948728890282e-06
Best Val Loss this Trial: 1.79948728890282e-06
Epoch 0: Val loss: 0.002392408666628787
Epoch 1: Val loss: 0.0006649430231022068
Epoch 2: Val loss: 0.00034481485101392365
Epoch 3: Val loss: 0.00021778847169629537
Epoch 4: Val loss: 0.00015596117149331034
Epoch 5: Val loss: 0.00013416801529173326
Epoch 6: Val loss: 0.00010501024292189426
Epoch 7: Val loss: 8.102180978840985e-05
Epoch 8: Val loss: 6.408111198846814e-05
Epoch 9: Val loss: 7.422781784090404e-05
Epoch 10: Val loss: 5.313682789001644e-05
Epoch 11: Val loss: 3.5185280816781764e-05
Epoch 12: Val loss: 3.227851172215375e-05
Epoch 13: Val loss: 4.794235096635689e-05
Epoch 14: Val loss: 2.230812378134305e-05
Epoch 15: Val loss: 2.026078394002482e-05
Epoch 16: Val loss: 2.848161978200108e-05
Epoch 17: Val loss: 1.6730797406670073e-05
Epoch 18: Val loss: 1.5669962403679274e-05


[I 2024-10-28 14:15:12,490] Trial 2 finished with value: 1.2729396666666298e-05 and parameters: {'num_conv_layers': 2, 'n_kernels': 32.0, 'kernel_sizes': 6.0, 'lr': 0.00018830661575054285}. Best is trial 1 with value: 1.79948728890282e-06.


Epoch 19: Val loss: 1.2729396666666298e-05
Best Val Loss this Trial: 1.2729396666666298e-05
Epoch 0: Val loss: 0.00017957211180807004
Epoch 1: Val loss: 5.940460489047184e-05
Epoch 2: Val loss: 4.290184051180995e-05
Epoch 3: Val loss: 2.6267524059905678e-05
Epoch 4: Val loss: 1.8299710059842706e-05
Epoch 5: Val loss: 4.036725949534339e-05
Epoch 6: Val loss: 1.629286406701157e-05
Epoch 7: Val loss: 1.7502779583937965e-05
Epoch 8: Val loss: 1.3867622993320065e-05
Epoch 9: Val loss: 9.415838065282685e-06
Epoch 10: Val loss: 1.672423155453458e-05
Epoch 11: Val loss: 7.1464971980222044e-06
Epoch 12: Val loss: 1.112290347670292e-05
Epoch 13: Val loss: 5.005806683506789e-06
Epoch 14: Val loss: 7.3944154376235965e-06
Epoch 15: Val loss: 1.864320513485502e-05
Epoch 16: Val loss: 3.973246684369932e-06
Epoch 17: Val loss: 2.570481767459289e-05
Epoch 18: Val loss: 4.5469353041033515e-05


[I 2024-10-28 14:16:12,024] Trial 3 finished with value: 5.313348797477747e-06 and parameters: {'num_conv_layers': 2, 'n_kernels': 112.0, 'kernel_sizes': 3.0, 'lr': 0.00041102570236595004}. Best is trial 1 with value: 1.79948728890282e-06.


Epoch 19: Val loss: 5.313348797477747e-06
Best Val Loss this Trial: 5.313348797477747e-06
Epoch 0: Val loss: 0.0018064063214040243
Epoch 1: Val loss: 0.00030174985208098864
Epoch 2: Val loss: 0.00028682800515308086
Epoch 3: Val loss: 9.857455287966976e-05
Epoch 4: Val loss: 0.0001174795053154031
Epoch 5: Val loss: 0.00011327795112024472
Epoch 6: Val loss: 6.561286239662163e-05
Epoch 7: Val loss: 8.348102397949566e-05
Epoch 8: Val loss: 0.00022158993097891807
Epoch 9: Val loss: 0.00011060829698787226
Epoch 10: Val loss: 4.873916516686207e-05
Epoch 11: Val loss: 8.436208125390411e-05
Epoch 12: Val loss: 0.00022296080372990784
Epoch 13: Val loss: 5.484813179938435e-05
Epoch 14: Val loss: 4.8623973511855424e-05
Epoch 15: Val loss: 9.850840019952689e-05
Epoch 16: Val loss: 5.567694131611313e-05
Epoch 17: Val loss: 3.6951349273326006e-05
Epoch 18: Val loss: 7.184122090947289e-05


[I 2024-10-28 14:16:54,835] Trial 4 finished with value: 5.785035177995401e-05 and parameters: {'num_conv_layers': 2, 'n_kernels': 16.0, 'kernel_sizes': 2.0, 'lr': 0.005179615220291727}. Best is trial 1 with value: 1.79948728890282e-06.


Epoch 19: Val loss: 5.785035177995401e-05
Best Val Loss this Trial: 5.785035177995401e-05
Epoch 0: Val loss: 6.309930675218537e-05
Epoch 1: Val loss: 5.860941042229593e-05
Epoch 2: Val loss: 9.90689719074931e-05
Epoch 3: Val loss: 0.0004644108284215053
Epoch 4: Val loss: 1.991547168876827e-05
Epoch 5: Val loss: 4.5690074236914275e-05


[W 2024-10-28 14:17:15,168] Trial 5 failed with parameters: {'num_conv_layers': 2, 'n_kernels': 80.0, 'kernel_sizes': 3.0, 'lr': 0.00246579277260613} because of the following error: NameError("name 'optuna' is not defined").
Traceback (most recent call last):
  File "/home/mahakaran/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_57346/1125805103.py", line 5, in <lambda>
    cnn_study.optimize(lambda trial: cnn_objective(trial, train_data=landscape0_xy_train, val_data=landscape0_xy_val,
  File "/tmp/ipykernel_57346/1706552619.py", line 53, in cnn_objective
    raise optuna.TrialPruned()
NameError: name 'optuna' is not defined
[W 2024-10-28 14:17:15,170] Trial 5 failed with value None.


Epoch 6: Val loss: 7.97564155688563e-05


NameError: name 'optuna' is not defined

In [22]:
LANDSCAPES[0].mutation_arrays

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35]),
 array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
        3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5]),
 array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
        4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]))