In [3]:
import optuna as opt
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

import sys
sys.path.append('/home/mahakaran/NK-paper-12-5-24-version/nk-ml-paper2-2024/pscapes')
sys.path.append('/home/mahakaran/NK-paper-12-5-24-version/nk-ml-paper2-2024/nk-ml-2024')

from torch.utils.data import DataLoader

from pscapes.landscape_class import ProteinLandscape
from pscapes.utils import dict_to_np_array, np_array_to_dict

from src.architectures.architectures import SequenceRegressionCNN
from src.architectures.ml_utils import train_val_test_split_ohe


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
f = ProteinLandscape(csv_path='../data/nk_landscapes/k0_r0.csv')

In [5]:
SEQ_LEN = 6
AA_ALPHABET = 'ACDEFG'

In [6]:
#Load NK landscapes -- only a single replicate for hparam tuning 

LANDSCAPES = []
for k in range(6): 
    for r in range(1): 
        landscape = ProteinLandscape(csv_path='../data/nk_landscapes/k{0}_r{1}.csv'.format(k,r), amino_acids=AA_ALPHABET)
        LANDSCAPES.append(landscape)

In [7]:
LANDSCAPES = [i.fit_OHE() for i in LANDSCAPES]

In [8]:
landscapes_ohe, xy_train, xy_val, xy_test, x_test, y_test = train_val_test_split_ohe(LANDSCAPES)

In [9]:
len(xy_train)

6

In [10]:
landscape0_xy_train = xy_train[0]
landscape0_xy_val   = xy_val[0]

In [13]:
def cnn_objective(trial, train_data, val_data, epochs):
    # Define the search space
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 2)
    
    num_kernels = [int(trial.suggest_discrete_uniform("n_kernels", 16, 128, 16))
                   for i in range(num_conv_layers)]  
    
    kernel_sizes = [int(trial.suggest_discrete_uniform("kernel_sizes", 2, 6, 1))
                   for i in range(num_conv_layers)]
    
    learning_rate = trial.suggest_loguniform('lr', 1e-4, 1e-2)

    
    # Initialize model with the trial’s hyperparameters
    model = SequenceRegressionCNN(input_channels=len(AA_ALPHABET), sequence_length=SEQ_LEN, 
                                  num_conv_layers=num_conv_layers, n_kernels=num_kernels, kernel_sizes=kernel_sizes)
    
    # Loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Dummy training and validation data loaders

    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32)

    # Training loop with validation loss calculation
    for epoch in range(epochs):
        model.train()
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            predictions = model(x_batch)
            loss = loss_fn(predictions, y_batch)
            loss.backward()
            optimizer.step()

        # Calculate validation loss
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for x_batch, y_batch in val_loader:
                predictions = model(x_batch)
                val_loss += loss_fn(predictions, y_batch).item()

        val_loss /= len(val_loader)
        
        
        trial.report(val_loss, epoch)
        
        print('Epoch {0}: Val loss: {1}'.format(epoch, val_loss))
        if trial.should_prune():
            raise opt.TrialPruned()
    print('Best Val Loss this Trial: {}'.format(val_loss))
        

    return val_loss



In [14]:
# Running the study
cnn_study = opt.create_study(direction="minimize")


cnn_study.optimize(lambda trial: cnn_objective(trial, train_data=landscape0_xy_train, val_data=landscape0_xy_val, 
                                epochs=20), n_trials=50)

print("Best CNN hyperparameters:", cnn_study.best_params)
print("Best CNN validation loss:", cnn_study.best_value)

[I 2024-10-28 14:06:41,565] A new study created in memory with name: no-name-8ac769ad-c53e-4813-be97-0e7da7429801
  num_kernels = [int(trial.suggest_discrete_uniform("n_kernels", 16, 128, 16))
  kernel_sizes = [int(trial.suggest_discrete_uniform("kernel_sizes", 2, 6, 1))
  learning_rate = trial.suggest_loguniform('lr', 1e-4, 1e-2)


Epoch 0: Val loss: 6.406040604341462e-05
Epoch 1: Val loss: 4.176465309628994e-05
Epoch 2: Val loss: 9.847667383721087e-05
Epoch 3: Val loss: 9.482124142257226e-05
Epoch 4: Val loss: 0.00035426035777389066
Epoch 5: Val loss: 4.105059404412939e-05
Epoch 6: Val loss: 1.9745306289200344e-05
Epoch 7: Val loss: 0.00012022573775119291
Epoch 8: Val loss: 0.00016489610412759054
Epoch 9: Val loss: 5.594520910710701e-05
Epoch 10: Val loss: 7.92523748255668e-06
Epoch 11: Val loss: 8.808666532584337e-06
Epoch 12: Val loss: 2.5608373503835024e-05
Epoch 13: Val loss: 3.0780139147678616e-05
Epoch 14: Val loss: 8.668159688195141e-06
Epoch 15: Val loss: 6.758396217715677e-06
Epoch 16: Val loss: 3.4161279371700302e-06
Epoch 17: Val loss: 1.4736767581749347e-05
Epoch 18: Val loss: 7.72482529559305e-05


[I 2024-10-28 14:07:10,094] Trial 0 finished with value: 3.0381646974690848e-05 and parameters: {'num_conv_layers': 1, 'n_kernels': 48.0, 'kernel_sizes': 3.0, 'lr': 0.007937900803106161}. Best is trial 0 with value: 3.0381646974690848e-05.


Epoch 19: Val loss: 3.0381646974690848e-05
Best Val Loss this Trial: 3.0381646974690848e-05
Epoch 0: Val loss: 6.714870462143837e-05
Epoch 1: Val loss: 3.147456439745659e-05
Epoch 2: Val loss: 3.760856170396064e-05
Epoch 3: Val loss: 7.373076601248208e-05
Epoch 4: Val loss: 2.4670104989920033e-05
Epoch 5: Val loss: 0.00038600036450044817
Epoch 6: Val loss: 6.347357755766696e-05
Epoch 7: Val loss: 7.451704842758337e-05
Epoch 8: Val loss: 4.652663170014016e-05
Epoch 9: Val loss: 4.4064754664036744e-05
Epoch 10: Val loss: 6.939456043488835e-05
Epoch 11: Val loss: 4.541923294904041e-05
Epoch 12: Val loss: 5.23121076558033e-05
Epoch 13: Val loss: 4.859091165373808e-05
Epoch 14: Val loss: 2.7224153141921703e-05
Epoch 15: Val loss: 1.0878249668166973e-05
Epoch 16: Val loss: 3.844023312067137e-05
Epoch 17: Val loss: 4.7155799022112063e-05
Epoch 18: Val loss: 4.997321396245737e-05


[I 2024-10-28 14:07:39,553] Trial 1 finished with value: 1.972890336569085e-05 and parameters: {'num_conv_layers': 1, 'n_kernels': 80.0, 'kernel_sizes': 3.0, 'lr': 0.003995010349141688}. Best is trial 1 with value: 1.972890336569085e-05.


Epoch 19: Val loss: 1.972890336569085e-05
Best Val Loss this Trial: 1.972890336569085e-05
Epoch 0: Val loss: 0.00019037667385792025
Epoch 1: Val loss: 0.00011469984173555321
Epoch 2: Val loss: 0.00013418510146342064
Epoch 3: Val loss: 4.608148594700799e-05
Epoch 4: Val loss: 3.7943334763650244e-05
Epoch 5: Val loss: 2.465340244232052e-05
Epoch 6: Val loss: 4.051496047100239e-05
Epoch 7: Val loss: 4.2423117294799114e-05
Epoch 8: Val loss: 1.7811806208975704e-05
Epoch 9: Val loss: 1.894223569213195e-05
Epoch 10: Val loss: 2.9350058555901032e-05
Epoch 11: Val loss: 2.1172472599542572e-05
Epoch 12: Val loss: 3.161781414668921e-05
Epoch 13: Val loss: 9.061573018309582e-06
Epoch 14: Val loss: 3.015259971239447e-05
Epoch 15: Val loss: 1.8839253189226783e-05
Epoch 16: Val loss: 3.5516843531190214e-05
Epoch 17: Val loss: 8.478370029137233e-06
Epoch 18: Val loss: 2.401197695075813e-05


[I 2024-10-28 14:08:11,108] Trial 2 finished with value: 6.643139340473067e-06 and parameters: {'num_conv_layers': 1, 'n_kernels': 96.0, 'kernel_sizes': 6.0, 'lr': 0.0014317493546176436}. Best is trial 2 with value: 6.643139340473067e-06.


Epoch 19: Val loss: 6.643139340473067e-06
Best Val Loss this Trial: 6.643139340473067e-06
Epoch 0: Val loss: 8.14648547098359e-05
Epoch 1: Val loss: 6.227283236699011e-05
Epoch 2: Val loss: 0.00018548760051330607
Epoch 3: Val loss: 0.0009451223821896645
Epoch 4: Val loss: 9.428260896556899e-05
Epoch 5: Val loss: 0.00010445143603786934
Epoch 6: Val loss: 8.321416826842105e-05
Epoch 7: Val loss: 4.101877010935613e-05
Epoch 8: Val loss: 0.0005381829133923922
Epoch 9: Val loss: 7.950938915877114e-05
Epoch 10: Val loss: 3.865462208406639e-05
Epoch 11: Val loss: 5.080973461835494e-05
Epoch 12: Val loss: 3.0308902210875116e-05
Epoch 13: Val loss: 7.839101881550478e-05
Epoch 14: Val loss: 2.5489551934984993e-05
Epoch 15: Val loss: 2.3654287629025744e-05
Epoch 16: Val loss: 9.45714548787435e-06
Epoch 17: Val loss: 1.5144759353049274e-05
Epoch 18: Val loss: 1.454991589082802e-05


[I 2024-10-28 14:08:44,076] Trial 3 finished with value: 8.758992328142233e-06 and parameters: {'num_conv_layers': 1, 'n_kernels': 128.0, 'kernel_sizes': 3.0, 'lr': 0.006593503188757435}. Best is trial 2 with value: 6.643139340473067e-06.


Epoch 19: Val loss: 8.758992328142233e-06
Best Val Loss this Trial: 8.758992328142233e-06
Epoch 0: Val loss: 0.00012651164264457487
Epoch 1: Val loss: 9.711930670941639e-05
Epoch 2: Val loss: 0.0004594406113277277
Epoch 3: Val loss: 3.626803363736572e-05
Epoch 4: Val loss: 2.7196575742852714e-05
Epoch 5: Val loss: 3.240114809656924e-05
Epoch 6: Val loss: 1.234732653018031e-05
Epoch 7: Val loss: 0.000327718949629965
Epoch 8: Val loss: 1.922550492388401e-05
Epoch 9: Val loss: 3.92787501252128e-05
Epoch 10: Val loss: 1.705682584581823e-05
Epoch 11: Val loss: 3.872261003512266e-05
Epoch 12: Val loss: 4.976849997438277e-05
Epoch 13: Val loss: 3.6450479994419166e-06
Epoch 14: Val loss: 1.4094836749278236e-05
Epoch 15: Val loss: 3.8226357015119924e-05
Epoch 16: Val loss: 5.8462458059888085e-06
Epoch 17: Val loss: 0.0005680326622130716
Epoch 18: Val loss: 5.805614725828397e-06


[I 2024-10-28 14:09:32,497] Trial 4 finished with value: 4.294698210948351e-06 and parameters: {'num_conv_layers': 2, 'n_kernels': 32.0, 'kernel_sizes': 4.0, 'lr': 0.008745368120211164}. Best is trial 4 with value: 4.294698210948351e-06.


Epoch 19: Val loss: 4.294698210948351e-06
Best Val Loss this Trial: 4.294698210948351e-06


[W 2024-10-28 14:09:34,487] Trial 5 failed with parameters: {'num_conv_layers': 1, 'n_kernels': 32.0, 'kernel_sizes': 2.0, 'lr': 0.0008018973793972665} because of the following error: NameError("name 'optuna' is not defined").
Traceback (most recent call last):
  File "/home/mahakaran/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_57346/1125805103.py", line 5, in <lambda>
    cnn_study.optimize(lambda trial: cnn_objective(trial, train_data=landscape0_xy_train, val_data=landscape0_xy_val,
  File "/tmp/ipykernel_57346/1706552619.py", line 53, in cnn_objective
    raise optuna.TrialPruned()
NameError: name 'optuna' is not defined
[W 2024-10-28 14:09:34,489] Trial 5 failed with value None.


Epoch 0: Val loss: 0.0032401715771446372


NameError: name 'optuna' is not defined

In [None]:
SequenceRegressionCNN(num_conv_layers=1)