Notebook to perform a hyperparameter sweep for the customizable model.
We also will be using the TF dependent adjustment function to closer mimic the real data.

In [1]:
# imports
import argparse
from argparse import Namespace

from pytorch_lightning import Trainer, LightningModule, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger
from torchsummary import summary

from yeastdnnexplorer.data_loaders.synthetic_data_loader import SyntheticDataLoader
from yeastdnnexplorer.ml_models.simple_model import SimpleModel
from yeastdnnexplorer.ml_models.customizable_model import CustomizableModel

import matplotlib.pyplot as plt
import seaborn as sns

from yeastdnnexplorer.probability_models.generate_data import perturbation_effect_adjustment_function_with_tf_relationships

seed_everything(42)

Seed set to 42


42

In [2]:
# define checkpoints for the model
# tells it when to save snapshots of the model during training
# Callback to save the best model based on validation loss
best_model_checkpoint = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    filename="best-model-{epoch:02d}-{val_loss:.2f}",
    save_top_k=1,
)

# Callback to save checkpoints every 5 epochs, regardless of performance
periodic_checkpoint = ModelCheckpoint(
    filename="periodic-{epoch:02d}",
    every_n_epochs=2,
    save_top_k=-1,  # Setting -1 saves all checkpoints
)

# define loggers for the model# configure loggers
tb_logger = TensorBoardLogger("logs/tensorboard_logs")
csv_logger = CSVLogger("logs/csv_logs")

Running our experiment

In [4]:
tf_relationships_dict = {
    0: [2, 4, 7],
    1: [8],
    2: [3, 9],
    3: [1, 6],
    4: [5],
    5: [0, 2, 8],
    6: [4],
    7: [1, 4],
    8: [6],
    9: [0, 3, 8]
}

data_module = SyntheticDataLoader(
    batch_size=32,
    num_genes=4000,
    signal_mean=3.0,
    signal=[0.5] * 10,  # old: [0.1, 0.15, 0.2, 0.25, 0.3],
    n_sample=[1, 2, 2, 4, 4],  # sum of this is num of tfs
    val_size=0.1,
    test_size=0.1,
    random_state=42,
    max_mean_adjustment=15.0,
    adjustment_function=perturbation_effect_adjustment_function_with_tf_relationships,
    tf_relationships=tf_relationships_dict
)

num_tfs = sum(data_module.n_sample)  # sum of all n_sample is the number of TFs

model = CustomizableModel(
    input_dim=num_tfs, 
    output_dim=num_tfs, 
    lr=0.01,
    hidden_layer_num=3,
    hidden_layer_sizes=[128, 64, 32],
    activation="ReLU",
    optimizer="Adam",
    L2_regularization_term=0.0,
    dropout_rate=0.0,
)

trainer = Trainer(
    max_epochs=10,
    deterministic=True,
    accelerator='cpu',
    callbacks=[best_model_checkpoint, periodic_checkpoint],
    logger=[tb_logger, csv_logger],
)

trainer.fit(model, data_module)

test_results = trainer.test(model, datamodule=data_module)
print("Printing test results...")
print(
    test_results
)  # this prints all metrics that were logged during the test phase

# print summary of model
print("Printing model summary...")
summary(model, (num_tfs, num_tfs)) 

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


bm - entering hidden layer generation loop
0 128 64
1 64 32
bm - adjustment function provided to dataLoader setup


  X_train, Y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(
  X_val, Y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(
  X_test, Y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(

  | Name          | Type       | Params
---------------------------------------------
0 | activation    | ReLU       | 0     
1 | input_layer   | Linear     | 1.8 K 
2 | hidden_layers | ModuleList | 10.3 K
3 | output_layer  | Linear     | 429   
4 | dropout       | Dropout    | 0     
---------------------------------------------
12.6 K    Trainable params
0         Non-trainable params
12.6 K    Total params
0.050     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


bm - adjustment function provided to dataLoader setup


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           5.1191582679748535
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Printing test results...
[{'test_loss': 5.1191582679748535}]
Printing model summary...
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [-1, 13, 128]           1,792
              ReLU-2              [-1, 13, 128]               0
           Dropout-3              [-1, 13, 128]               0
            Linear-4               [-1, 13, 64]           8,256
              ReLU-5               [-1, 13, 64]               0
          