In [1]:
# --- Data -------------------------------------------------------------------
DIR = "data/30x25/"   # dataset root, handed to GasDataModule as `data_dir`
SEQ_LEN = 1           # sequence length shared by the model and the datamodule
NTH_FRAME = 2         # handed to GasDataModule as `nth_frame`
SLIDING_STEP = 1      # NOTE(review): not referenced by the cells shown in this notebook

# --- Training ---------------------------------------------------------------
BATCH_SIZE = 1
EPOCHS = 50           # upper bound on epochs; the trainer's lower bound is half of it
NUM_WORKERS = 2       # number of CPUs used

Create Lightning model of Decoder-ConvLSTM

In [3]:
from models.decoder.architectures import DecoderConvLSTM
import pytorch_lightning as pl

class LightningNet(pl.LightningModule):
    """Lightning wrapper around ``DecoderConvLSTM`` with a pretrained decoder.

    On construction the decoder's hyperparameters are read from
    ``models/decoder/decoder_params.yaml`` and its weights from
    ``models/decoder/decoder.pth``; the decoder is then handed to
    ``DecoderConvLSTM``. Training, validation and test steps all use the MSE
    between the model output and the target.

    NOTE(review): ``yaml``, ``torch`` and ``F`` (presumably
    ``torch.nn.functional``) are used here but not imported in this cell;
    they have to be provided by an earlier cell.

    Args:
        inner_dims: Forwarded to ``DecoderConvLSTM`` as ``inner_dims``.
        seq_len: Forwarded to ``DecoderConvLSTM`` as ``seq_len``.
        learning_rate: Learning rate of the Adam optimizer.
    """

    def __init__(self, inner_dims, seq_len, learning_rate):
        super().__init__()

        # Imported locally under an alias: the decoder's Lightning module is
        # also called `LightningNet`, and the bare name `models` is not bound
        # by a `from models.decoder.architectures import ...` statement.
        from models.decoder.architectures import LightningNet as DecoderLightningNet

        with open("models/decoder/decoder_params.yaml") as file:
            params = yaml.safe_load(file)
        decoder = DecoderLightningNet(
            params["inner_dims"], params["seq_len"], params["learning_rate"]
        )
        # map_location="cpu" lets a checkpoint that was saved on a GPU be
        # loaded on a CPU-only machine; load_state_dict copies the values into
        # the decoder's existing parameters.
        state_dict = torch.load("models/decoder/decoder.pth", map_location="cpu")
        decoder.load_state_dict(state_dict)
        decoder.eval()
        # NOTE(review): eval() does not freeze weights. configure_optimizers
        # passes self.parameters() to Adam, so if DecoderConvLSTM registers the
        # decoder as a submodule its weights are optimised too — confirm that
        # this is intended.

        self.model = DecoderConvLSTM(inner_dims=inner_dims, seq_len=seq_len, decoder=decoder)
        self.inner_dims = inner_dims
        self.seq_len = seq_len
        self.learning_rate = learning_rate
        self.save_hyperparameters()

    def forward(self, x):
        """Delegate to the wrapped ``DecoderConvLSTM``."""
        return self.model(x)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def _mse_on_batch(self, batch):
        """Run the model on one ``(X, y)`` batch and return the MSE loss.

        Shared by all three step methods so they preprocess the input
        identically.
        """
        X, y = batch
        # .squeeze(2) removes the channel dim (assumes a single channel and
        # the layout below — TODO confirm against the datamodule):
        # [batch, seq_len, channel, width, height] -> [batch, seq_len, width, height]
        y_hat = self(X.squeeze(2))
        return F.mse_loss(y_hat, y)

    def training_step(self, batch, batch_idx):
        """Compute and log the epoch-aggregated training loss; return it."""
        loss = self._mse_on_batch(batch)
        self.log('train_loss', loss, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the epoch-aggregated validation loss."""
        loss = self._mse_on_batch(batch)
        self.log('val_loss', loss, on_step=False, on_epoch=True)

    def test_step(self, batch, batch_idx):
        """Compute the test MSE and its square root and log both under "loss"."""
        loss = self._mse_on_batch(batch)
        rmse = torch.sqrt(loss)
        self.log("loss", {"loss": loss, "rmse": rmse})

In [4]:
import optuna
from optuna.integration import PyTorchLightningPruningCallback

import torch
import pytorch_lightning as pl

from models.decoder.architectures import LightningNet
from data.gdm_dataset import GasDataModule

def objective(trial):
    """Optuna objective: fit one sampled configuration and return its validation loss.

    Samples a learning rate (``"lr"``) and five layer widths
    (``"inner_dims_0"`` .. ``"inner_dims_4"``) from ``trial``, builds a
    ``LightningNet`` and a ``GasDataModule`` from the notebook-level
    constants, fits them with a ``pl.Trainer`` and returns the ``val_loss``
    entry of ``trainer.callback_metrics`` as a Python float.

    NOTE(review): this cell imports ``LightningNet`` from
    ``models.decoder.architectures`` after a class of the same name has been
    defined in the notebook, so in a top-to-bottom run the name used below
    resolves to the imported class rather than the Decoder-ConvLSTM wrapper —
    confirm which one is intended.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The validation loss of the fitted model (lower is better).
    """
    num_layers = 5
    # The range spans three orders of magnitude, so sample on a log scale;
    # uniform sampling would put ~90% of the draws above 1e-3.
    learning_rate = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    inner_dims = [
        trial.suggest_int(f"inner_dims_{i}", 3, 100, log=True)
        for i in range(num_layers)
    ]

    model = LightningNet(inner_dims, SEQ_LEN, learning_rate)
    datamodule = GasDataModule(
        data_dir=DIR,
        seq_len=SEQ_LEN,
        nth_frame=NTH_FRAME,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
    )

    tb_logger = pl.loggers.TensorBoardLogger(
        save_dir="logs",
        name="decoderconvlstm",
        #version=
    )
    trainer = pl.Trainer(
        logger=tb_logger,
        min_epochs=EPOCHS // 2,
        max_epochs=EPOCHS,
        # `accelerator`/`devices` replace the deprecated `gpus` argument:
        # one GPU when CUDA is available, otherwise a single CPU process.
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        #callbacks=[PyTorchLightningPruningCallback(trial, monitor="val_loss")],
        enable_progress_bar=False,
        enable_model_summary=False,
        fast_dev_run=True # set to False, if model should be trained
    )

    trainer.fit(model, datamodule=datamodule)

    return trainer.callback_metrics["val_loss"].item()


# Only surface Optuna messages at ERROR level.
optuna.logging.set_verbosity(optuna.logging.ERROR)

# Run a single trial of `objective`, looking for the smallest returned value.
study = optuna.create_study(direction="minimize")
study.optimize(
    objective,
    n_trials=1,
    show_progress_bar=True,
)

  self._init_valid()


  0%|          | 0/1 [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
`Trainer.fit` stopped: `max_steps=1` reached.
