In [1]:
import os

# The `src` package imported below and the relative `data/` / `models/` paths
# used later are presumably resolved from the project root, one level above
# this notebook. Only step up when `src` is not already visible from the
# current directory, so re-running this cell does not keep climbing the tree.
if not os.path.isdir('src'):
    os.chdir('..')
import torch
from src.data.dataset import CustomDataset
from src.models.train_model import RNNModule
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, TQDMProgressBar
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

In [2]:
# Load the processed LiCl data set.
# NOTE(review): the data file is LiCl but the scaler path is named
# `scaler_cscl_kcl.pkl` — confirm this is the intended scaler file and that it
# is not being overwritten/reused by mistake.
dataset = CustomDataset('data/processed/data_licl.csv', savepath='models/preprocessing/scaler_cscl_kcl.pkl')

# 80 / 20 train / validation split; the remainder goes to validation so the
# two sizes always sum to len(dataset).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same samples land in train/validation on every
# Restart & Run All; otherwise validation scores are not comparable across runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=split_generator,
)

In [3]:
# One dataset item per batch for both loaders.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=1,
    shuffle=True,   # reshuffle the training items every epoch
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=1,
    shuffle=False,  # DataLoader default, spelled out: fixed validation order
)

In [4]:
# Recurrent regressor: scalar input feature -> scalar output, two stacked
# bidirectional recurrent layers with 512 hidden units each.
model = RNNModule(
    input_size=1,
    hidden_size=512,
    output_size=1,
    num_layers=2,
    bidirectional=True,
)

In [5]:
# R^2 is a higher-is-better metric, so every callback that monitors it must
# track its maximum. ModelCheckpoint defaults to mode='min', which would keep
# the checkpoint with the LOWEST validation R^2 (the worst epoch).
checkpoint_callback = ModelCheckpoint(monitor='validation_r2_score', mode='max')

# Record the learning rate once per epoch.
lr_monitor = LearningRateMonitor(logging_interval='epoch')

# Stop when validation R^2 has not improved by at least 0.01 for 20
# consecutive validation checks.
early_stop_callback = EarlyStopping(
    monitor='validation_r2_score',
    min_delta=.01,
    patience=20,
    verbose=True,
    mode='max'
)

# TensorBoard event files go under lightning_logs/licl/.
logger = TensorBoardLogger('lightning_logs', name='licl')

In [6]:
# Single-GPU trainer, capped at 200 epochs (early stopping may end it sooner).
trainer_callbacks = [
    checkpoint_callback,
    lr_monitor,
    early_stop_callback,
    TQDMProgressBar(refresh_rate=1),  # redraw the progress bar on every batch
]
trainer = pl.Trainer(
    max_epochs=200,
    callbacks=trainer_callbacks,
    accelerator='gpu',
    devices=1,
    logger=logger,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
# Run the training loop; validation runs on val_loader and feeds the
# `validation_r2_score` metric watched by the callbacks.
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name | Type   | Params
--------------------------------
0 | rnn  | LSTM   | 8.4 M 
1 | fc   | Linear | 1.0 K 
--------------------------------
8.4 M     Trainable params
0         Non-trainable params
8.4 M     Total params
33.640    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  return F.mse_loss(input, target, reduction=self.reduction)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved. New best score: -1.782
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.662 >= min_delta = 0.01. New best score: -1.120
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.437 >= min_delta = 0.01. New best score: -0.683
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.011 >= min_delta = 0.01. New best score: -0.672
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.305 >= min_delta = 0.01. New best score: -0.367
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.107 >= min_delta = 0.01. New best score: -0.260
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.035 >= min_delta = 0.01. New best score: -0.224
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.238 >= min_delta = 0.01. New best score: 0.014
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Metric validation_r2_score improved by 0.075 >= min_delta = 0.01. New best score: 0.088
  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

Monitored metric validation_r2_score did not improve in the last 20 records. Best score: 0.088. Signaling Trainer to stop.


In [None]:
# Optional manual save of the current trainer state; uncomment to use.
# trainer.save_checkpoint('models/serialized/model_cscl_kcl_checkpoint_19_06_2023.ckpt')