In [1]:
import torch

torch.manual_seed(42)
import lightning.pytorch as pl
import numpy as np
import pandas as pd
import torch.nn.functional as F
import torch.optim as optim
import torchmetrics
from sklearn.preprocessing import (
    LabelEncoder,
    MinMaxScaler,
    RobustScaler,
    StandardScaler,
)
from torch import nn, optim, utils
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, TensorDataset, random_split
from tqdm.notebook import tqdm

torch.set_float32_matmul_precision("high")

In [2]:
df = pd.read_parquet("data/m4_preprocessed.parquet")
lengths = df.no_of_datapoints.values

le = LabelEncoder()
y = le.fit_transform(df.best_model.values)
classes = {idx: class_name for idx, class_name in enumerate(le.classes_)}


scaler = RobustScaler()
df = pd.DataFrame(
    scaler.fit_transform(df.drop(["best_model", "no_of_datapoints"], axis=1).T).T,
    columns=df.columns[:-2],
    index=df.index,
).fillna(0.0)
sequences = torch.tensor(df.values)

In [11]:
class LSTMDataLoader(pl.LightningDataModule):
    def __init__(self, sequences, lengths, y, batch_size=32):
        super().__init__()
        self.sequences = sequences
        self.lengths = lengths
        self.y = torch.tensor(y, dtype=torch.long)
        self.batch_size = batch_size

    def setup(self, stage=None):
        dataset = list(zip(self.sequences, self.lengths, self.y))

        # Sort by sequence length (important for packing)
        dataset.sort(key=lambda x: x[1], reverse=True)

        test_size = int(0.2 * len(dataset))
        val_size = int(0.1 * len(dataset))
        train_size = len(dataset) - test_size - val_size

        self.train_dataset, self.val_dataset, self.test_dataset = random_split(
            dataset, [train_size, val_size, test_size]
        )

    def collate_fn(self, batch):
        sequences, lengths, labels = zip(*batch)

        # Convert to tensor
        sequences = torch.stack(sequences)
        lengths = torch.tensor(lengths)
        labels = torch.tensor(labels, dtype=torch.long)

        return sequences, lengths, labels

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            collate_fn=self.collate_fn,
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset, batch_size=self.batch_size, shuffle=False, collate_fn=self.collate_fn
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            collate_fn=self.collate_fn,
        )

In [38]:
class LSTMClassifier(pl.LightningModule):
    def __init__(
        self, input_dim, hidden_dim=128, num_layers=2, num_classes=10, learning_rate=1e-3
    ):
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True, dropout=0.3
        )
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # Bidirectional
        self.layer_norm = nn.LayerNorm(hidden_dim * 2)
        self.loss_fn = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(
            task="multiclass", num_classes=num_classes, average="macro"
        )
        self.f1_score = torchmetrics.F1Score(
            task="multiclass", num_classes=num_classes, average="macro"
        )
        self.lr = learning_rate

    def forward(self, x, lengths):
        x = x.unsqueeze(-1)
        packed_x = pack_padded_sequence(
            x.float(), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        packed_out, (hn, cn) = self.lstm(packed_x)

        hn = torch.cat((hn[-2], hn[-1]), dim=1)  # Bidirectional concat
        hn = self.layer_norm(hn)  # Normalize
        out = self.fc(hn)
        return out

    def _common_step(self, batch, batch_idx):
        x, lengths, y = batch
        y_hat = self.forward(x, lengths)
        loss = self.loss_fn(y_hat, y)
        preds = torch.argmax(y_hat, dim=1)
        return loss, preds, y

    def training_step(self, batch, batch_idx):
        loss, preds, y = self._common_step(batch, batch_idx)
        accuracy = self.accuracy(preds, y)
        f1_score = self.f1_score(preds, y)
        self.log_dict(
            {"train_loss": loss, "train_accuracy": accuracy, "train_f1score": f1_score},
            prog_bar=True,
        )
        return loss

    def validation_step(self, batch, batch_idx):
        loss, preds, y = self._common_step(batch, batch_idx)
        accuracy = self.accuracy(preds, y)
        f1_score = self.f1_score(preds, y)
        self.log_dict(
            {"val_loss": loss, "val_accuracy": accuracy, "val_f1score": f1_score}, prog_bar=True
        )
        return loss

    def test_step(self, batch, batch_idx):
        loss, preds, y = self._common_step(batch, batch_idx)
        accuracy = self.accuracy(preds, y)
        f1_score = self.f1_score(preds, y)
        self.log_dict(
            {"test_loss": loss, "test_accuracy": accuracy, "test_f1score": f1_score}, prog_bar=True
        )
        return loss

    # def configure_optimizers(self):
    #     optimizer = optim.AdamW(self.parameters(), lr=self.lr, weight_decay=1e-4)

    #     # Dynamically calculate total steps based on the Trainer
    #     total_steps = self.trainer.estimated_stepping_batches

    #     scheduler = {
    #         "scheduler": optim.lr_scheduler.OneCycleLR(
    #             optimizer,
    #             max_lr=self.lr,
    #             epochs =  1000,
    #             steps_per_epoch= 10,
    #             # total_steps=1000,  # Ensure proper step count
    #             pct_start=0.1,
    #             anneal_strategy='cos'
    #         ),
    #         "interval": "epoch",  # OneCycleLR updates per step
    #     }

    #     return {"optimizer": optimizer, "lr_scheduler": scheduler}

    def configure_optimizers(self):
        optimizer = optim.AdamW(
            self.parameters(), lr=self.lr, weight_decay=1e-4
        )  # AdamW for better generalization
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode="min", factor=0.5, patience=5
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "monitor": "val_loss"},
        }

In [39]:
ds = LSTMDataLoader(sequences, lengths, y, batch_size=778)
model = LSTMClassifier(input_dim=1, num_classes=len(set(y)))  # sequences.shape[-1]

In [40]:
logger = pl.loggers.TensorBoardLogger(save_dir="./log/", name="lstm_model_classifier", version=0.1)


# saves top-K checkpoints based on "val_loss" metric
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    save_top_k=3,
    monitor="val_f1score",
    mode="max",
    dirpath="checkpoints/",
    filename="lstm-model-classifier-{epoch}-{val_f1score}",
)


trainer = pl.Trainer(
    logger=logger,
    accelerator="auto",
    devices=[0],
    min_epochs=1,
    max_epochs=1000,
    # precision='16-mixed',
    enable_model_summary=True,
    callbacks=[
        pl.callbacks.EarlyStopping("val_loss", patience=15, verbose=False),
        checkpoint_callback,
    ],
    #     default_root_dir="mnist_checkpoints/",
    enable_checkpointing=True,
)
ckpt_path = "model_checkpoints/lstm_classifier.ckpt"
if ckpt_path:
    trainer.fit(model, ds, ckpt_path="model_checkpoints/lstm_classifier.ckpt")
else:
    trainer.fit(model, ds)
trainer.save_checkpoint("model_checkpoints/lstm_classifier.ckpt")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at model_checkpoints/lstm_classifier.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params | Mode 
----------------------------------------------------------
0 | lstm       | LSTM               | 529 K  | train
1 | fc         | Linear             | 1.8 K  | train
2 | layer_norm | LayerNorm          | 512    | train
3 | loss_fn    | CrossEntropyLoss   | 0      | train
4 | accuracy   | MulticlassAccuracy | 0      | train
5 | f1_score   | MulticlassF1Score  | 0      | train
----------------------------------------------------------
531 K     Trainable params
0         Non-trainable params
531 K     Total params
2.127     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode
Restored all states from the checkpoint at model_checkpoints/lstm_classifie

Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

In [41]:
trainer.validate(model, ds);

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |                                             | 0/? [00:00<?, ?it/s]

In [42]:
trainer.test(model, ds);

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/pranav-pc/projects/ts/ts/classification/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Testing: |                                                | 0/? [00:00<?, ?it/s]