# LSTM Classifier

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import numpy as np
from utils.set_seed import set_seed
from utils.load_data import load_and_split_data
import optuna

  from .autonotebook import tqdm as notebook_tqdm


## A Simple Example
🧠 Step 1: Understanding LSTM in PyTorch

PyTorch's ``nn.LSTM`` expects input of shape:
```
(seq_len, batch_size, input_size)
```
It returns:

- output: (seq_len, batch_size, hidden_size)

- (h_n, c_n): the hidden and cell states (each of shape: num_layers, batch_size, hidden_size)



In [2]:
# 🛠 Step 2: Creating a Dataset
# Example sequence
data = np.array([i for i in range(1, 101)], dtype=np.float32)  # [1, 2, ..., 100]

# Sequence parameters
seq_length = 5
X = []
Y = []

for i in range(len(data) - seq_length):
    X.append(data[i:i+seq_length])
    Y.append(data[i+seq_length])

X = torch.tensor(X).unsqueeze(-1)  # Shape: (num_samples, seq_len, 1)
Y = torch.tensor(Y).unsqueeze(-1)  # Shape: (num_samples, 1)


  X = torch.tensor(X).unsqueeze(-1)  # Shape: (num_samples, seq_len, 1)


In [None]:
# 🧱 Step 3: Defining the LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=1, output_size=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initialize hidden and cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)

        # LSTM forward pass
        out, _ = self.lstm(x, (h0, c0))  # out: (batch, seq_len, hidden)
        out = self.fc(out[:, -1, :])     # Take the last time step
        return out


In [14]:
#🏋️ Step 4: Training the Model
# Initialize model, loss, optimizer
model = LSTMModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop
num_epochs = 1000

for epoch in range(num_epochs):
    outputs = model(X)
    loss = criterion(outputs, Y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/1000], Loss: 3058.3525
Epoch [20/1000], Loss: 2550.8455
Epoch [30/1000], Loss: 2107.7151
Epoch [40/1000], Loss: 1748.3368
Epoch [50/1000], Loss: 1462.8076
Epoch [60/1000], Loss: 1241.3734
Epoch [70/1000], Loss: 1070.9705
Epoch [80/1000], Loss: 924.4969
Epoch [90/1000], Loss: 783.4571
Epoch [100/1000], Loss: 610.5067
Epoch [110/1000], Loss: 508.5166
Epoch [120/1000], Loss: 424.8292
Epoch [130/1000], Loss: 352.0236
Epoch [140/1000], Loss: 286.4334
Epoch [150/1000], Loss: 234.0060
Epoch [160/1000], Loss: 192.4662
Epoch [170/1000], Loss: 159.6376
Epoch [180/1000], Loss: 132.0133
Epoch [190/1000], Loss: 107.2719
Epoch [200/1000], Loss: 86.3053
Epoch [210/1000], Loss: 70.4205
Epoch [220/1000], Loss: 58.0133
Epoch [230/1000], Loss: 48.3615
Epoch [240/1000], Loss: 40.6656
Epoch [250/1000], Loss: 34.4655
Epoch [260/1000], Loss: 29.4186
Epoch [270/1000], Loss: 25.2660
Epoch [280/1000], Loss: 21.8175
Epoch [290/1000], Loss: 18.9309
Epoch [300/1000], Loss: 16.3271
Epoch [310/1000], Loss:

In [15]:
# 🔮 Step 5: Making Predictions
# Predict the next value for a new sequence
with torch.no_grad():
    test_seq = torch.tensor([[96, 97, 98, 99, 100]], dtype=torch.float32).unsqueeze(-1)
    prediction = model(test_seq)
    print(f"Predicted next number: {prediction.item():.2f}")


Predicted next number: 98.88


## Train the LSTM using SSA data 

📦 Step 1: Reshape Input for LSTM

We need to first standardise the data, then reshape the input.

LSTM expects input in the shape ``(batch_size, seq_len, num_features)``

In [2]:
# Train LSTM model using SSA data
output_file = 'data/mRNA_trajectories_example.csv'
X_train, X_val, X_test, y_train, y_val, y_test = load_and_split_data(output_file, split_val_size=0.2) # we must define split_val_size here to get a validation set
# Standardize the data 
# If your input features are too large (e.g., >1000) or too small (<0.0001), it can cause unstable training, so it's better to standardize the data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
# Reshape input for LSTM, LSTM expects input in the shape (batch_size, seq_len, num_features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

print(X_train)
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

[[[ 0.        ]
  [-0.40815408]
  [-0.27320156]
  ...
  [-0.06262243]
  [-0.08873565]
  [-0.08873565]]

 [[ 0.        ]
  [-0.40815408]
  [-0.27320156]
  ...
  [-0.06262243]
  [-0.08873565]
  [-0.08873565]]

 [[ 0.        ]
  [ 0.96668072]
  [-0.27320156]
  ...
  [-0.06262243]
  [-0.08873565]
  [-0.08873565]]

 ...

 [[ 0.        ]
  [-0.40815408]
  [-0.27320156]
  ...
  [-0.06262243]
  [-0.08873565]
  [-0.08873565]]

 [[ 0.        ]
  [-0.40815408]
  [-0.27320156]
  ...
  [-0.06262243]
  [11.26942767]
  [-0.08873565]]

 [[ 0.        ]
  [-0.40815408]
  [-0.27320156]
  ...
  [-0.06262243]
  [-0.08873565]
  [-0.08873565]]]
X_train shape: (256, 144, 1)
y_train shape: (256,)


🧱 Step 2: Convert to PyTorch Tensors and Dataloaders

In [3]:
# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Create datasets and loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)


🧠 Step 3: Initialize and Train LSTM Model

In [4]:
from models.lstm import LSTMClassifier

input_size = X_train.shape[2]  # each time step is a single value
hidden_size = 64
num_layers = 2 # number of LSTM layers
output_size = len(torch.unique(y_train_tensor))  # number of classes
dropout_rate = 0.3
learning_rate = 0.001

model = LSTMClassifier(input_size=input_size,           
                       hidden_size=hidden_size,
                       num_layers=num_layers, output_size=output_size,
                       dropout_rate=dropout_rate, learning_rate=learning_rate)

# Train the model
history = model.train_model(train_loader, val_loader=val_loader,
                            epochs=50, patience=10,
                            # save_path='best_lstm_model.pt'
                            )


🔄 Using device: cuda (1 GPUs available)
DEBUG: Optimizer initialized? True
✅ Running on CUDA!
Epoch [1/50], Loss: 0.6901, Train Acc: 0.5234
Validation Acc: 0.4844
Epoch [2/50], Loss: 0.6916, Train Acc: 0.5312
Validation Acc: 0.5000
Epoch [3/50], Loss: 0.6877, Train Acc: 0.5352
Validation Acc: 0.5000
No improvement (1/10).
Epoch [4/50], Loss: 0.6932, Train Acc: 0.4805
Validation Acc: 0.5000
No improvement (2/10).
Epoch [5/50], Loss: 0.6909, Train Acc: 0.5195
Validation Acc: 0.5000
No improvement (3/10).
Epoch [6/50], Loss: 0.6837, Train Acc: 0.5391
Validation Acc: 0.4844
No improvement (4/10).
Epoch [7/50], Loss: 0.6831, Train Acc: 0.5312
Validation Acc: 0.4844
No improvement (5/10).
Epoch [8/50], Loss: 0.6751, Train Acc: 0.5547
Validation Acc: 0.4844
No improvement (6/10).
Epoch [9/50], Loss: 0.6639, Train Acc: 0.5625
Validation Acc: 0.5312
Epoch [10/50], Loss: 0.6495, Train Acc: 0.6055
Validation Acc: 0.5312
No improvement (1/10).
Epoch [11/50], Loss: 0.6707, Train Acc: 0.6289
Validat

🔮 Step 4: Evaluate on Test Set

In [5]:
# Prepare test data
X_test_tensor = torch.tensor(X_test.reshape(X_test.shape[0], X_test.shape[1], 1), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64)

# Evaluate
test_acc = model.evaluate(test_loader)
print(f"✅ Test accuracy: {test_acc:.4f}")

✅ Test accuracy: 0.6250


Altogether

In [7]:
# Train LSTM model using SSA data
output_file = 'data/mRNA_trajectories_example.csv'
X_train, X_val, X_test, y_train, y_val, y_test = load_and_split_data(output_file, split_val_size=0.2) # we must define split_val_size here to get a validation set
# Standardize the data 
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
# Reshape input for LSTM, LSTM expects input in the shape (batch_size, seq_len, num_features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Create datasets and loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

from models.lstm import LSTMClassifier

input_size = X_train.shape[2]  # each time step is a single value
hidden_size = 64
num_layers = 2 # number of LSTM layers
output_size = len(torch.unique(y_train_tensor))  # number of classes
dropout_rate = 0.3
learning_rate = 0.001

model = LSTMClassifier(input_size=input_size, hidden_size=hidden_size, 
                       num_layers=num_layers, output_size=output_size,
                       dropout_rate=dropout_rate, learning_rate=learning_rate)

# Train the model
history = model.train_model(train_loader, val_loader=val_loader,
                            epochs=50, patience=10,
                            # save_path='best_lstm_model.pt'
                            )
# Prepare test data
X_test_tensor = torch.tensor(X_test.reshape(X_test.shape[0], X_test.shape[1], 1), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64)

# Evaluate
test_acc = model.evaluate(test_loader)
print(f"✅ Test accuracy: {test_acc:.4f}")

🔄 Using device: cuda (1 GPUs available)
DEBUG: Optimizer initialized? True
✅ Running on CUDA!
Epoch [1/50], Loss: 0.7000, Train Acc: 0.4922
Validation Acc: 0.5000
Epoch [2/50], Loss: 0.6967, Train Acc: 0.4805
Validation Acc: 0.5000
No improvement (1/10).
Epoch [3/50], Loss: 0.6948, Train Acc: 0.4609
Validation Acc: 0.5000
No improvement (2/10).
Epoch [4/50], Loss: 0.6906, Train Acc: 0.5000
Validation Acc: 0.5000
No improvement (3/10).
Epoch [5/50], Loss: 0.6889, Train Acc: 0.4961
Validation Acc: 0.5000
No improvement (4/10).
Epoch [6/50], Loss: 0.6873, Train Acc: 0.4961
Validation Acc: 0.4844
No improvement (5/10).
Epoch [7/50], Loss: 0.6837, Train Acc: 0.5352
Validation Acc: 0.4844
No improvement (6/10).
Epoch [8/50], Loss: 0.6762, Train Acc: 0.5234
Validation Acc: 0.4844
No improvement (7/10).
Epoch [9/50], Loss: 0.6700, Train Acc: 0.5508
Validation Acc: 0.4844
No improvement (8/10).
Epoch [10/50], Loss: 0.6626, Train Acc: 0.5508
Validation Acc: 0.5312
Epoch [11/50], Loss: 0.6482, Tr

One-liner

In [2]:
from classifiers.lstm_classifer import lstm_classifier

# Train SVM model using SSA data
output_file = 'data/mRNA_trajectories_example.csv'
X_train, X_val, X_test, y_train, y_val, y_test = load_and_split_data(output_file, split_val_size=0.2) # we must define split_val_size here to get a validation set
lstm_accuracy = lstm_classifier(X_train, X_val, X_test, y_train, y_val, y_test, epochs=50, bidirectional=True)


🔄 Using device: cuda (1 GPUs available)
DEBUG: Optimizer initialized? True
✅ Running on CUDA!
Epoch [1/50], Loss: 0.6920, Train Acc: 0.5156
Validation Acc: 0.5000
Epoch [2/50], Loss: 0.6928, Train Acc: 0.5156
Validation Acc: 0.5000
No improvement (1/10).
Epoch [3/50], Loss: 0.6926, Train Acc: 0.4727
Validation Acc: 0.4844
No improvement (2/10).
Epoch [4/50], Loss: 0.6932, Train Acc: 0.5039
Validation Acc: 0.5000
No improvement (3/10).
Epoch [5/50], Loss: 0.6894, Train Acc: 0.5352
Validation Acc: 0.4844
No improvement (4/10).
Epoch [6/50], Loss: 0.6900, Train Acc: 0.4961
Validation Acc: 0.5000
No improvement (5/10).
Epoch [7/50], Loss: 0.6862, Train Acc: 0.4688
Validation Acc: 0.5000
No improvement (6/10).
Epoch [8/50], Loss: 0.6770, Train Acc: 0.5195
Validation Acc: 0.4844
No improvement (7/10).
Epoch [9/50], Loss: 0.6704, Train Acc: 0.5625
Validation Acc: 0.4844
No improvement (8/10).
Epoch [10/50], Loss: 0.6578, Train Acc: 0.5703
Validation Acc: 0.5312
Epoch [11/50], Loss: 0.6541, Tr