In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Placeholder training set sampled from a standard normal distribution.
# Inputs: 100 samples, each a sequence of 6 time steps with 21 features per step.
X_train = torch.randn((100, 6, 21))
# Targets: 7 values per sample (e.g. for a regression task).
y_train = torch.randn((100, 7))

* `pack_padded_sequence`: Converts a padded tensor into a `PackedSequence`, dropping the padded positions before the data is passed to an RNN or LSTM, so the model never processes the padding.
* `pad_packed_sequence`: Converts a `PackedSequence` back into a regular padded tensor for subsequent processing.

In [20]:
import torch
from torch.nn.utils.rnn import pack_padded_sequence

# Toy batch of three sequences, zero-padded to a common length of 3 time steps,
# with 2 features per step.
x = torch.tensor(
    [
        [[1, 2], [3, 4], [0, 0]],        # real length 2 (last step is padding)
        [[5, 6], [7, 8], [0, 0]],        # real length 2 (last step is padding)
        [[9, 10], [11, 12], [13, 14]],   # real length 3 (no padding)
    ]
)

# Number of real (non-padded) time steps in each sequence, in batch order
x_lengths = torch.tensor((2, 2, 3))

print("Original x:", x)
print("x_lengths:", x_lengths)

# Pack the batch; enforce_sorted=False lets the lengths come in any order
# (the sort permutation is recorded on the resulting PackedSequence).
packed_x = pack_padded_sequence(
    input=x,
    lengths=x_lengths,
    batch_first=True,
    enforce_sorted=False,
)
print(packed_x)


Original x: tensor([[[ 1,  2],
         [ 3,  4],
         [ 0,  0]],

        [[ 5,  6],
         [ 7,  8],
         [ 0,  0]],

        [[ 9, 10],
         [11, 12],
         [13, 14]]])
x_lengths: tensor([2, 2, 3])
PackedSequence(data=tensor([[ 9, 10],
        [ 1,  2],
        [ 5,  6],
        [11, 12],
        [ 3,  4],
        [ 7,  8],
        [13, 14]]), batch_sizes=tensor([3, 3, 1]), sorted_indices=tensor([2, 0, 1]), unsorted_indices=tensor([1, 2, 0]))


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class StatePredictLSTM(nn.Module):
    """Single-layer LSTM followed by a linear head applied at every time step.

    The zero-padded input batch is packed before it reaches the LSTM so that
    padded positions are skipped, then unpacked and projected to
    ``varstate_size`` values per time step.
    """

    def __init__(self, input_size, hidden_size, varstate_size):
        """Build the LSTM and the per-time-step output layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Size of the LSTM hidden state.
            varstate_size: Number of values predicted per time step.
        """
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.state_layer = nn.Linear(hidden_size, varstate_size)

    def forward(self, x, x_lengths, h_state=None):
        """Predict a state vector for every time step of a padded batch.

        Args:
            x: Padded input of shape (batch, time_step, input_size).
            x_lengths: Real length of each sequence (tensor or sequence of
                ints), in batch order. The lengths do not need to be sorted.
            h_state: Optional ``(h_0, c_0)`` tuple, each of shape
                (n_layers, batch, hidden_size). ``None`` lets the LSTM start
                from zeros.

        Returns:
            A tuple ``(state_out, h_state)``: ``state_out`` has shape
            (batch, time_step, varstate_size) with the same time dimension as
            ``x``; ``h_state`` is the final ``(h_n, c_n)`` of the LSTM.
            Positions beyond a sequence's length hold the output layer's bias
            (the LSTM output there is zero padding), so mask them out before
            computing a loss.
        """
        # pack_padded_sequence requires the lengths as a 1-D int64 CPU tensor.
        lengths = torch.as_tensor(x_lengths, dtype=torch.int64).cpu()

        # enforce_sorted=False: accept batches that are not ordered by
        # decreasing length; the permutation is undone when unpacking.
        packed_in = pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        packed_out, h_state = self.lstm(packed_in, h_state)

        # total_length keeps the output time axis equal to the input's, even
        # when the longest sequence in the batch is shorter than the padding.
        padded_out, _ = pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )

        # nn.Linear acts on the last dimension, so one call covers all steps.
        return self.state_layer(padded_out), h_state

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Fix the RNG seed so the fake data, the initial weights and the printed
# results are reproducible across runs.
torch.manual_seed(0)

# Hyperparameters: 21 input features per time step (input_size), 64 LSTM hidden
# units (hidden_size), 7 predicted state variables per time step (varstate_size)
input_size = 21
hidden_size = 64
varstate_size = 7
batch_size = 4
max_sequence_length = 6

# Create fake training data X_train (batch_size, sequence_length, input_size)
# The input is random numbers
X_train = torch.randn(batch_size, max_sequence_length, input_size)

# Create fake target data y_train (batch_size, sequence_length, varstate_size)
# The target is also random numbers
y_train = torch.randn(batch_size, max_sequence_length, varstate_size)

# Define the actual lengths of the sequences
# Assume each sequence has a different length
x_lengths = torch.tensor([6, 5, 4, 3])

# Initialize the model
model = StatePredictLSTM(input_size=input_size, hidden_size=hidden_size, varstate_size=varstate_size)

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define the loss function (using mean squared error)
criterion = nn.MSELoss()

# Set the model to training mode
model.train()

# Forward pass: compute the model output
h_state = (torch.zeros(1, batch_size, hidden_size), torch.zeros(1, batch_size, hidden_size))  # Initialize the hidden state
output, _ = model(X_train, x_lengths, h_state)

# Print the output
print("Model output (predictions):")
print(output)

# Compute the loss on real time steps only. Padded steps carry no signal, so
# including them would bias the loss and the gradients.
# valid_mask[b, t] is True when step t lies within sequence b's real length.
time_index = torch.arange(max_sequence_length).unsqueeze(0)  # (1, T)
valid_mask = time_index < x_lengths.unsqueeze(1)             # (B, T)
loss = criterion(output[valid_mask], y_train[valid_mask])
print("Loss:", loss.item())

# Backward pass: compute gradients and update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()

# Print the updated model parameters
for param in model.parameters():
    print(param)


Model output (predictions):
tensor([[[ 4.6051e-02, -5.1326e-02,  1.5498e-02, -9.5112e-03, -4.8703e-02,
           1.5998e-02,  1.6308e-01],
         [ 1.4444e-01,  1.1901e-02,  1.5310e-02,  7.5998e-02, -4.8189e-02,
          -4.6561e-02,  5.3164e-02],
         [ 7.9999e-02,  3.2026e-02,  3.9151e-02,  1.3521e-01, -2.3209e-05,
          -4.1681e-02,  1.3888e-01],
         [ 2.8237e-02,  3.4150e-02,  7.7798e-03,  1.1590e-01, -7.0034e-02,
           3.9986e-02,  1.0915e-01],
         [ 1.0844e-01,  2.6283e-02,  4.0038e-02,  1.2014e-01, -6.0351e-02,
          -9.6238e-03,  3.0930e-02],
         [ 5.4449e-02, -1.0091e-02, -8.1561e-03,  2.8222e-02,  6.2727e-03,
           4.9855e-02,  1.1689e-01]],

        [[ 7.1354e-02,  4.7599e-02,  2.5650e-02,  7.8342e-02, -8.6414e-02,
          -5.3234e-05,  1.4454e-01],
         [ 7.8583e-02,  1.9057e-02,  7.7142e-02,  1.5484e-02, -1.5784e-02,
           2.2203e-02,  5.2135e-02],
         [ 5.9223e-02, -4.2070e-03,  9.1932e-02, -2.0348e-03,  1.5848e-02,