In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset
import numpy as np
import h5py
import matplotlib.pyplot as plt

In [2]:
# Device setup: use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
# 2. LSTM Autoencoder Model
class StatefulLSTM_Autoencoder(nn.Module):
    """LSTM autoencoder whose recurrent state is passed in and handed back by the caller.

    The encoder LSTM consumes the input sequence; the top layer's final hidden
    state is projected to a `feature_dim` bottleneck (`fc1`), projected back to
    `hidden_size` (`fc2`), repeated along the time axis and fed to the decoder
    LSTM, whose outputs are mapped back to `num_channels` (`fc_out`).

    Args:
        num_channels: Number of input (and reconstructed) channels.
        hidden_size: Hidden width of both LSTMs.
        num_layers: Number of stacked layers in both LSTMs.
        feature_dim: Size of the bottleneck feature vector.
    """

    def __init__(self, num_channels, hidden_size, num_layers, feature_dim=10):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Settings shared by the encoder and decoder LSTMs.
        recurrent_cfg = dict(hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # Encoder: sequence -> final hidden state -> bottleneck features
        self.encoder = nn.LSTM(input_size=num_channels, **recurrent_cfg)
        self.fc1 = nn.Linear(hidden_size, feature_dim)

        # Decoder: bottleneck features -> repeated sequence -> reconstruction
        self.fc2 = nn.Linear(feature_dim, hidden_size)
        self.decoder = nn.LSTM(input_size=hidden_size, **recurrent_cfg)
        self.fc_out = nn.Linear(hidden_size, num_channels)

    def init_hidden(self, batch_size, device):
        """Return an all-zero (h_0, c_0) pair of shape (num_layers, batch_size, hidden_size) on `device`."""
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        return tuple(torch.zeros(state_shape).to(device) for _ in range(2))

    def forward(self, x, encoder_hidden, decoder_hidden):
        """Encode `x`, reconstruct it, and return the updated recurrent states.

        Args:
            x: Input of shape (batch_size, num_channels, num_samples).
            encoder_hidden: (h, c) state fed to the encoder LSTM.
            decoder_hidden: (h, c) state fed to the decoder LSTM.

        Returns:
            Tuple of (reconstruction, features, encoder_hidden, decoder_hidden):
            the reconstruction has the same layout as `x`, `features` is
            (batch_size, feature_dim), and both returned states are detached
            from the autograd graph.
        """
        # The LSTMs are batch_first, so move time ahead of channels:
        # (batch_size, num_samples, num_channels)
        sequence = x.permute(0, 2, 1)
        num_steps = sequence.shape[1]

        # Only the final encoder state is needed; per-step outputs are discarded.
        _, enc_state = self.encoder(sequence, encoder_hidden)

        # Bottleneck: project the last layer's final hidden state.
        top_layer_h = enc_state[0][-1]
        features = self.fc1(top_layer_h)

        # The decoder sees the same expanded feature vector at every time step.
        decoder_input = self.fc2(features).unsqueeze(1).repeat(1, num_steps, 1)

        dec_sequence, dec_state = self.decoder(decoder_input, decoder_hidden)
        # Back to the caller's layout: (batch_size, num_channels, num_samples)
        reconstruction = self.fc_out(dec_sequence).permute(0, 2, 1)

        # Detach the carried states so gradients never flow across batches.
        enc_state = tuple(state.detach() for state in enc_state)
        dec_state = tuple(state.detach() for state in dec_state)

        return reconstruction, features, enc_state, dec_state

In [4]:
# Save reconstructed data to HDF5
def save_data(filepath, data):
    """Write `data` to an HDF5 file as the dataset 'reconstructed_data'.

    The file is opened in 'w' mode, so any existing file at `filepath` is
    replaced. A confirmation line is printed once the file has been closed.
    """
    with h5py.File(filepath, mode='w') as h5_out:
        h5_out.create_dataset(name='reconstructed_data', data=data)
    confirmation = f"Reconstructed data saved to {filepath}"
    print(confirmation)

In [5]:
# 5. Load Data
def load_data(file_path):
    """Read the complete 'data' dataset from an HDF5 file.

    The dataset is sliced with `[:]` while the file is still open, and the
    resulting value is returned after the file has been closed. No tensor
    conversion happens here.
    """
    with h5py.File(file_path, mode='r') as h5_in:
        dataset = h5_in['data']
        values = dataset[:]
    return values

In [6]:
def process_data(X, order):
    """Reorder the channel axis of `X` and convert the result to a float32 tensor.

    Args:
        X: NumPy array indexed as X[sample, channel, time].
        order: Channel indices giving the new order along axis 1.

    Returns:
        A torch.float32 tensor holding `X[:, order, :]`.

    Note:
        No scaling is applied in this function; the "Standardized and
        reordered" wording of the second printed line is only a label.
    """
    # Advanced indexing along axis 1 produces a reordered copy of the array.
    reordered = X[:, order, :]
    tensor = torch.from_numpy(reordered)
    data = tensor.float()

    # Report both shapes so the caller can confirm nothing changed size.
    print("Original shape:", X.shape)
    print("Standardized and reordered shape:", data.shape)
    return data

In [7]:
# Load model function
def load_model(filepath, num_channels, hidden_size, num_layers, feature_dim):
    """Rebuild a StatefulLSTM_Autoencoder and restore its weights from a checkpoint.

    Args:
        filepath: Path to a state_dict saved with `torch.save`.
        num_channels: Number of input channels the model was built with.
        hidden_size: LSTM hidden width the model was built with.
        num_layers: Number of stacked LSTM layers the model was built with.
        feature_dim: Bottleneck size the model was built with.

    Returns:
        The model in evaluation mode. It is constructed without a device
        argument; move it to the desired device afterwards.

    Raises:
        RuntimeError: From `load_state_dict` if the checkpoint's keys or
            tensor shapes do not match the architecture arguments.
    """
    model = StatefulLSTM_Autoencoder(num_channels, hidden_size, num_layers, feature_dim)
    # map_location="cpu" lets a checkpoint saved from a GPU be loaded on a
    # machine without CUDA, and avoids allocating GPU memory for tensors that
    # are only copied into the freshly built model.
    # NOTE(review): torch.load unpickles the file, so only load checkpoints from
    # a trusted source (recent PyTorch versions offer weights_only=True).
    state_dict = torch.load(filepath, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()  # Set the model to evaluation mode
    return model

In [8]:
# Inference function
def _match_hidden_batch(model, hidden, batch_size, target_device):
    """Return an (h, c) state whose batch dimension equals `batch_size`.

    A missing state, or one that is too small, is replaced by a fresh zero
    state; a state that is too large (typically for a short final batch) is
    truncated to its first `batch_size` entries.
    """
    if hidden is None or hidden[0].size(1) < batch_size:
        return model.init_hidden(batch_size, target_device)
    if hidden[0].size(1) > batch_size:
        return tuple(state[:, :batch_size].contiguous() for state in hidden)
    return hidden


def run_inference(model, data_loader, target_device=None):
    """Run the autoencoder over every batch, carrying its hidden state forward.

    Args:
        model: Module exposing `init_hidden(batch_size, device)` and a forward
            pass returning (decoded, features, encoder_hidden, decoder_hidden).
        data_loader: Iterable yielding (inputs, target) pairs; the target is ignored.
        target_device: Device to run on. Defaults to the notebook-level `device`.

    Returns:
        Tuple (reconstructed, features) of NumPy arrays, each the concatenation
        of the per-batch outputs along axis 0.

    Raises:
        ValueError: If `data_loader` yields no batches.
    """
    if target_device is None:
        target_device = device  # notebook-level default chosen in the device cell
    model.to(target_device)
    all_reconstructed = []
    all_features = []

    # States are created lazily from the first batch, so the loader is only
    # iterated once and no batch is fetched just to learn its size.
    encoder_hidden = None
    decoder_hidden = None

    with torch.no_grad():
        for inputs, _ in data_loader:
            inputs = inputs.to(target_device)
            current_batch = inputs.size(0)

            # Keep the carried state compatible with this batch; without this a
            # short final batch makes the LSTM reject the state.
            encoder_hidden = _match_hidden_batch(model, encoder_hidden, current_batch, target_device)
            decoder_hidden = _match_hidden_batch(model, decoder_hidden, current_batch, target_device)

            decoded, features, encoder_hidden, decoder_hidden = model(inputs, encoder_hidden, decoder_hidden)
            all_reconstructed.append(decoded.cpu().numpy())  # Store the reconstructed output
            all_features.append(features.cpu().numpy())  # Store the bottleneck features

    if not all_reconstructed:
        raise ValueError("data_loader produced no batches; nothing to run inference on")

    # Combine all per-batch outputs
    return np.concatenate(all_reconstructed, axis=0), np.concatenate(all_features, axis=0)

In [10]:
# Main execution for inference

# --- Configuration -----------------------------------------------------------
# NOTE: these are absolute, machine-specific Windows paths; edit the two
# directories below when running the notebook elsewhere.
DATA_DIR = r"D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data"
TRIAL_DIR = rf"{DATA_DIR}\features_extraction\trial_12"

# Architecture of the saved checkpoint; load_state_dict fails if these do not
# match the shapes stored in the file.
HIDDEN_SIZE = 64
NUM_LAYERS = 2
FEATURE_DIM = 10

# --- Load the data -------------------------------------------------------------
file_path = rf"{DATA_DIR}\P13_5_sec_30hz_sequences_sensor_data_std_normalized.h5"
X = load_data(file_path)

order = [1, 2, 3, 4, 5, 0]  # Acc_X, Acc_y, Acc_z, BvP, TEMP, EDA
data = process_data(X, order)

num_channels = data.shape[1]

# Define batch size (can be same as used during training)
batch_size = 16  # Or any suitable batch size for inference

# drop_last=True keeps every batch the same size, but it also discards a
# trailing partial batch; make that visible instead of losing samples silently.
num_dropped = len(data) % batch_size
if num_dropped:
    print(f"Warning: drop_last=True discards the final {num_dropped} sample(s) "
          f"({len(data)} samples, batch size {batch_size}).")
data_loader = DataLoader(TensorDataset(data, data), batch_size=batch_size, shuffle=False, drop_last=True)

# --- Load the trained model ----------------------------------------------------
model_path = rf"{TRIAL_DIR}\lstm_autoencoder.pth"
model = load_model(model_path, num_channels, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, feature_dim=FEATURE_DIM)

# --- Run inference -------------------------------------------------------------
reconstructed_data, features = run_inference(model, data_loader)

# --- Save the outputs ----------------------------------------------------------
# Reconstruction goes to HDF5; the features are passed positionally to
# np.savez_compressed, which stores them under the key 'arr_0'.
save_data(rf"{TRIAL_DIR}\reconstructed_data.h5", reconstructed_data)
np.savez_compressed(rf"{TRIAL_DIR}\features.npz", features)

# Report the output shapes
print("Reconstructed Data Shape:", reconstructed_data.shape)
print("Features Shape:", features.shape)

Original shape: (417840, 6, 150)
Standardized and reordered shape: torch.Size([417840, 6, 150])
Reconstructed data saved to D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\features_extraction\trial_12\reconstructed_data.h5
Reconstructed Data Shape: (417840, 6, 150)
Reconstructed Data Shape: (417840, 10)
