In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset
import numpy as np
import h5py
import matplotlib.pyplot as plt
import os

In [2]:
# Device setup: prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
# Stateful LSTM encoder-decoder (seq2seq) model
class Stateful_LSTM_Seq2Seq(nn.Module):
    """LSTM encoder-decoder whose recurrent state is handed back to the caller.

    The caller passes the state returned by one call into the next call, which
    is what makes the model "stateful" across batches.

    Args:
        input_size: Width of the encoder input. ``forward`` appends one noise
            channel to ``x``, so ``x`` itself must have ``input_size - 1`` channels.
        hidden_size: Hidden width shared by the encoder and the decoder.
        num_layers: Number of stacked LSTM layers in both encoder and decoder.
        output_size: Number of features emitted per decoding step.
        dropout: Inter-layer dropout passed to both ``nn.LSTM`` modules.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Encoder
        self.encoder = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)

        # Decoder: consumes hidden_size-wide inputs (its own previous output or a projected target)
        self.decoder = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc_out = nn.Linear(hidden_size, output_size)

        # Lifts a ground-truth target step into the decoder's input space for teacher forcing
        self.target_proj = nn.Linear(output_size, hidden_size)

    def forward(self, x, encoder_hidden, decoder_hidden, decoder_input, target_seq=None, teacher_forcing_ratio=0.0):
        """Encode ``x`` and decode one output step at a time.

        Args:
            x: (batch_size, seq_len, input_size - 1) conditioning sequence.
            encoder_hidden: Tuple (h_0, c_0) carried over from the previous batch,
                or None to let ``nn.LSTM`` start from zeros.
            decoder_hidden: Accepted for interface compatibility only; it is
                overwritten with the encoder's final state before decoding.
            decoder_input: (batch_size, 1, hidden_size) first decoder input,
                or None to start from an all-zero token.
            target_seq: Optional (batch_size, target_len, output_size) ground truth.
                When given, it sets the number of decoding steps and enables
                teacher forcing.
            teacher_forcing_ratio: Per-step probability of feeding the projected
                ground-truth step (instead of the decoder's own output) as the
                next decoder input.

        Returns:
            Tuple ``(outputs, encoder_hidden, decoder_hidden, decoder_input)``.
            ``outputs`` is (batch_size, steps, output_size). Both hidden-state
            tuples are L2-normalised along the last dimension and detached, and
            ``decoder_input`` is the detached input prepared for the next step.
        """
        batch_size, seq_len, _ = x.shape
        device = x.device

        # Default start token, so callers may pass None just as they can for encoder_hidden.
        if decoder_input is None:
            decoder_input = torch.zeros(batch_size, 1, self.hidden_size).to(device)

        # Append one channel of Gaussian noise to every time step of the conditioning input.
        noise = torch.randn(batch_size, seq_len, 1).to(device)
        x = torch.cat((x, noise), dim=-1)

        # ---- ENCODER ----
        _, encoder_hidden = self.encoder(x, encoder_hidden)

        # ---- DECODER ----
        # The decoder always starts from the encoder's final state.
        decoder_hidden = encoder_hidden

        target_seq_len = target_seq.shape[1] if target_seq is not None else seq_len

        outputs = []
        for t in range(target_seq_len):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs.append(self.fc_out(decoder_output))  # (batch_size, 1, output_size)

            # ---- TEACHER FORCING ----
            # One random draw per step decides the next input for the whole batch.
            use_teacher = target_seq is not None and torch.rand(1).item() < teacher_forcing_ratio
            if use_teacher:
                decoder_input = self.target_proj(target_seq[:, t:t+1, :])
            else:
                decoder_input = decoder_output

        outputs = torch.cat(outputs, dim=1)  # (batch_size, steps, output_size)

        # ---- NORMALIZE & DETACH HIDDEN STATES ----
        # Detaching cuts the autograd graph between consecutive batches.
        encoder_hidden = tuple(nn.functional.normalize(state, dim=-1).detach() for state in encoder_hidden)
        decoder_hidden = tuple(nn.functional.normalize(state, dim=-1).detach() for state in decoder_hidden)
        decoder_input = decoder_input.detach()

        return outputs, encoder_hidden, decoder_hidden, decoder_input

In [4]:
# Save reconstructed data to HDF5
def save_data(filepath, data):
    """Write ``data`` to ``filepath`` as the HDF5 dataset 'reconstructed_features'.

    The file is opened in 'w' mode and a confirmation line is printed once the
    file has been closed.
    """
    with h5py.File(filepath, 'w') as h5_out:
        h5_out.create_dataset('reconstructed_features', data=data)

    confirmation = f"Reconstructed data saved to {filepath}"
    print(confirmation)

In [5]:
# 5. Load Data
def load_data(file_path):
    """Read the full 'data' dataset from the HDF5 file at ``file_path``.

    Returns whatever slicing the dataset with ``[:]`` yields; no tensor
    conversion is performed here.
    """
    with h5py.File(file_path, 'r') as h5_in:
        dataset = h5_in['data']
        # Slice while the file is still open so the values are read into memory.
        return dataset[:]

In [6]:
# def process_data(X, order):
#     # Reshape for fitting: (samples * features, timesteps) → (20*6, 150)
#     # X_reshaped = X.reshape(-1, X.shape[2])  

#     # Fit scaler on entire dataset
#     # scaler = StandardScaler()
#     # X_standardized = scaler.fit_transform(X_reshaped)

#     # Reshape back to (20, 6, 150)
#     # X_standardized = X_standardized.reshape(X.shape[0], X.shape[1], X.shape[2])

#     # Reorder using NumPy indexing
#     # X_reorderd = X[:, order, :]

#     data = torch.from_numpy(X).float()  # Convert to PyTorch tensor

#     # Print shape to confirm it remains unchanged
#     print("Original shape:", X.shape)
#     print("Standardized and reordered shape:", data.shape)
#     return data

In [6]:
# Load model function
def load_model(filepath, feature_dim, hidden_dim, condition_dim, num_layers, dropout):
    """Rebuild a ``Stateful_LSTM_Seq2Seq`` and load trained weights into it.

    Args:
        filepath: Path to a state-dict checkpoint written with ``torch.save``.
        feature_dim: Passed to the model as ``output_size``.
        hidden_dim: Passed to the model as ``hidden_size``.
        condition_dim: Passed to the model as ``input_size`` (the encoder input
            width, which includes the noise channel the model appends itself).
        num_layers: Number of stacked LSTM layers.
        dropout: Inter-layer dropout of the LSTMs.

    Returns:
        The model on the notebook-level ``device``, switched to evaluation mode.
    """
    model = Stateful_LSTM_Seq2Seq(
        input_size=condition_dim,
        hidden_size=hidden_dim,
        num_layers=num_layers,
        output_size=feature_dim,
        dropout=dropout
    ).to(device)

    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU still loads on a CPU-only machine.
    # NOTE: torch.load unpickles the file -- only open checkpoints from a trusted source.
    state_dict = torch.load(filepath, map_location=device)
    model.load_state_dict(state_dict)

    model.eval()  # Set the model to evaluation mode
    return model

In [7]:
# Hyper-parameters used to rebuild the model and to batch the data for inference.
# NOTE(review): presumably these must match the values used when the checkpoint
# was trained -- confirm against the training notebook.
BATCH_SIZE = 16  # DataLoader batch size; also sizes the initial hidden state in run_inference
HIDDEN_SIZE = 64  # Passed to load_model as hidden_dim
NUM_LAYERS = 2  # Passed to load_model as num_layers
INPUT_SIZE = 8   # Encoder input width: the model appends 1 noise channel, so the conditions carry INPUT_SIZE - 1 channels
OUTPUT_SIZE = 10  # Number of output features
DROPOUT = 0.2  # Passed to both nn.LSTM modules; load_model puts the model in eval mode

In [8]:
# Inference function
def run_inference(model, data_loader, teacher_forcing_ratio=0.5):
    """Run the stateful model over ``data_loader`` and collect its outputs.

    The recurrent state and the decoder input returned by the model for one
    batch are fed into the next batch. The state is sized from the model
    (``model.num_layers``, ``model.hidden_size``) and from the actual batch,
    so it does not depend on notebook-level constants.

    Args:
        model: A ``Stateful_LSTM_Seq2Seq``-style module exposing ``num_layers``
            and ``hidden_size``. It is moved to the notebook-level ``device``.
            The caller is responsible for putting it in eval mode.
        data_loader: Iterable yielding ``(conditions, real_features)`` batches.
        teacher_forcing_ratio: Per-step probability that the decoder is fed the
            ground-truth ``real_features`` step. Any value above 0 leaks ground
            truth into the output and makes it vary between runs; pass 0.0 for
            purely model-driven generation. Defaults to 0.5.

    Returns:
        A NumPy array with every batch output concatenated along axis 0.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.to(device)
    num_layers, hidden_size = model.num_layers, model.hidden_size

    all_reconstructed = []
    encoder_hidden = None
    decoder_hidden = None
    decoder_input = None

    with torch.no_grad():
        for conditions, real_features in data_loader:
            real_features, conditions = real_features.to(device), conditions.to(device)
            batch_size = conditions.size(0)

            # (Re-)initialise the carried state on the first batch, or whenever the
            # batch size changes (e.g. a final partial batch). A state with the
            # wrong number of rows would make the LSTM raise.
            if encoder_hidden is None or encoder_hidden[0].size(1) != batch_size:
                encoder_hidden = (torch.zeros(num_layers, batch_size, hidden_size, device=device),
                                  torch.zeros(num_layers, batch_size, hidden_size, device=device))
                decoder_hidden = encoder_hidden
                decoder_input = torch.zeros(batch_size, 1, hidden_size, device=device)  # Start token

            synthetic_features, encoder_hidden, decoder_hidden, decoder_input = model(
                conditions,
                encoder_hidden,
                decoder_hidden,
                decoder_input,
                real_features,
                teacher_forcing_ratio=teacher_forcing_ratio)
            all_reconstructed.append(synthetic_features.cpu().numpy())  # Store the reconstructed output

    if not all_reconstructed:
        raise ValueError("run_inference: data_loader produced no batches")

    return np.concatenate(all_reconstructed, axis=0)  # Combine all outputs

In [9]:
# Load Data
def load_features(file_path, seq_len=12):
    """Load a 2-D feature table from an ``.npz`` file and cut it into windows.

    Args:
        file_path: Path to an ``.npz`` archive whose array is stored under 'arr_0'.
        seq_len: Number of consecutive rows per window (default 12).

    Returns:
        A float32 tensor of shape (num_windows, seq_len, num_columns).

    Raises:
        ValueError: If the number of rows is not a multiple of ``seq_len``.
    """
    # The context manager closes the archive as soon as the array is read.
    with np.load(file_path) as archive:
        feat = archive['arr_0']

    if feat.shape[0] % seq_len != 0:
        raise ValueError(
            f"Cannot split {feat.shape[0]} feature rows into windows of length {seq_len}"
        )

    # Processing input: consecutive rows are grouped into fixed-length windows
    f_rs = feat.reshape(-1, seq_len, feat.shape[1])

    f = torch.from_numpy(f_rs).float()  # Convert to PyTorch tensor
    print(f'Features Shape: {f.shape}')
    return f

# Load Data
def load_conditions(file_path, seq_len=12):
    """Load a 2-D condition table from an ``.npz`` file and cut it into windows.

    Args:
        file_path: Path to an ``.npz`` archive whose array is stored under 'arr_0'.
        seq_len: Number of consecutive rows per window (default 12).

    Returns:
        A float32 tensor of shape (num_windows, seq_len, num_columns).

    Raises:
        ValueError: If the number of rows is not a multiple of ``seq_len``.
    """
    # The context manager closes the archive as soon as the array is read.
    with np.load(file_path) as archive:
        cond = archive['arr_0']

    if cond.shape[0] % seq_len != 0:
        raise ValueError(
            f"Cannot split {cond.shape[0]} condition rows into windows of length {seq_len}"
        )

    # Processing input: consecutive rows are grouped into fixed-length windows
    c_rs = cond.reshape(-1, seq_len, cond.shape[1])

    c = torch.from_numpy(c_rs).float()
    print(f'Conditions Shape: {c.shape}')
    return c

In [10]:
# Main execution for inference

# ---- Load the data ----
# NOTE: machine-specific absolute paths; adjust them when running elsewhere.
feature_path = r'D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\features_extraction\trial_12\features.npz'
condition_path = r'D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\Activities_probs\trial_3_scipy_resample\P13_conditions_onehot.npz'
# Alternative condition set:
# condition_path = r'D:\Ali_Thesis\synthetic_data_generation\Data\Process_canada_data\Activities_probs\trial_2_best\P13_conditions_onehot.npz'

features = load_features(feature_path)
conditions = load_conditions(condition_path)

# ---- Fail fast on shape mismatches ----
# Without these checks a mismatch only surfaces as a shape error inside the model.
assert features.shape[0] == conditions.shape[0], (
    f"features ({features.shape[0]}) and conditions ({conditions.shape[0]}) differ in number of windows"
)
# The model appends one noise channel to the conditions before the encoder.
assert conditions.shape[2] + 1 == INPUT_SIZE, (
    f"conditions have {conditions.shape[2]} channels; expected INPUT_SIZE - 1 = {INPUT_SIZE - 1}"
)
assert features.shape[2] == OUTPUT_SIZE, (
    f"features have {features.shape[2]} channels; expected OUTPUT_SIZE = {OUTPUT_SIZE}"
)

# ---- Batch the data ----
# shuffle=False keeps the windows in file order for the stateful model.
# drop_last=True discards a trailing partial batch, so the output can be up to
# BATCH_SIZE - 1 windows shorter than the input.
data_loader = DataLoader(TensorDataset(conditions, features), batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

# ---- Load the trained model ----
current_dir = os.getcwd()
model_path = os.path.join(current_dir, "lstm_encoder_decoder.pth")

model = load_model(model_path, OUTPUT_SIZE, HIDDEN_SIZE, INPUT_SIZE, NUM_LAYERS, DROPOUT)

# ---- Run inference ----
reconstructed_features = run_inference(model, data_loader)

# Save the reconstructed data to an HDF5 file
save_data(os.path.join(current_dir, 'reconstructed_features.h5'), reconstructed_features)

# Optionally: save or visualize the reconstructed data
print("Reconstructed Data Shape:", reconstructed_features.shape)

Features Shape: torch.Size([34820, 12, 10])
Conditions Shape: torch.Size([34820, 12, 7])
Reconstructed data saved to d:\Ali_Thesis\synthetic_data_generation\c_lstm_decoder_16\reconstructed_features.h5
Reconstructed Data Shape: (34816, 12, 10)
