In [78]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import scipy
import scipy.io as sio

In [79]:
# Create a synthetic dataset
mat_data = scipy.io.loadmat('8000_2LVSI_passed_input_data_designs.mat')
data = mat_data.get('input')
data = torch.Tensor(data)

In [80]:
# Define a function to scale the data and save the scaling parameters
def min_max_scaling(data):
    # Calculate min and max values for each column
    min_vals, _ = torch.min(data, dim=0)
    max_vals, _ = torch.max(data, dim=0)
    
    # Perform min-max scaling
    scaled_data = (data - min_vals) / (max_vals - min_vals)
    
    # Save the min and max values for each column
    scaling_params = {'min_vals': min_vals, 'max_vals': max_vals}
    
    return scaled_data, scaling_params

# Scale your data and get the scaling parameters
scaled_data, scaling_params = min_max_scaling(data)

# Save the scaling parameters to a file
np.save('scaling_params_vae.npy', scaling_params)


In [81]:
# Define the VAE architecture
#model complexity
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(VAE, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
        )
        self.mu = nn.Linear(128, latent_dim)
        self.logvar = nn.Linear(128, latent_dim)
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, input_dim),
        )
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std
    
    def forward(self, x):
        x = self.encoder(x)
        mu = self.mu(x)
        logvar = self.logvar(x)
        z = self.reparameterize(mu, logvar)
        x_hat = self.decoder(z)
        return x_hat, mu, logvar

In [82]:
# Split the data into training and validation sets (e.g., 80% training, 20% validation)
split_ratio = 0.8
train_size = int(split_ratio * len(scaled_data))
train_data, val_data = scaled_data[:train_size], scaled_data[train_size:]

In [88]:
# Initialize the VAE
input_dim = scaled_data.shape[1]
latent_dim = 50 # You can adjust this as needed
vae = VAE(input_dim, latent_dim)
data_size = len(data)

# Loss function
def vae_loss(x_hat, x, mu, logvar):
    recon_loss = nn.functional.mse_loss(x_hat, x, reduction='sum')
    kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_divergence

# Optimizer
optimizer = optim.Adam(vae.parameters(), lr=0.00001)

# Training loop
num_epochs = 1000
batch_size = 64
early_stopping_patience = 20  # Number of epochs with no improvement to wait before stopping

best_val_loss = float('inf')  # Initialize with a very large value
no_improvement_count = 0  # Counter to track the number of epochs with no improvement

for epoch in range(num_epochs):
    dataloader = DataLoader(TensorDataset(train_data), batch_size=batch_size, shuffle=True)
    total_loss = 0
    
    for batch in dataloader:
        optimizer.zero_grad()
        x = batch[0]
        x_hat, mu, logvar = vae(x)
        loss = vae_loss(x_hat, x, mu, logvar)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # Calculate the average training loss for this epoch
    avg_train_loss = total_loss / len(dataloader.dataset)
    
    # Compute validation loss
    val_loss = 0
    with torch.no_grad():
        val_dataloader = DataLoader(TensorDataset(val_data), batch_size=batch_size)
        for batch in val_dataloader:
            x = batch[0]
            x_hat, mu, logvar = vae(x)
            loss = vae_loss(x_hat, x, mu, logvar)
            val_loss += loss.item()
    
    # Calculate the average validation loss
    avg_val_loss = val_loss / len(val_dataloader.dataset)
    
    print(f"Epoch [{epoch + 1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")
    
    # Check if the current validation loss is the best so far
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        no_improvement_count = 0
    else:
        no_improvement_count += 1
    
    # Check if early stopping criteria are met
    if no_improvement_count >= early_stopping_patience:
        print(f"Early stopping after {epoch + 1} epochs with no improvement in validation loss.")
        break

Epoch [1/1000], Training Loss: 0.8466, Validation Loss: 0.7638
Epoch [2/1000], Training Loss: 0.5539, Validation Loss: 0.7563
Epoch [3/1000], Training Loss: 0.5471, Validation Loss: 0.7495
Epoch [4/1000], Training Loss: 0.5427, Validation Loss: 0.7527
Epoch [5/1000], Training Loss: 0.5399, Validation Loss: 0.7571
Epoch [6/1000], Training Loss: 0.5378, Validation Loss: 0.7476
Epoch [7/1000], Training Loss: 0.5363, Validation Loss: 0.7484
Epoch [8/1000], Training Loss: 0.5353, Validation Loss: 0.7392
Epoch [9/1000], Training Loss: 0.5346, Validation Loss: 0.7437
Epoch [10/1000], Training Loss: 0.5343, Validation Loss: 0.7430
Epoch [11/1000], Training Loss: 0.5341, Validation Loss: 0.7466
Epoch [12/1000], Training Loss: 0.5341, Validation Loss: 0.7401
Epoch [13/1000], Training Loss: 0.5338, Validation Loss: 0.7528
Epoch [14/1000], Training Loss: 0.5336, Validation Loss: 0.7447
Epoch [15/1000], Training Loss: 0.5335, Validation Loss: 0.7356
Epoch [16/1000], Training Loss: 0.5333, Validatio

In [89]:
num_samples = 1000000
 # Use the same latent dimension as your trained model
# Generating new samples from the VAE
vae.eval()
with torch.no_grad():
    z = torch.randn(num_samples, latent_dim)  # Generate 10 samples in the latent space
    generated_data = vae.decoder(z)

print(generated_data)

tensor([[0.6577, 0.5693, 0.5124,  ..., 0.6701, 0.4807, 0.3866],
        [0.6752, 0.5988, 0.5207,  ..., 0.6859, 0.4818, 0.4084],
        [0.6659, 0.5830, 0.4993,  ..., 0.6657, 0.4763, 0.3914],
        ...,
        [0.6551, 0.5759, 0.4964,  ..., 0.6527, 0.4665, 0.3828],
        [0.6574, 0.5800, 0.5077,  ..., 0.6543, 0.4674, 0.3837],
        [0.6478, 0.5678, 0.5039,  ..., 0.6494, 0.4593, 0.3797]])


In [None]:
import numpy as np

# Load the scaling parameters
loaded_scaling_params = np.load('scaling_params_vae.npy', allow_pickle=True).item()

# Scale back the generated data
scaled_generated_data = (generated_data * (loaded_scaling_params['max_vals'] - loaded_scaling_params['min_vals'])) + loaded_scaling_params['min_vals']

# Now, 'scaled_generated_data' contains your generated data in the original unscaled range.


In [None]:
print(original_generated_data.numpy())

[[32.601345 19.42127  82.63575  ... 23.109585 27.072033 73.017784]
 [32.601345 19.42127  82.63575  ... 23.109585 27.072033 73.017784]
 [32.601345 19.42127  82.63575  ... 23.109585 27.072033 73.017784]
 ...
 [32.601345 19.42127  82.63575  ... 23.109585 27.072033 73.017784]
 [32.601345 19.42127  82.63575  ... 23.109585 27.072033 73.017784]
 [32.601345 19.42127  82.63575  ... 23.109585 27.072033 73.017784]]


In [None]:
sio.savemat('VAE_2LVSI_generated_data.mat', {'vae_input': original_data.numpy()})