In [129]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision.models import inception_v3
from scipy.stats import entropy
import scipy

In [130]:
# Load the design dataset from the MATLAB file (the data is read from disk,
# it is not synthesized here).
mat_data = scipy.io.loadmat('8000_2LVSI_passed_input_data_designs.mat')
if 'input' not in mat_data:
    # Fail loudly at the source: a plain .get('input') would hand None to
    # torch.Tensor and the error would surface far from its cause.
    raise KeyError(
        "Variable 'input' not found in the .mat file; available variables: "
        f"{[key for key in mat_data if not key.startswith('__')]}"
    )
# torch.Tensor(...) builds a tensor of the default floating-point dtype.
data = torch.Tensor(mat_data['input'])

In [156]:
# Define a function to scale the data and return the scaling parameters
def min_max_scaling(data):
    """Min-max scale ``data`` along dim 0 so each column spans [0, 1].

    Args:
        data: Tensor whose first dimension indexes samples; the minimum and
            maximum are taken over that dimension.

    Returns:
        A ``(scaled_data, scaling_params)`` tuple where ``scaling_params`` is
        a dict with the per-column ``'min_vals'`` and ``'max_vals'`` tensors.
        The inverse transform is
        ``scaled * (max_vals - min_vals) + min_vals``.

    Columns that hold a single constant value are mapped to 0 rather than
    NaN; the inverse transform still restores them exactly because their
    range is 0 and their minimum is the constant itself.
    """
    # Per-column extrema (torch.min/max with dim return (values, indices)).
    min_vals, _ = torch.min(data, dim=0)
    max_vals, _ = torch.max(data, dim=0)

    # A constant column has max == min; dividing by that zero range would
    # give 0/0 = NaN for every row. Divide by 1 for those columns instead.
    value_range = max_vals - min_vals
    safe_range = torch.where(value_range == 0, torch.ones_like(value_range), value_range)

    scaled_data = (data - min_vals) / safe_range

    # Return the true extrema (not the safe range) so the inverse stays exact.
    scaling_params = {'min_vals': min_vals, 'max_vals': max_vals}

    return scaled_data, scaling_params

# Scale the loaded data column-wise and keep the min/max needed to undo it
scaled_data, scaling_params = min_max_scaling(data)
print(scaled_data)
# Persist the scaling parameters. The dict is handed to np.save as-is, so
# reading it back takes allow_pickle=True plus .item() (see the example below).
np.save('scaling_params.npy', scaling_params)

# To reproduce the scaling later (e.g., when generating data):
# Load the scaling parameters
#loaded_scaling_params = np.load('scaling_params.npy', allow_pickle=True).item()

# Scale the data back to its original range
#original_data = scaled_data * (loaded_scaling_params['max_vals'] - loaded_scaling_params['min_vals']) + loaded_scaling_params['min_vals']

tensor([[0.5918, 0.4000, 0.1362,  ..., 0.3108, 0.8667, 0.0000],
        [0.6327, 0.6400, 0.7387,  ..., 0.4408, 0.9333, 0.0000],
        [0.1633, 0.4800, 0.5078,  ..., 0.5360, 0.1333, 0.0000],
        ...,
        [0.6939, 0.9600, 0.7001,  ..., 0.5698, 0.2000, 1.0000],
        [0.8776, 0.9200, 0.4463,  ..., 0.8887, 0.1333, 1.0000],
        [0.1020, 0.7200, 0.9685,  ..., 0.4474, 0.4667, 1.0000]])


In [132]:
# Generator network: latent noise vector -> synthetic data row
class Generator(nn.Module):
    """Fully connected generator.

    Three hidden stages (128, 256, 512 units), each Linear -> BatchNorm1d ->
    ReLU, followed by a linear projection to ``data_dim``. The output layer
    has no activation, so outputs are unbounded.
    """

    def __init__(self, latent_dim, data_dim):
        super().__init__()
        self.latent_dim = latent_dim
        self.data_dim = data_dim

        # Assemble the stack in the same order as a hand-written Sequential
        # so layer indices (and therefore state_dict keys) are unchanged.
        stack = []
        fan_in = latent_dim
        for width in (128, 256, 512):
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.BatchNorm1d(width))
            stack.append(nn.ReLU())
            fan_in = width
        stack.append(nn.Linear(fan_in, data_dim))  # No Sigmoid activation here

        self.model = nn.Sequential(*stack)

    def forward(self, z):
        """Map a batch of latent vectors ``z`` to generated samples."""
        generated = self.model(z)
        return generated

# Discriminator network: data row -> probability in (0, 1)
class Discriminator(nn.Module):
    """Fully connected binary classifier over data rows.

    Three Linear -> ReLU stages (512, 256, 128 units) followed by a single
    output unit passed through a Sigmoid.
    """

    def __init__(self, data_dim):
        super().__init__()
        self.data_dim = data_dim

        # Consecutive pairs of this list give each hidden layer's (in, out).
        widths = [data_dim, 512, 256, 128]
        hidden = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            hidden.extend((nn.Linear(fan_in, fan_out), nn.ReLU()))

        self.model = nn.Sequential(
            *hidden,
            nn.Linear(widths[-1], 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the discriminator's score for each row of ``x``."""
        score = self.model(x)
        return score

In [155]:
# Model / optimizer setup and the adversarial training loop.
# Relies on Generator, Discriminator and scaled_data from the cells above.
data_dim = scaled_data.shape[1]
latent_dim = 5  # size of the noise vector fed to the Generator; adjust as needed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build both networks and move them to `device`. Every batch below is moved
# to `device`, so leaving the networks on the CPU raises a device-mismatch
# error as soon as CUDA is available.
generator = Generator(latent_dim, data_dim).to(device)
discriminator = Discriminator(data_dim).to(device)

# Loss function and optimizers
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer_G = optim.Adam(generator.parameters(), lr=0.0003)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0003)

# NOTE(review): this pre-trained Inception v3 model is loaded but never used
# by the training loop below. It is kept only so the cell defines the same
# names as before; consider removing it.
inception_model = inception_v3(pretrained=True, transform_input=False)
inception_model.eval()

# Training configuration
num_epochs = 12000
batch_size = 64

# Loop-invariant tensors, computed once and placed on `device`:
#  - per-column offset/range used to map generator output onto the range of
#    scaled_data (previously recomputed over the full dataset twice per epoch)
#  - the target labels for real (1) and fake (0) samples
data_min = scaled_data.min(0)[0].to(device)
data_range = (scaled_data.max(0)[0] - scaled_data.min(0)[0]).to(device)
real_labels = torch.ones(batch_size, 1).to(device)
fake_labels = torch.zeros(batch_size, 1).to(device)

for epoch in range(num_epochs):
    # ---- Train the Discriminator ----
    for _ in range(1):
        optimizer_D.zero_grad()

        # Draw a random mini-batch of real rows. Indexing with the first
        # batch_size entries of the permutation selects the same rows as
        # permuting the whole dataset and slicing, without copying all of it.
        batch_idx = torch.randperm(len(scaled_data))[:batch_size]
        real_data = scaled_data[batch_idx].to(device)

        # Generate fake data from the Generator
        z = torch.randn(batch_size, latent_dim).to(device)
        fake_data = generator(z)

        # Map the generator output onto the range of scaled_data
        fake_data = fake_data * data_range + data_min

        output_real = discriminator(real_data)
        # detach(): the discriminator step must not backpropagate into the generator
        output_fake = discriminator(fake_data.detach())

        loss_real = criterion(output_real, real_labels)
        loss_fake = criterion(output_fake, fake_labels)

        loss_D = loss_real + loss_fake
        loss_D.backward()
        optimizer_D.step()

    # ---- Train the Generator ----
    optimizer_G.zero_grad()

    z = torch.randn(batch_size, latent_dim).to(device)
    fake_data = generator(z)

    # Map the generator output onto the range of scaled_data
    fake_data = fake_data * data_range + data_min

    output_fake = discriminator(fake_data)

    # The generator is rewarded when the discriminator labels its samples as real
    loss_G = criterion(output_fake, real_labels)
    loss_G.backward()
    optimizer_G.step()

    if (epoch + 1) % 1000 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss_D: {loss_D.item():.4f}, Loss_G: {loss_G.item():.4f}")

Epoch [1000/12000], Loss_D: 1.2390, Loss_G: 0.8789
Epoch [2000/12000], Loss_D: 1.4200, Loss_G: 0.7337
Epoch [3000/12000], Loss_D: 1.3842, Loss_G: 0.7026
Epoch [4000/12000], Loss_D: 1.3914, Loss_G: 0.6955
Epoch [5000/12000], Loss_D: 1.4084, Loss_G: 0.6963
Epoch [6000/12000], Loss_D: 1.4442, Loss_G: 0.6714
Epoch [7000/12000], Loss_D: 1.2826, Loss_G: 0.6904
Epoch [8000/12000], Loss_D: 1.3920, Loss_G: 0.7006
Epoch [9000/12000], Loss_D: 1.6495, Loss_G: 0.5838
Epoch [10000/12000], Loss_D: 1.3533, Loss_G: 2.1340
Epoch [11000/12000], Loss_D: 1.2974, Loss_G: 0.7541
Epoch [12000/12000], Loss_D: 1.0671, Loss_G: 0.9647


In [157]:
# Generating new samples from the GAN
generator.eval()  # inference mode: BatchNorm uses its running statistics

num_samples = 1_000_000  # number of latent vectors to draw

# Create the noise on whichever device the generator's weights live on, so
# this cell works whether the model was trained on the CPU or on a GPU.
generator_device = next(generator.parameters()).device

with torch.no_grad():
    z = torch.randn(num_samples, latent_dim).to(generator_device)
    # Bring the result back to the CPU: later cells call .numpy() on it.
    generated_data = generator(z).cpu()

print(generated_data)

tensor([[ 0.5553,  0.1604, -0.0019,  ...,  0.7763,  0.6165,  0.7628],
        [ 0.9449,  0.5639,  0.9340,  ...,  1.0188,  0.4706,  0.7386],
        [ 0.9247,  0.4971,  0.9829,  ...,  1.0235,  0.5762,  0.7356],
        ...,
        [ 0.7152,  0.5058,  0.8644,  ...,  0.9219,  0.4789,  0.9849],
        [ 0.8547,  0.6960,  0.7316,  ...,  0.7296,  0.4897,  0.4097],
        [ 0.8549,  0.4276,  0.0546,  ...,  0.8377,  1.0275,  0.7192]])


In [161]:
# Load the scaling parameters written by this notebook's scaling cell
# ('scaling_params.npy'); the previous file name pointed at a different
# experiment's parameters.
# NOTE: allow_pickle=True unpickles the file's contents, so only load files
# that this notebook itself produced.
loaded_scaling_params = np.load('scaling_params.npy', allow_pickle=True).item()

min_vals = loaded_scaling_params['min_vals']
value_range = loaded_scaling_params['max_vals'] - min_vals

# Undo the min-max scaling: scaled * (max - min) + min
original_generated_data = (generated_data * value_range) + min_vals

# Convert the PyTorch tensor to a NumPy array.
# NOTE: .numpy() shares memory with the tensor, so the in-place edits below
# are also visible through `original_generated_data`.
original_generated_data_np = original_generated_data.numpy()

# Round columns 3, 4 and 6 (0-based indices 2, 3, 5) to 2 decimal places
original_generated_data_np[:, [2, 3, 5]] = np.round(original_generated_data_np[:, [2, 3, 5]], 2)

# Round column 5 (0-based index 4) to 4 decimal places
original_generated_data_np[:, 4] = np.round(original_generated_data_np[:, 4], 4)

# Convert columns 1, 2, 7 and 8 (0-based indices 0, 1, 6, 7) to whole numbers.
# astype(int) truncates toward zero (it does not round to nearest).
original_generated_data_np[:, [0, 1, 6, 7]] = original_generated_data_np[:, [0, 1, 6, 7]].astype(int)

# Sort the rows by column 8 (0-based index 7) -- assumed to be the label
# column; TODO confirm.
sorted_indices = np.argsort(original_generated_data_np[:, 7])
sorted_data_np = original_generated_data_np[sorted_indices]

In [162]:
# Show the post-processed generated samples, ordered by column 8 (index 7).
print(sorted_data_np)

[[ 39.    16.    71.78 ...  20.69  26.    -9.  ]
 [ 43.    17.    85.57 ...  22.32  27.    -7.  ]
 [ 43.    17.    87.05 ...  22.35  27.    -6.  ]
 ...
 [ 62.     1.    20.4  ...  42.46  23.   331.  ]
 [ 65.     1.    20.45 ...  43.56  24.   333.  ]
 [ 70.     0.    25.2  ...  46.17  25.   358.  ]]


In [153]:
import scipy.io as sio

# Export the inverse-scaled generated samples to a MATLAB file under the
# variable name 'gan_input'.
# NOTE: .numpy() returns an array that shares memory with the tensor, so any
# in-place edits made through an earlier .numpy() view are included here.
gan_payload = {'gan_input': original_generated_data.numpy()}
sio.savemat('GAN_2LVSI_generated_data.mat', gan_payload)