In [30]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [31]:
# Load original dataset
df = pd.read_csv("bending_machine_data.csv")  # Replace with actual dataset path
target_col = "result"  # Define the target column

In [32]:
# Prepare data for training
X = df.drop(columns=[target_col]).values
y = df[target_col].values
input_dim = X.shape[1]

In [33]:
# Define Variational Autoencoder (VAE)
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.mu_layer = nn.Linear(64, latent_dim)
        self.logvar_layer = nn.Linear(64, latent_dim)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim)
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        hidden = self.encoder(x)
        mu, logvar = self.mu_layer(hidden), self.logvar_layer(hidden)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar


In [34]:
# Initialize VAE
latent_dim = 16
vae = VAE(input_dim, latent_dim)
optimizer = optim.Adam(vae.parameters(), lr=0.001)

def loss_function(recon_x, x, mu, logvar):
    recon_loss = nn.MSELoss()(recon_x, x)
    kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_divergence

In [35]:
# Train VAE
def train_vae(epochs=500, batch_size=32):
    dataset = TensorDataset(torch.tensor(X, dtype=torch.float32))
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        for real_data in dataloader:
            real_data = real_data[0]
            optimizer.zero_grad()
            recon_data, mu, logvar = vae(real_data)
            loss = loss_function(recon_data, real_data, mu, logvar)
            loss.backward()
            optimizer.step()

        if epoch % 50 == 0:
            print(f"Epoch {epoch}: Loss {loss.item():.4f}")

train_vae()

Epoch 0: Loss 1851.6326
Epoch 50: Loss 124.7452
Epoch 100: Loss 75.2751
Epoch 150: Loss 60.3964
Epoch 200: Loss 51.3613
Epoch 250: Loss 70.6211
Epoch 300: Loss 51.9873
Epoch 350: Loss 51.6538
Epoch 400: Loss 38.7268
Epoch 450: Loss 49.7153


In [36]:
# Generate synthetic data
z = torch.randn(500, latent_dim)
synthetic_data = vae.decoder(z).detach().numpy()

In [37]:
# Save synthetic data
synthetic_df = pd.DataFrame(synthetic_data, columns=df.drop(columns=[target_col]).columns)
synthetic_df[target_col] = np.random.uniform(y.min(), y.max(), size=synthetic_df.shape[0])
synthetic_df.to_csv("ve_synthetic_data.csv", index=False)

In [38]:
# Create augmented dataset
augmented_df = pd.concat([df, synthetic_df], ignore_index=True)
augmented_df.to_csv("ve_augmented_data.csv", index=False)