In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
# Load original dataset
df = pd.read_csv("bending_machine_data.csv")  # Replace with actual dataset path
target_col = "result"  # Define the target column


In [4]:
# Prepare data for GAN
X = df.drop(columns=[target_col]).values
y = df[target_col].values
input_dim = X.shape[1]

In [5]:
# Define GAN components
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, output_dim),
        )
    def forward(self, z):
        return self.model(z)

In [6]:
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )
    def forward(self, x):
        return self.model(x)

In [8]:
# Initialize models
generator = Generator(input_dim, input_dim)
discriminator = Discriminator(input_dim)

# Initialize models
generator = Generator(input_dim, input_dim)
discriminator = Discriminator(input_dim)

criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.001)
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.001)

In [9]:
def train_gan(epochs=500, batch_size=32):
    dataset = TensorDataset(torch.tensor(X, dtype=torch.float32))
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        for real_data in dataloader:
            real_data = real_data[0]
            batch_size = real_data.size(0)
            real_labels = torch.ones(batch_size, 1)
            fake_labels = torch.zeros(batch_size, 1)

            # Train Discriminator
            optimizer_d.zero_grad()
            outputs = discriminator(real_data)
            loss_real = criterion(outputs, real_labels)
            loss_real.backward()

            z = torch.randn(batch_size, input_dim)
            fake_data = generator(z)
            outputs = discriminator(fake_data.detach())
            loss_fake = criterion(outputs, fake_labels)
            loss_fake.backward()
            optimizer_d.step()

            # Train Generator
            optimizer_g.zero_grad()
            outputs = discriminator(fake_data)
            loss_g = criterion(outputs, real_labels)
            loss_g.backward()
            optimizer_g.step()

        if epoch % 50 == 0:
            print(f"Epoch {epoch}: D Loss {loss_real+loss_fake:.4f}, G Loss {loss_g:.4f}")

train_gan()

Epoch 0: D Loss 0.6689, G Loss 0.7402
Epoch 50: D Loss 2.1878, G Loss 3.4207
Epoch 100: D Loss 0.3312, G Loss 2.8698
Epoch 150: D Loss 0.3494, G Loss 3.0417
Epoch 200: D Loss 1.7184, G Loss 1.0367
Epoch 250: D Loss 1.8110, G Loss 0.7176
Epoch 300: D Loss 1.8005, G Loss 0.9141
Epoch 350: D Loss 1.4108, G Loss 0.7899
Epoch 400: D Loss 1.6202, G Loss 0.5215
Epoch 450: D Loss 1.3029, G Loss 0.6038


In [10]:
# Generate synthetic data
z = torch.randn(500, input_dim)  # Generate 500 synthetic samples
synthetic_data = generator(z).detach().numpy()

In [11]:
# Create augmented dataset
synthetic_df = pd.DataFrame(synthetic_data, columns=df.drop(columns=[target_col]).columns)
synthetic_df[target_col] = np.random.uniform(y.min(), y.max(), size=synthetic_df.shape[0])
augmented_df = pd.concat([df, synthetic_df], ignore_index=True)

In [29]:
# Create augmented dataset
augmented_df = pd.concat([df, synthetic_df], ignore_index=True)
augmented_df.to_csv("ga_augmented_data.csv", index=False)
synthetic_df.to_csv("ga_synthetic_data.csv", index=False)