# In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential, Model
from keras.layers import Dense, Input

class GANDataBalancer:
    """Oversample the minority class of a tabular dataset with a small GAN.

    The generator is trained only on rows of the minority class; its samples
    are then appended to the original data, labelled with the minority label.

    Attributes set by ``__init__``:
        dataset: The DataFrame passed in (features plus target column).
        target_column: Name of the label column in ``dataset``.
        latent_dim: Length of the noise vector fed to the generator.
        X_train / y_train: Feature columns and label column of ``dataset``.
        minority_label: The least frequent value in ``y_train``.
        X_minority / X_majority: Feature rows whose label is / is not
            ``minority_label``.
        generator / discriminator / gan: Keras models; ``None`` until
            ``train`` has been called.
    """

    def __init__(self, dataset, target_column, latent_dim=100):
        """Split ``dataset`` into features/labels and locate the minority class.

        Args:
            dataset: pandas DataFrame containing the features and the target.
            target_column: Name of the label column inside ``dataset``.
            latent_dim: Size of the generator's noise input.
        """
        self.dataset = dataset
        self.target_column = target_column
        self.latent_dim = latent_dim

        self.X_train = dataset.drop(columns=[target_column])
        self.y_train = dataset[target_column]

        # Identify the minority class from the label frequencies instead of
        # assuming it is encoded as 1. With no labels at all, fall back to 1,
        # the value that used to be hard-coded here.
        class_counts = self.y_train.value_counts()
        self.minority_label = class_counts.idxmin() if len(class_counts) else 1

        minority_mask = self.y_train == self.minority_label
        self.X_minority = self.X_train[minority_mask]
        self.X_majority = self.X_train[~minority_mask]

        self.generator = None
        self.discriminator = None
        self.gan = None

    def build_generator(self, output_dim):
        """Return an uncompiled MLP mapping ``latent_dim`` noise to ``output_dim`` features."""
        model = Sequential([
            Dense(16, activation='relu', input_dim=self.latent_dim),
            Dense(32, activation='relu'),
            # Linear output: generated feature values are not range-limited.
            Dense(output_dim, activation='linear')
        ])
        return model

    def build_discriminator(self, input_dim):
        """Return a compiled binary classifier over ``input_dim`` features.

        The single sigmoid output is trained with label 1 for real rows and
        0 for generated rows (see ``train``).
        """
        model = Sequential([
            Dense(32, activation='relu', input_dim=input_dim),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def build_gan(self):
        """Return the compiled stacked model: noise -> generator -> discriminator.

        Requires ``self.generator`` and ``self.discriminator`` to be built.
        The discriminator is marked non-trainable before the stacked model is
        compiled.
        """
        self.discriminator.trainable = False
        gan_input = Input(shape=(self.latent_dim,))
        x = self.generator(gan_input)
        gan_output = self.discriminator(x)
        gan = Model(gan_input, gan_output)
        gan.compile(optimizer='adam', loss='binary_crossentropy')
        return gan

    def train(self, epochs=1000, batch_size=64):
        """Build fresh models and alternate discriminator/generator updates.

        Each iteration performs one discriminator batch (``batch_size`` real
        minority rows plus ``batch_size`` generated rows) followed by one
        generator batch through the stacked model. Losses are printed every
        100 iterations.

        Args:
            epochs: Number of training iterations (one batch each).
            batch_size: Number of real and of generated rows per batch.

        Raises:
            ValueError: If there are no minority-class rows to learn from.
        """
        minority_class_samples = self.X_minority.values
        n_minority = minority_class_samples.shape[0]
        if n_minority == 0:
            # Fail before building any model; otherwise the sampling below
            # raises an opaque "low >= high" error from NumPy.
            raise ValueError(
                "No minority-class rows found in column "
                f"{self.target_column!r}; cannot train the GAN."
            )

        # NOTE(review): features are fed to the networks as-is, without any
        # scaling — consider standardising them beforehand; confirm with data.
        n_features = self.X_train.shape[1]
        self.generator = self.build_generator(n_features)
        self.discriminator = self.build_discriminator(n_features)
        self.gan = self.build_gan()

        # Loop-invariant targets: real rows first (1), then generated rows (0).
        y_combined = np.hstack((np.ones(batch_size), np.zeros(batch_size)))
        y_gen = np.ones(batch_size)

        for epoch in range(epochs):
            # Train Discriminator
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            # verbose=0 keeps predict() from printing a progress bar per step.
            fake_data = self.generator.predict(noise, verbose=0)
            real_idx = np.random.randint(0, n_minority, batch_size)
            real_data = minority_class_samples[real_idx]
            X_combined = np.vstack((real_data, fake_data))
            self.discriminator.trainable = True
            d_loss = self.discriminator.train_on_batch(X_combined, y_combined)

            # Train Generator: label generated rows as real so the generator
            # is pushed towards fooling the discriminator.
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            self.discriminator.trainable = False
            g_loss = self.gan.train_on_batch(noise, y_gen)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}/{epochs} | Discriminator Loss: {d_loss} | Generator Loss: {g_loss}")

    def generate_balanced_dataset(self, synthetic_size=1000):
        """Return the original rows plus ``synthetic_size`` generated minority rows.

        The result has the feature columns followed by ``target_column``;
        generated rows carry ``minority_label``. The row index is reset to
        0..n-1 so that original and generated rows never share a label.

        Args:
            synthetic_size: Number of synthetic rows to append.

        Returns:
            A new pandas DataFrame; the stored dataset is not modified.

        Raises:
            RuntimeError: If called before ``train``.
        """
        if self.generator is None:
            raise RuntimeError(
                "The generator has not been trained; call train() first."
            )

        real_df = self.X_train.copy()
        real_df[self.target_column] = self.y_train
        if synthetic_size == 0:
            return real_df.reset_index(drop=True)

        noise = np.random.normal(0, 1, (synthetic_size, self.latent_dim))
        synthetic_data = self.generator.predict(noise, verbose=0)
        synthetic_df = pd.DataFrame(synthetic_data, columns=self.X_train.columns)
        # Assign the label under the real column name so the target column
        # keeps its name after concatenation.
        synthetic_df[self.target_column] = self.minority_label

        return pd.concat([real_df, synthetic_df], ignore_index=True)

    def save_balanced_dataset(self, filename='balanced_dataset.csv', synthetic_size=1000):
        """Generate a balanced dataset and write it to ``filename`` as CSV (no index column)."""
        balanced_dataset = self.generate_balanced_dataset(synthetic_size)
        balanced_dataset.to_csv(filename, index=False)
        print(f"Balanced dataset saved to '{filename}'")


# In [2]:
# Driver cell: read the raw CSV, fit the GAN on its minority class, then
# write the augmented dataset back to disk.

# Source data; 'Class' is the label column handed to the balancer below.
data = pd.read_csv(filepath_or_buffer='creditcard.csv')

# Wrap the frame in a balancer that uses a 100-dimensional noise input.
gan_balancer = GANDataBalancer(
    dataset=data,
    target_column='Class',
    latent_dim=100,
)

# Fit generator and discriminator: 1000 iterations, 64 rows per batch.
gan_balancer.train(batch_size=64, epochs=1000)

# Append synthetic rows (default count) and persist the result as CSV.
gan_balancer.save_balanced_dataset(filename='balanced_dataset.csv')


# Captured cell output: FileNotFoundError: [Errno 2] No such file or directory: 'creditcard.csv'