In [1]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
import numpy as np
import pandas as pd

In [2]:
class GAN:
    """Small fully-connected GAN for tabular data.

    The generator maps ``latent_dim``-sized Gaussian noise to ``input_dim``
    features; the discriminator maps ``input_dim`` features to a single
    sigmoid output. Both use one hidden ``Dense`` layer of ``hidden_dim``
    ReLU units.

    Attributes:
        input_dim: Number of features per sample.
        latent_dim: Size of the noise vector fed to the generator.
        hidden_dim: Width of the hidden layer in both networks.
        columns: Column names used when generated samples are put in a DataFrame.
        training_data: The data most recently passed to ``train_gan`` (or None).
        discriminator, generator, gan: The three Keras models.
    """

    def __init__(self, input_dim, latent_dim, hidden_dim,columns):
        """Store the hyper-parameters and build the three models.

        Args:
            input_dim: Number of features per sample.
            latent_dim: Size of the generator's noise input.
            hidden_dim: Width of the hidden layer in both networks.
            columns: Column names for generated samples.
        """
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.hidden_dim = hidden_dim

        # Define the columns
        self.columns = columns

        # Remembered by train_gan so generate_and_save_data does not have to
        # rely on a module-level variable.
        self.training_data = None

        # Build and compile the discriminator (compiled BEFORE build_gan sets
        # its trainable flag to False)
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy', optimizer=Adam())

        # Build the generator
        self.generator = self.build_generator()

        # Build the GAN model
        self.gan = self.build_gan()

    def build_discriminator(self):
        """Return the (uncompiled) discriminator: input_dim -> hidden_dim -> 1."""
        inputs = Input(shape=(self.input_dim,))
        x = Dense(self.hidden_dim, activation='relu')(inputs)
        outputs = Dense(1, activation='sigmoid')(x)
        return Model(inputs=inputs, outputs=outputs)

    def build_generator(self):
        """Return the (uncompiled) generator: latent_dim -> hidden_dim -> input_dim."""
        inputs = Input(shape=(self.latent_dim,))
        x = Dense(self.hidden_dim, activation='relu')(inputs)
        outputs = Dense(self.input_dim, activation='linear')(x)
        return Model(inputs=inputs, outputs=outputs)

    def build_gan(self):
        """Return the compiled stacked model: noise -> generator -> discriminator.

        The discriminator's ``trainable`` flag is set to False before the
        stacked model is compiled.
        """
        self.discriminator.trainable = False
        inputs = Input(shape=(self.latent_dim,))
        generated_data = self.generator(inputs)
        outputs = self.discriminator(generated_data)
        gan = Model(inputs=inputs, outputs=outputs)
        gan.compile(loss='binary_crossentropy', optimizer=Adam())
        return gan

    def train_gan(self, data, epochs, batch_size):
        """Alternate discriminator and generator updates on random batches.

        Each batch draws ``batch_size`` rows from ``data`` uniformly at random
        (with replacement), so an "epoch" is ``len(data) // batch_size``
        random batches rather than one pass over every row.

        Args:
            data: Real samples, one row per sample with ``input_dim`` columns.
                A DataFrame or anything ``np.asarray`` can convert.
            epochs: Number of epochs.
            batch_size: Number of real (and of fake) rows per discriminator step.
        """
        # Keep a reference so generate_and_save_data can merge with it later.
        self.training_data = data

        # Convert once instead of indexing the DataFrame on every batch.
        real_samples = np.asarray(data)
        num_rows = real_samples.shape[0]
        num_batches = num_rows // batch_size

        for epoch in range(epochs):
            for batch in range(num_batches):
                # Train the discriminator on half real (label 1), half fake (label 0)
                picked = np.random.randint(0, num_rows, size=batch_size)
                real_data = real_samples[picked]
                noise = np.random.normal(0, 1, size=(batch_size, self.latent_dim))
                fake_data = self.generator.predict(noise)
                x = np.concatenate((real_data, fake_data))
                y = np.concatenate((np.ones(batch_size), np.zeros(batch_size)))
                discriminator_loss = self.discriminator.train_on_batch(x, y)

                # Freeze the discriminator's weights during generator training
                # NOTE(review): whether changing `trainable` after compile()
                # has any effect depends on the Keras version - confirm.
                self.discriminator.trainable = False

                # Train the generator: fresh noise, labelled as "real"
                noise = np.random.normal(0, 1, size=(batch_size, self.latent_dim))
                y = np.ones(batch_size)
                generator_loss = self.gan.train_on_batch(noise, y)

                # Unfreeze the discriminator's weights after generator training
                self.discriminator.trainable = True

                # Print the progress
                print("Epoch {}/{} Batch {}/{} D loss {:.4f} G loss {:.4f}".format(
                    epoch+1, epochs, batch+1, num_batches, discriminator_loss, generator_loss))

    def generate_data(self, num_samples):
        """Return ``num_samples`` generated rows as returned by the generator's ``predict``."""
        noise = np.random.normal(0, 1, size=(num_samples, self.latent_dim))
        return self.generator.predict(noise)

    def generate_and_save_data(self, num_samples, output_file, original_data=None):
        """Generate samples, append them to the original data and write a CSV.

        Args:
            num_samples: Number of rows to generate.
            output_file: Path of the CSV file to write (written without index).
            original_data: Data the generated rows are appended to. Defaults
                to the data last passed to ``train_gan``. Non-DataFrame input
                is wrapped in a DataFrame using ``self.columns``.

        Raises:
            ValueError: If ``original_data`` is not given and ``train_gan``
                has not been called on this instance.
        """
        if original_data is None:
            original_data = self.training_data
        if original_data is None:
            raise ValueError(
                "No original data to merge with: pass original_data or call train_gan first.")
        if not isinstance(original_data, pd.DataFrame):
            original_data = pd.DataFrame(original_data, columns=self.columns)

        # Generate new data using the generator network and label its columns
        generated_df = pd.DataFrame(self.generate_data(num_samples), columns=self.columns)

        # Original rows first, generated rows appended below them
        merged_df = pd.concat([original_data, generated_df], axis=0)

        # Save the merged data to a CSV file
        merged_df.to_csv(output_file, index=False)

    def save_model(self, gan_path, generator_path, discriminator_path):
        """Save the stacked model, the generator and the discriminator to the given paths."""
        self.gan.save(gan_path)
        self.generator.save(generator_path)
        self.discriminator.save(discriminator_path)
        print("Models saved successfully.")

In [3]:
# Load the cleaned dataset; `data` is the same table without the
# 'Misaligned' column and is what gets passed to the GAN below
data_full = pd.read_csv('Data_Cleaned.csv')
data = data_full.drop(columns=['Misaligned'])

In [4]:
# Define the input dimensions, latent dimensions, and hidden dimensions.
# input_dim must equal the number of feature columns in `data`, so it is
# derived from the frame instead of being hard-coded (it was 7).
columns = data.columns.tolist()
input_dim = len(columns)
latent_dim = 100
hidden_dim = 32

In [5]:
# Create the GAN; generated samples will carry the column names of `data`
gan = GAN(
    input_dim=input_dim,
    latent_dim=latent_dim,
    hidden_dim=hidden_dim,
    columns=data.columns.tolist(),
)

In [6]:
# Fit the GAN to the cleaned feature table
gan.train_gan(data=data, epochs=100, batch_size=32)  ## EPOCHS SIZE

Epoch 1/100 Batch 1/72 D loss 0.4066 G loss 0.5628
Epoch 1/100 Batch 2/72 D loss 0.4247 G loss 0.6128
Epoch 1/100 Batch 3/72 D loss 0.4162 G loss 0.6224
Epoch 1/100 Batch 4/72 D loss 0.4009 G loss 0.6273
Epoch 1/100 Batch 5/72 D loss 0.3731 G loss 0.5649
Epoch 1/100 Batch 6/72 D loss 0.4222 G loss 0.6292
Epoch 1/100 Batch 7/72 D loss 0.4120 G loss 0.5983
Epoch 1/100 Batch 8/72 D loss 0.3987 G loss 0.6182
Epoch 1/100 Batch 9/72 D loss 0.3957 G loss 0.5564
Epoch 1/100 Batch 10/72 D loss 0.4010 G loss 0.6298
Epoch 1/100 Batch 11/72 D loss 0.4067 G loss 0.6084
Epoch 1/100 Batch 12/72 D loss 0.4023 G loss 0.6118
Epoch 1/100 Batch 13/72 D loss 0.3823 G loss 0.6428
Epoch 1/100 Batch 14/72 D loss 0.3903 G loss 0.6366
Epoch 1/100 Batch 15/72 D loss 0.3703 G loss 0.6411
Epoch 1/100 Batch 16/72 D loss 0.3874 G loss 0.6682
Epoch 1/100 Batch 17/72 D loss 0.3970 G loss 0.6548
Epoch 1/100 Batch 18/72 D loss 0.3974 G loss 0.6409
Epoch 1/100 Batch 19/72 D loss 0.3942 G loss 0.6453
Epoch 1/100 Batch 20/

In [7]:
# Generate new data and save it to a CSV file.
# The settings below are now the ones actually passed to the call; the value
# stays at 100, which is what the call used while the variable was ignored.
num_samples = 100 ## NUMBER OF SAMPLES
output_file = "generated_data.csv"
column_names = data.columns.tolist()

gan.generate_and_save_data(num_samples=num_samples, output_file=output_file)



In [8]:
# Build a fresh GAN (replacing the earlier `gan`), train it, then save the
# stacked model, the generator and the discriminator to .h5 files
gan = GAN(input_dim, latent_dim, hidden_dim, columns=columns)
gan.train_gan(data, epochs=100, batch_size=32)
gan.save_model(
    gan_path="gan.h5",
    generator_path="generator.h5",
    discriminator_path="discriminator.h5",
)

Epoch 1/100 Batch 1/72 D loss 8.5145 G loss 0.7443
Epoch 1/100 Batch 2/72 D loss 8.2618 G loss 0.7744
Epoch 1/100 Batch 3/72 D loss 8.0359 G loss 0.7536
Epoch 1/100 Batch 4/72 D loss 7.9545 G loss 0.7451
Epoch 1/100 Batch 5/72 D loss 7.7667 G loss 0.7797
Epoch 1/100 Batch 6/72 D loss 7.5584 G loss 0.7610
Epoch 1/100 Batch 7/72 D loss 7.2884 G loss 0.8099
Epoch 1/100 Batch 8/72 D loss 7.1102 G loss 0.7669
Epoch 1/100 Batch 9/72 D loss 7.0237 G loss 0.8143
Epoch 1/100 Batch 10/72 D loss 6.7964 G loss 0.7258
Epoch 1/100 Batch 11/72 D loss 6.6186 G loss 0.8213
Epoch 1/100 Batch 12/72 D loss 6.5021 G loss 0.7154
Epoch 1/100 Batch 13/72 D loss 6.2010 G loss 0.7773
Epoch 1/100 Batch 14/72 D loss 6.0919 G loss 0.8044
Epoch 1/100 Batch 15/72 D loss 5.8758 G loss 0.6838
Epoch 1/100 Batch 16/72 D loss 5.6164 G loss 0.6545
Epoch 1/100 Batch 17/72 D loss 5.5405 G loss 0.6444
Epoch 1/100 Batch 18/72 D loss 5.3875 G loss 0.6589
Epoch 1/100 Batch 19/72 D loss 5.1272 G loss 0.6038
Epoch 1/100 Batch 20/

In [15]:
# Reload the saved CSV and assign the seven column names used below
Generated_Data = pd.read_csv('generated_data.csv')
Generated_Data.columns = ['f','e','hmin0','hminL','PSIx th','PSIy th','D0 th']

# Label a row 0 only when both PSI columns are at most 0.05; otherwise 1
Generated_Data['Misaligned'] = np.where(
    Generated_Data['PSIx th'].le(0.05) & Generated_Data['PSIy th'].le(0.05),
    0,
    1,
)
Generated_Data['Misaligned'].value_counts()

1    1296
0    1120
Name: Misaligned, dtype: int64