In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

# Load the dataset
file_path = "heartdiseasedatafile.csv"  
cardio_data = pd.read_csv(file_path)

# Prepare features
features = cardio_data.drop(columns=["target"]).values  # Exclude target column
labels = cardio_data["target"].values  # Use target column

# Normalize features for stability
features_normalized = (features - features.mean(axis=0)) / features.std(axis=0)

# Define GAN components
latent_dim = 10  # Size of noise vector
input_dim = features.shape[1]  # Number of features

# Generator
def build_generator():
    model = tf.keras.Sequential([
        layers.Input(shape=(latent_dim,)),
        layers.Dense(64, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(input_dim, activation="linear")
    ])
    return model

# Discriminator
def build_discriminator():
    model = tf.keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation="leaky_relu"),
        layers.Dense(64, activation="leaky_relu"),
        layers.Dense(1, activation="sigmoid")
    ])
    return model

# Build and compile GAN
generator = build_generator()
discriminator = build_discriminator()
discriminator.compile(optimizer="adam", loss="binary_crossentropy")

# Combined model (Generator + Discriminator)
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(optimizer="adam", loss="binary_crossentropy")

# Training the GAN
def train_gan(epochs, batch_size):
    for epoch in range(epochs):
        # Select random real samples
        idx = np.random.randint(0, features_normalized.shape[0], batch_size)
        real_samples = features_normalized[idx]

        # Generate fake samples
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        fake_samples = generator.predict(noise)

        # Train discriminator
        real_labels = np.ones((batch_size, 1))
        fake_labels = np.zeros((batch_size, 1))
        d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_samples, fake_labels)
        d_loss = 0.5 * (np.mean(d_loss_real) + np.mean(d_loss_fake))  # Ensure scalar

        # Train generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch(noise, real_labels)
        g_loss = np.mean(g_loss)  # Ensure scalar

        # Print progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch}/{epochs} | D Loss: {d_loss:.4f} | G Loss: {g_loss:.4f}")

# Train the GAN
train_gan(epochs=400, batch_size=64)

# Generate synthetic data
num_new_samples = 100
noise = np.random.normal(0, 1, (num_new_samples, latent_dim))
synthetic_data = generator.predict(noise)

# Denormalize the data
synthetic_data = synthetic_data * features.std(axis=0) + features.mean(axis=0)

# Create a DataFrame
synthetic_df = pd.DataFrame(synthetic_data, columns=cardio_data.columns[:-1])

# Save the generated dataset
output_file = "GAN_Synthetic_dataset.csv"
synthetic_df.to_csv(output_file, index=False)

print(f"GAN Applied Synthetic Dataset Saved As {output_file}")

2024-11-20 03:59:25.342152: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step 




Epoch 0/400 | D Loss: 0.6164 | G Loss: 0.6568
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━