In [6]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load and preprocess the dataset
df = pd.read_csv('Cardiovascular_Disease_Dataset.csv')  # Replace with your dataset path
features = df.iloc[:, :-1].values  # Features (13 columns)
labels = df.iloc[:, -1].values  # Binary target (0 or 1)

scaler = StandardScaler()
features = scaler.fit_transform(features)  # Normalize features

In [14]:
features = df.values.astype('float32')
features = (features - features.min(axis=0)) / (features.max(axis=0) - features.min(axis=0))

In [15]:
# Hyperparameters
batch_size = 32
noise_dim = 16
epochs = 1000
lambda_gp = 10  # Gradient penalty coefficient
learning_rate = 1e-4

# Build the generator
def build_generator():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_dim=noise_dim),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(features.shape[1])  # Output matches feature dimensions
    ])
    return model

# Build the discriminator
def build_discriminator():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_dim=features.shape[1]),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)  # Output a single scalar for WGAN
    ])
    return model

# Gradient penalty function
def gradient_penalty(discriminator, real_data, fake_data):
    batch_size = tf.shape(real_data)[0]
    alpha = tf.random.uniform([batch_size, 1], 0.0, 1.0, dtype=tf.float32)
    interpolated = alpha * real_data + (1 - alpha) * fake_data
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        pred = discriminator(interpolated, training=True)
    grads = tape.gradient(pred, interpolated)
    grads_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]))
    return tf.reduce_mean((grads_norm - 1.0) ** 2)

# Instantiate models
generator = build_generator()
discriminator = build_discriminator()
def weight_init(shape, dtype=None):
    return tf.random.normal(shape, stddev=0.02, dtype=dtype)

# Apply to each layer
generator.layers[0].kernel_initializer = weight_init
discriminator.layers[0].kernel_initializer = weight_init


# Optimizers
generator_optimizer = tf.keras.optimizers.Adam(1e-5, beta_1=0.5, beta_2=0.9)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-5, beta_1=0.5, beta_2=0.9)


# Training step
@tf.function
def train_step(real_data):
    batch_size = tf.shape(real_data)[0]
    noise = tf.random.normal([batch_size, noise_dim], dtype=tf.float32)

    # Train Discriminator
    with tf.GradientTape() as disc_tape:
        fake_data = generator(noise, training=True)
        real_output = discriminator(real_data, training=True)
        fake_output = discriminator(fake_data, training=True)
        gp = gradient_penalty(discriminator, real_data, fake_data)
        disc_loss = tf.reduce_mean(fake_output) - tf.reduce_mean(real_output) + lambda_gp * gp

    disc_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    discriminator_optimizer.apply_gradients(zip(disc_gradients, discriminator.trainable_variables))

    # Train Generator
    with tf.GradientTape() as gen_tape:
        fake_data = generator(noise, training=True)
        fake_output = discriminator(fake_data, training=True)
        gen_loss = -tf.reduce_mean(fake_output)

    gen_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gen_gradients, generator.trainable_variables))

    return gen_loss, disc_loss

# Training loop
def train(dataset, epochs):
    for epoch in range(epochs):
        for i in range(0, len(dataset), batch_size):
            real_data = dataset[i:i + batch_size]
            real_data = tf.convert_to_tensor(real_data, dtype=tf.float32)  # Ensure float32
            gen_loss, disc_loss = train_step(real_data)

        if epoch % 100 == 0:
            print(f"Epoch {epoch}/{epochs}, Generator Loss: {gen_loss.numpy()}, Discriminator Loss: {disc_loss.numpy()}")

# Train the WGAN
train(features, epochs)


Epoch 0/1000, Generator Loss: 0.04555933177471161, Discriminator Loss: 4.068548679351807
Epoch 100/1000, Generator Loss: -2.5829949378967285, Discriminator Loss: 1.062143325805664
Epoch 200/1000, Generator Loss: 0.32400456070899963, Discriminator Loss: -0.4675522446632385
Epoch 300/1000, Generator Loss: 1.5044524669647217, Discriminator Loss: -1.2937288284301758
Epoch 400/1000, Generator Loss: -0.07329723238945007, Discriminator Loss: 0.41540175676345825
Epoch 500/1000, Generator Loss: -1.205414891242981, Discriminator Loss: 1.416489839553833
Epoch 600/1000, Generator Loss: 0.7858185768127441, Discriminator Loss: -0.8817535638809204
Epoch 700/1000, Generator Loss: -0.0331575870513916, Discriminator Loss: 0.022462081164121628
Epoch 800/1000, Generator Loss: -0.45740824937820435, Discriminator Loss: 0.9616893529891968
Epoch 900/1000, Generator Loss: 0.0797375813126564, Discriminator Loss: -0.4087076187133789


In [19]:
import pandas as pd
import tensorflow as tf

# Assuming the 'generator' model is already trained
# 'noise_dim' should match the input size of your generator model

def generate_synthetic_data(generator, num_samples, noise_dim):
    # Generate random noise input for the generator
    noise = tf.random.normal([num_samples, noise_dim], dtype=tf.float32)
    
    # Generate synthetic data
    synthetic_data = generator(noise, training=False).numpy()
    
    # Convert synthetic data to a DataFrame
    synthetic_df = pd.DataFrame(synthetic_data, columns=[f'feature_{i+1}' for i in range(synthetic_data.shape[1])])
    
    return synthetic_df

# Example: Generate 500 synthetic samples
synthetic_data_df = generate_synthetic_data(generator, num_samples=500, noise_dim=noise_dim)

# Display the generated synthetic data
print("Generated Synthetic Data:")
print(synthetic_data_df.head())


Generated Synthetic Data:
   feature_1  feature_2  feature_3  feature_4  feature_5  feature_6  \
0  -0.186791  -0.072328   1.711799   0.496327   0.444162   1.003199   
1  -0.430517  -0.000606   1.159562   0.437986   0.519269   0.941822   
2  -0.182776   0.210462   1.656037   0.539154   1.120157   0.418823   
3   0.250664   0.174054   1.715939   0.075578   0.463148   0.827110   
4   0.099859  -0.278009   1.285033   0.384079   0.571976   0.883264   

   feature_7  feature_8  feature_9  feature_10  feature_11  feature_12  \
0   1.161993   0.516793   1.087988    0.784765    1.100292    0.031853   
1   0.666007   0.139558   0.291172    0.343342    0.708575    0.586641   
2   0.757976   0.130427   1.206694    0.256304    0.438453    1.233352   
3   0.690844   0.345309   1.356646    0.547505    0.637118   -0.150519   
4   0.319432   0.162772   0.984755    0.833282    1.415848    0.812654   

   feature_13  feature_14  
0    0.468930    0.714676  
1    0.653042    0.919934  
2    0.821630    1

In [22]:
df.head()

Unnamed: 0,patientid,age,gender,chestpain,restingBP,serumcholestrol,fastingbloodsugar,restingrelectro,maxheartrate,exerciseangia,oldpeak,slope,noofmajorvessels,target
0,103368,53,1,2,171,0,0,1,147,0,5.3,3,3,1
1,119250,40,1,0,94,229,0,1,115,0,3.7,1,1,0
2,119372,49,1,2,133,142,0,0,202,1,5.0,1,0,0
3,132514,43,1,0,138,295,1,1,153,0,3.2,2,2,1
4,146211,31,1,1,199,0,0,2,136,0,5.3,3,2,1
