In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam

# Load Dataset
file_path = 'heart_attack_germany.csv'
data = pd.read_csv(file_path)

# Select Numerical Columns for GAN
numerical_columns = [
    "BMI",
    "Alcohol_Consumption",
    "Cholesterol_Level",
    "Air_Pollution_Index",
    "Region_Heart_Attack_Rate",
]
target_column = "Heart_Attack_Incidence"

# Data Preprocessing
data_filtered = data[numerical_columns + [target_column]]
scaler = MinMaxScaler(feature_range=(-1, 1))
data_filtered[numerical_columns] = scaler.fit_transform(data_filtered[numerical_columns])

# Separate data into heart attack (1) and no heart attack (0)
minority_class = data_filtered[data_filtered[target_column] == 1][numerical_columns].values
majority_class = data_filtered[data_filtered[target_column] == 0][numerical_columns].values

# GAN Parameters
latent_dim = 10  # Size of noise vector
num_features = minority_class.shape[1]

# Define the Generator
def build_generator():
    model = Sequential([
        Dense(64, input_dim=latent_dim),
        LeakyReLU(alpha=0.2),
        Dense(128),
        LeakyReLU(alpha=0.2),
        Dense(num_features, activation='tanh')
    ])
    return model

# Define the Discriminator
def build_discriminator():
    model = Sequential([
        Dense(128, input_shape=(num_features,)),
        LeakyReLU(alpha=0.2),
        Dense(64),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid')
    ])
    return model

# Build the GAN
def build_gan(generator, discriminator):
    discriminator.compile(optimizer=Adam(0.0002, 0.5), loss='binary_crossentropy', metrics=['accuracy'])
    discriminator.trainable = False
    gan = Sequential([generator, discriminator])
    gan.compile(optimizer=Adam(0.0002, 0.5), loss='binary_crossentropy')
    return gan

# Initialize GAN models
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)

# Train GAN
def train_gan(generator, discriminator, gan, real_data, epochs=1000, batch_size=64):
    for epoch in range(epochs):
        # Train Discriminator
        idx = np.random.randint(0, real_data.shape[0], batch_size)
        real_samples = real_data[idx]
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        fake_samples = generator.predict(noise)
        d_loss_real = discriminator.train_on_batch(real_samples, np.ones((batch_size, 1)))
        d_loss_fake = discriminator.train_on_batch(fake_samples, np.zeros((batch_size, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

        # Print progress every 100 epochs
        if epoch % 100 == 0:
          d = int(d_loss[0])
          d2 = int(d_loss[1])
          print("Epoch " + str(epoch) + ", D Loss: " + str(round(d, 4)) + ", D Acc: " + str(round(d2 * 100, 2)) + "%, G Loss: " + str(g_loss))

train_gan(generator, discriminator, gan, minority_class, epochs=1000, batch_size=64)

# Generate Synthetic Data
def generate_synthetic_data(generator, num_samples):
    noise = np.random.normal(0, 1, (num_samples, latent_dim))
    synthetic_data = generator.predict(noise)
    return synthetic_data

synthetic_minority_data = generate_synthetic_data(generator, len(majority_class))

# Combine Real and Synthetic Data
X = np.vstack((majority_class, minority_class, synthetic_minority_data))
y = np.array([0] * len(majority_class) + [1] * (len(minority_class) + len(synthetic_minority_data)))

# Split Data for Classification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Classifier
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Evaluate Classifier
y_pred = classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_filtered[numerical_columns] = scaler.fit_transform(data_filtered[numerical_columns])
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 




Epoch 0, D Loss: 0, D Acc: 0%, G Loss: [array(0.66683316, dtype=float32), array(0.66683316, dtype=float32), array(0.703125, dtype=float32)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/st