<a href="https://colab.research.google.com/github/AfshinRezakhani/Thesis1/blob/main/GAN_LSTM_d6_FPR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, RepeatVector
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Load dataset
file_path = "/content/d6.csv"
df = pd.read_csv(file_path)

# Separate features and target
X = df.drop(columns=['F'])
y = df['F'].astype(int)  # Convert target to integer if needed

# Display dataset distribution
print(f"Class Distribution: \n{df['F'].value_counts()}")

# Scale the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Separate classes
X_class0 = X_scaled[y == 0]
X_class1 = X_scaled[y == 1]

latent_dim = 10

# Define Generator Model
def build_generator():
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=(1, latent_dim)),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dense(X_class0.shape[1], activation='tanh'),
        RepeatVector(1)
    ])
    return model

# Define Discriminator Model
def build_discriminator():
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=(1, X_class0.shape[1])),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dense(1, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])
    return model

generator = build_generator()
discriminator = build_discriminator()
discriminator.trainable = False

gan_input = tf.keras.Input(shape=(1, latent_dim))
gan_output = discriminator(generator(gan_input))
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

# Training parameters
batch_size = 64
epochs = 200

# Train GAN
for epoch in range(epochs):
    noise = np.random.normal(0, 1, (batch_size, 1, latent_dim))
    generated_data = generator.predict(noise)

    idx = np.random.randint(0, X_class0.shape[0], batch_size)
    real_data = X_class0[idx].reshape(batch_size, 1, -1)

    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))

    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    noise = np.random.normal(0, 1, (batch_size, 1, latent_dim))
    g_loss = gan.train_on_batch(noise, real_labels)

if epoch % 20 == 0:
    print(f"Epoch {epoch} | D Loss Real: {d_loss_real[0]:.4f} | D Loss Fake: {d_loss_fake[0]:.4f} | G Loss: {g_loss:.4f}")
# Generate synthetic samples
num_samples_needed = len(X_class1) - len(X_class0)
noise = np.random.normal(0, 1, (num_samples_needed, 1, latent_dim))
synthetic_data = generator.predict(noise).reshape(num_samples_needed, -1)

synthetic_data = scaler.inverse_transform(synthetic_data)

# Balance dataset
X_balanced = np.vstack((X_scaled, synthetic_data))
y_balanced = np.hstack((y, np.zeros(num_samples_needed)))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42, stratify=y_balanced)

# Define LSTM Classifier
classifier = Sequential([
    LSTM(64, return_sequences=True, input_shape=(1, X_train.shape[1])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dense(1, activation='sigmoid')
])
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Reshape data
X_train = X_train.reshape(-1, 1, X_train.shape[1])
X_test = X_test.reshape(-1, 1, X_test.shape[1])

# Train classifier
classifier.fit(X_train, y_train, epochs=10, batch_size=64, verbose=1)

# Make predictions
y_pred = (classifier.predict(X_test) > 0.5).astype(int)

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
f_score = f1_score(y_test, y_pred)

# Compute confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Compute False Positive Rate (FPR) and False Negative Rate (FNR)
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
fnr = fn / (fn + tp) if (fn + tp) > 0 else 0

# Print results
print("\nModel Evaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-Score: {f_score:.4f}")
print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"False Positive Rate (FPR): {fpr:.4f}")
print(f"False Negative Rate (FNR): {fnr:.4f}")

# Save balanced dataset
balanced_df = pd.DataFrame(np.column_stack((y_balanced, X_balanced)), columns=['F'] + list(df.columns[1:]))
balanced_df.to_csv("/content/d5_balanced.csv", index=False)

print("Balanced dataset saved as d5_balanced.csv")


Class Distribution: 
F
1    7022
0    2978
Name: count, dtype: int64


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step 




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33

  super().__init__(**kwargs)


[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.7527 - loss: 0.5834
Epoch 2/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7855 - loss: 0.4401
Epoch 3/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7825 - loss: 0.4407
Epoch 4/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7837 - loss: 0.4401
Epoch 5/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7859 - loss: 0.4351
Epoch 6/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7908 - loss: 0.4312
Epoch 7/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7856 - loss: 0.4357
Epoch 8/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7977 - loss: 0.4273
Epoch 9/10
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━