In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


In [2]:
# Load the transactions and replace the raw Amount/Time columns with
# standardized copies (zero mean, unit variance).
data = pd.read_csv('/content/creditcard.csv')

# One scaler object is refit for each column in turn, so once this cell has
# run it only holds the statistics of the last column fitted (Time).
scaler = StandardScaler()
data['scaled_amount'] = scaler.fit_transform(data[['Amount']].to_numpy())
data['scaled_time'] = scaler.fit_transform(data[['Time']].to_numpy())

# The unscaled originals are no longer needed.
data = data.drop(columns=['Time', 'Amount'])

In [None]:
# Split the rows by label. A row whose Class is neither 1 nor 0 (for example
# a missing value) ends up in neither frame.
fraud = data.loc[data['Class'].eq(1)]
non_fraud = data.loc[data['Class'].eq(0)]

In [13]:
# Class balance check: number of rows per label value.
data.loc[:, 'Class'].value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0.0,1983
1.0,2


In [3]:
#generator
def build_generator(latent_dim, output_dim, output_activation="linear"):
    """Build the generator: a dense network mapping noise to one feature row.

    Args:
        latent_dim: Length of the noise vector fed to the network.
        output_dim: Number of features in each generated row.
        output_activation: Activation of the final layer. Defaults to
            "linear" because this notebook standardizes Amount and Time
            (zero mean, unit variance) rather than squeezing them into
            [-1, 1]; a "tanh" output could never produce a value outside
            that interval. Pass "tanh" only if the training features are
            rescaled to [-1, 1].

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    model = tf.keras.Sequential([
        # Explicit Input layer instead of the `input_dim=` keyword on Dense,
        # matching how the combined GAN model declares its input.
        layers.Input(shape=(latent_dim,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(output_dim, activation=output_activation),
    ])
    return model

# discriminator
def build_discriminator(input_dim):
    """Build the discriminator: a dense binary classifier over feature rows.

    Args:
        input_dim: Number of features in each row the network receives.

    Returns:
        An uncompiled `tf.keras.Sequential` model whose single sigmoid output
        is a score in (0, 1); the training loop labels real rows 1 and
        generated rows 0.
    """
    model = tf.keras.Sequential([
        # Explicit Input layer instead of the `input_dim=` keyword on Dense,
        # matching how the combined GAN model declares its input.
        layers.Input(shape=(input_dim,)),
        layers.Dense(256, activation="leaky_relu"),
        layers.Dense(128, activation="leaky_relu"),
        layers.Dense(1, activation="sigmoid"),
    ])
    return model


In [5]:
# Length of the noise vector, and width of one feature row (every column
# except the label).
latent_dim = 100
feature_size = len(fraud.columns.drop('Class'))

# Instantiate both networks.
generator = build_generator(latent_dim, feature_size)
discriminator = build_discriminator(feature_size)

# The discriminator is compiled on its own so it can be trained directly.
discriminator.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Stacked model: noise -> generator -> discriminator. The discriminator is
# flagged non-trainable first so that fitting the stacked model is meant to
# update the generator only.
# NOTE(review): this flag is set after the discriminator was compiled; whether
# the standalone discriminator still updates its weights afterwards depends on
# the installed Keras version -- confirm.
discriminator.trainable = False
gan_input = layers.Input(shape=(latent_dim,))
generated_data = generator(gan_input)
gan_output = discriminator(generated_data)

gan = tf.keras.Model(inputs=gan_input, outputs=gan_output)
gan.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    loss="binary_crossentropy",
)


In [6]:
# GAN
def train_gan(generator, discriminator, gan, fraud_data, latent_dim, epochs=200, batch_size=32):
    """Alternately train the discriminator and the generator on fraud rows.

    Note that one "epoch" here is a single mini-batch step, not a full pass
    over `fraud_data`. Each step trains the discriminator on half a batch of
    real rows (label 1) and half a batch of generated rows (label 0), then
    trains the generator through `gan` on a full batch of noise labelled 1.

    Args:
        generator: Model exposing `predict(noise, verbose=0)`.
        discriminator: Compiled model exposing `train_on_batch(x, y)`.
        gan: Compiled stacked model exposing `train_on_batch(x, y)`.
        fraud_data: DataFrame of real rows containing a 'Class' column, which
            is dropped before training.
        latent_dim: Length of each noise vector.
        epochs: Number of training steps.
        batch_size: Generator batch size; the discriminator sees half of it
            (at least one row) per real/fake update.

    Returns:
        dict with keys "d_loss" and "g_loss", each a list holding one float
        per step (the averaged real/fake discriminator loss and the generator
        loss respectively).

    Raises:
        ValueError: If `fraud_data` has no rows to sample from.
    """
    X_train = fraud_data.drop('Class', axis=1).values
    n_real = X_train.shape[0]
    if n_real == 0:
        raise ValueError("fraud_data contains no rows; cannot train the GAN")

    # At least one row per discriminator update, even for batch_size == 1.
    half_batch = max(1, int(batch_size) // 2)

    # The label arrays never change, so build them once outside the loop.
    real_labels = np.ones((half_batch, 1))
    fake_labels = np.zeros((half_batch, 1))
    valid_y = np.ones((batch_size, 1))

    history = {"d_loss": [], "g_loss": []}

    for epoch in range(epochs):
        # Train discriminator: real rows are sampled with replacement, so this
        # also works when there are fewer real rows than half_batch.
        idx = np.random.randint(0, n_real, half_batch)
        real_data = X_train[idx]

        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        # verbose=0 keeps predict() from printing a progress bar every step.
        generated_data = generator.predict(noise, verbose=0)

        d_loss_real = discriminator.train_on_batch(real_data, real_labels)
        d_loss_fake = discriminator.train_on_batch(generated_data, fake_labels)
        # atleast_1d keeps d_loss[0] valid when train_on_batch returns a bare
        # scalar (discriminator compiled without metrics).
        d_loss = np.atleast_1d(0.5 * np.add(d_loss_real, d_loss_fake))

        # Train generator: fresh noise, labelled "real" so the generator is
        # rewarded when the discriminator is fooled.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch(noise, valid_y)

        history["d_loss"].append(float(d_loss[0]))
        history["g_loss"].append(float(np.ravel(g_loss)[0]))

        # Print losses
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}/{epochs}, D Loss: {d_loss[0]}, G Loss: {g_loss}")

    return history


In [16]:
def generate_synthetic_data(generator, num_samples, latent_dim):
    """Sample standard-normal noise and return the generator's output for it.

    Args:
        generator: Model exposing `predict(noise)`.
        num_samples: Number of noise vectors (rows) to draw.
        latent_dim: Length of each noise vector.

    Returns:
        Whatever `generator.predict` returns for the drawn noise.
    """
    latent_shape = (num_samples, latent_dim)
    latent_points = np.random.normal(loc=0, scale=1, size=latent_shape)
    return generator.predict(latent_points)

# Train the GAN on the real fraud rows before sampling from it. No other cell
# visible in this notebook calls train_gan, so without this step the generator
# would still carry its random initial weights when the samples are drawn.
train_gan(generator, discriminator, gan, fraud, latent_dim)

# Generate one synthetic fraud row per non-fraud row so the two classes end
# up balanced (derived from the data instead of a hard-coded row count).
num_synthetic = len(non_fraud)
synthetic_fraud_data = generate_synthetic_data(generator, num_synthetic, latent_dim)
synthetic_fraud_df = pd.DataFrame(synthetic_fraud_data, columns=fraud.drop('Class', axis=1).columns)
synthetic_fraud_df['Class'] = 1




In [17]:
# Merge synthetic fraud data with non-fraud data.
# non_fraud keeps the row labels it had in `data`, while synthetic_fraud_df
# has a fresh 0..n-1 index, so a plain concat can repeat index labels.
# ignore_index=True gives the combined frame one unique 0..n-1 index.
augmented_data = pd.concat([non_fraud, synthetic_fraud_df], axis=0, ignore_index=True)
print(augmented_data['Class'].value_counts())


Class
0.0    1983
1.0    1983
Name: count, dtype: int64
