In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset.
# The CSV location can be overridden with the CREDITCARD_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original hard-coded location is used unchanged.
import os

DATA_PATH = os.environ.get("CREDITCARD_CSV", "C:/Users/Dell/Downloads/creditcard")
df = pd.read_csv(DATA_PATH)

# Display the first few rows of the dataset
print(df.head())


In [None]:
# Separate features and target variable
X = df.drop(columns=['Class'])
y = df['Class']

# Split the data into training and testing sets.
# `stratify=y` keeps the class proportions identical in both splits; without
# it a rare positive class (presumably the case for this anomaly-detection
# target - confirm with y.value_counts()) can end up badly under-represented
# in the 20% test split, which makes the later precision/recall unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features. The scaler is fitted on the training split only
# and then re-used for the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Display the shape of training and testing data
print("Shape of X_train_scaled:", X_train_scaled.shape)
print("Shape of X_test_scaled:", X_test_scaled.shape)


In [None]:
import numpy as np

def geometric_masking(data, p=0.1):
    """
    Zero out one contiguous run of features in a random subset of rows.

    Each row is selected independently with probability ``p``. For a selected
    row a run length is drawn from a geometric distribution (success
    probability 0.5), clipped to the number of features, and placed at a
    uniformly random position that fits entirely inside the row.

    Parameters:
    data (numpy.ndarray): Input array of shape (num_samples, num_features).
        It is not modified.
    p (float): Probability that a given row receives a mask.

    Returns:
    numpy.ndarray: Copy of ``data`` with the selected runs set to 0.
    """
    masked_data = np.copy(data)
    num_samples, num_features = data.shape

    # Nothing can be masked in a zero-width array.
    if num_features == 0:
        return masked_data

    for i in range(num_samples):
        if np.random.rand() < p:
            # A geometric draw is unbounded above; clip it so the run always
            # fits (the unclipped value made randint raise for narrow inputs).
            mask_length = min(int(np.random.geometric(0.5)), num_features)
            # randint's upper bound is exclusive, hence the +1: without it the
            # last feature could never be part of a mask.
            start_idx = np.random.randint(0, num_features - mask_length + 1)
            masked_data[i, start_idx:start_idx + mask_length] = 0

    return masked_data

def random_rotation(data, max_angle=15):
    """
    Rotate consecutive feature pairs of every row by a random angle.

    Features (0, 1), (2, 3), ... of a row are treated as 2-D points and
    multiplied, as row vectors, by the matrix
    ``[[cos a, -sin a], [sin a, cos a]]``. One angle ``a`` is drawn per row,
    uniformly from ``[-max_angle, max_angle]`` degrees, and shared by all
    pairs of that row. If the number of features is odd, the last feature has
    no partner and is copied through unchanged (the previous implementation
    raised on such input).

    Parameters:
    data (numpy.ndarray): Input array of shape (num_samples, num_features).
        It is not modified.
    max_angle (float): Maximum absolute rotation angle in degrees.

    Returns:
    numpy.ndarray: Copy of ``data`` (same shape and dtype) with the rotation
    applied.
    """
    rotated_data = np.copy(data)
    num_samples, num_features = data.shape
    num_pairs = num_features // 2
    paired_width = 2 * num_pairs

    # One angle per row, converted to radians once.
    angles = np.radians(np.random.uniform(-max_angle, max_angle, size=num_samples))
    cos_a = np.cos(angles)[:, None]
    sin_a = np.sin(angles)[:, None]

    # First / second coordinate of every pair, each of shape (num_samples, num_pairs).
    first = data[:, 0:paired_width:2]
    second = data[:, 1:paired_width:2]

    # Row vector [x, y] times [[c, -s], [s, c]] is [x*c + y*s, -x*s + y*c].
    rotated_data[:, 0:paired_width:2] = first * cos_a + second * sin_a
    rotated_data[:, 1:paired_width:2] = -first * sin_a + second * cos_a

    return rotated_data

# Example usage:
# Both augmentations draw from NumPy's global random generator. Seed it so a
# Restart & Run All produces the same augmented training set every time
# (42 mirrors the random_state used for the train/test split).
np.random.seed(42)

# Apply geometric masking
X_train_masked = geometric_masking(X_train_scaled, p=0.1)

# Apply random rotation on top of the masked data
X_train_augmented = random_rotation(X_train_masked, max_angle=15)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, Conv1D
from tensorflow.keras.models import Model

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """
    Stack one Transformer encoder block on top of ``inputs``.

    The block is two residual sub-layers, each preceded by layer
    normalization: multi-head self-attention, then a two-layer feed-forward
    network built from kernel-size-1 convolutions. The last convolution
    projects back to ``inputs.shape[-1]`` channels so the residual sums line up.

    Parameters:
    inputs: Keras tensor the block is applied to.
    head_size (int): ``key_dim`` passed to the attention layer.
    num_heads (int): Number of attention heads.
    ff_dim (int): Number of filters in the hidden feed-forward convolution.
    dropout (float): Rate used inside attention and by both Dropout layers.

    Returns:
    Keras tensor: output of the block.
    """
    # Self-attention sub-layer: queries and keys/values are the same tensor.
    normed = LayerNormalization(epsilon=1e-6)(inputs)
    attended = MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )(normed, normed)
    attended = Dropout(dropout)(attended)
    attention_residual = attended + inputs

    # Feed-forward sub-layer applied independently at every position.
    hidden = LayerNormalization(epsilon=1e-6)(attention_residual)
    hidden = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = Dropout(dropout)(hidden)
    projected = Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + attention_residual

def build_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout=0, max_length=2048):
    """
    Build a model that stacks ``num_layers`` Transformer encoder blocks.

    The encoder blocks contain Conv1D layers and therefore need a
    (steps, channels) input. When ``input_shape`` is flat, i.e. ``(features,)``
    as for plain tabular rows, each row is reshaped to a single step with
    ``features`` channels before the stack and reshaped back afterwards, so
    the model output always has the same shape as its input. With a single
    step the attention has only one position to attend to; pass a
    ``(steps, channels)`` shape for genuine sequences.

    Parameters:
    input_shape (tuple): Per-sample input shape, without the batch axis.
    head_size (int): ``key_dim`` of every attention layer.
    num_heads (int): Number of attention heads per block.
    ff_dim (int): Hidden width of each block's feed-forward part.
    num_layers (int): Number of encoder blocks.
    dropout (float): Dropout rate forwarded to every block.
    max_length (int): Currently unused; kept so existing callers keep working.

    Returns:
    tensorflow.keras.Model: Model mapping inputs to a same-shaped output.
    """
    input_shape = tuple(input_shape)
    inputs = Input(shape=input_shape)
    x = inputs

    # A flat (features,) input gives a rank-2 tensor, which Conv1D rejects.
    is_flat = len(input_shape) == 1
    if is_flat:
        x = tf.keras.layers.Reshape((1, input_shape[0]))(x)

    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    if is_flat:
        # Drop the helper step axis again so targets can stay 2-D arrays.
        x = tf.keras.layers.Reshape(input_shape)(x)

    return Model(inputs, x)

# Hyperparameters of the encoder stack
input_shape = X_train_augmented.shape[1:]  # per-sample shape of the training data
head_size, num_heads = 256, 4
ff_dim, num_layers = 512, 4
dropout = 0.1

# Assemble the Transformer-based autoencoder
autoencoder = build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_layers,
    dropout,
)

# Mean squared error between the model output and the target
autoencoder.compile(loss='mse', optimizer='adam')

# Denoising setup: the augmented rows are the input, the un-augmented scaled
# rows are the reconstruction target.
autoencoder.fit(
    X_train_augmented,
    X_train_scaled,
    batch_size=64,
    epochs=10,
    validation_split=0.1,
)

# Reconstructions of the held-out rows
reconstructed_sequences = autoencoder.predict(X_test_scaled)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# `tensorflow.keras.losses` does not provide a `ContrastiveLoss` class, so an
# unguarded import aborts this whole cell with ImportError. The name is not
# needed below (a custom `contrastive_loss` function is defined instead), so
# fall back to None when it is unavailable.
try:
    from tensorflow.keras.losses import ContrastiveLoss
except ImportError:
    ContrastiveLoss = None

def build_contrastive_model(input_shape, latent_dim):
    """
    Build a small fully-connected encoder.

    Two ReLU layers (128 units, then ``latent_dim`` units) are followed by a
    linear layer with ``latent_dim`` units that produces the embedding.

    Parameters:
    input_shape (tuple): Per-sample input shape, without the batch axis.
    latent_dim (int): Width of the last hidden layer and of the embedding.

    Returns:
    tensorflow.keras.Model: Model mapping inputs to embeddings.
    """
    encoder_input = Input(shape=input_shape)
    hidden = encoder_input
    for width in (128, latent_dim):
        hidden = Dense(width, activation='relu')(hidden)
    embedding = Dense(latent_dim)(hidden)
    return Model(encoder_input, embedding)

def contrastive_loss(y_true, y_pred, margin=1.0):
    """
    Contrastive loss over pairs.

    Parameters:
    y_true: Pair labels, 1 for pairs that should be close and 0 for pairs
        that should be far apart.
    y_pred: Squared Euclidean distance between the two embeddings of each
        pair, same shape as ``y_true``.
    margin (float): Dissimilar pairs are penalized only while their squared
        distance is below this value.

    Returns:
    Scalar tensor: mean of ``y * d + (1 - y) * max(margin - d, 0)``.
    """
    y_true = tf.cast(y_true, y_pred.dtype)
    similar_term = y_true * y_pred
    dissimilar_term = (1.0 - y_true) * tf.maximum(margin - y_pred, 0.0)
    return tf.reduce_mean(similar_term + dissimilar_term)

# Define hyperparameters
input_shape = X_train_augmented.shape[1:]  # Shape of input data
latent_dim = 64  # Dimensionality of the latent space
margin = 1.0  # Margin for the contrastive loss

# Build the contrastive model (the encoder that is reused for scoring later)
contrastive_model = build_contrastive_model(input_shape, latent_dim)

# A contrastive objective compares two embeddings, so the encoder is wrapped
# in a two-input model that shares its weights between both inputs and
# outputs the squared distance between the two embeddings, shape (batch, 1).
anchor_input = Input(shape=input_shape)
partner_input = Input(shape=input_shape)
embedding_gap = tf.keras.layers.Subtract()(
    [contrastive_model(anchor_input), contrastive_model(partner_input)]
)
squared_distance = tf.keras.layers.Dot(axes=1)([embedding_gap, embedding_gap])
siamese_model = Model([anchor_input, partner_input], squared_distance)


def _pair_loss(y_true, y_pred):
    # Forward the notebook-level margin to the loss.
    return contrastive_loss(y_true, y_pred, margin=margin)


# Compile the model with contrastive loss
siamese_model.compile(optimizer=Adam(), loss=_pair_loss)

# Pairs: every augmented row is matched once with its own clean row
# (label 1) and once with the clean row of the preceding sample (label 0).
n_rows = X_train_augmented.shape[0]
row_ids = np.arange(n_rows)
anchor_ids = np.concatenate([row_ids, row_ids])
partner_ids = np.concatenate([row_ids, np.roll(row_ids, 1)])
pair_labels = np.concatenate(
    [np.ones((n_rows, 1)), np.zeros((n_rows, 1))]
).astype("float32")

# validation_split takes the tail of the arrays, so shuffle the pairs first;
# otherwise the validation part would consist of dissimilar pairs only.
pair_order = np.random.permutation(2 * n_rows)

# Train the contrastive model
siamese_model.fit(
    [
        X_train_augmented[anchor_ids[pair_order]],
        X_train_scaled[partner_ids[pair_order]],
    ],
    pair_labels[pair_order],
    epochs=10,
    batch_size=64,
    validation_split=0.1,
)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def build_generator(latent_dim, output_shape, output_activation='tanh'):
    """
    Build the GAN generator: a three-layer fully-connected network.

    Parameters:
    latent_dim (int): Length of the noise vector fed to the generator.
    output_shape (tuple): Per-sample shape of the data to imitate; the
        generator emits a flat vector with ``prod(output_shape)`` values.
    output_activation (str): Activation of the output layer. The default
        'tanh' limits every generated value to [-1, 1]. NOTE(review): data
        standardized with StandardScaler is not confined to that range, so
        'linear' is probably the better choice there - confirm before changing.

    Returns:
    tensorflow.keras.Model: Model mapping noise vectors to generated samples.
    """
    inputs = Input(shape=(latent_dim,))
    x = Dense(128, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
    # np.prod returns a NumPy scalar; hand the layer a plain Python int so
    # its configuration does not carry a NumPy type.
    num_outputs = int(np.prod(output_shape))
    outputs = Dense(num_outputs, activation=output_activation)(x)
    return Model(inputs, outputs)

def build_discriminator(input_shape):
    """
    Build the GAN discriminator: a three-layer fully-connected classifier.

    Parameters:
    input_shape (tuple): Per-sample shape of the samples to classify.

    Returns:
    tensorflow.keras.Model: Model with a single sigmoid output per sample.
    """
    sample_input = Input(shape=input_shape)
    hidden = sample_input
    for width in (256, 128):
        hidden = Dense(width, activation='relu')(hidden)
    verdict = Dense(1, activation='sigmoid')(hidden)
    return Model(sample_input, verdict)

def build_gan(generator, discriminator):
    """
    Chain generator and discriminator into the model used to train the generator.

    Side effect: sets ``discriminator.trainable = False`` on the object that
    was passed in, so that fitting the combined model is meant to update the
    generator only.

    Parameters:
    generator (tensorflow.keras.Model): Maps noise vectors to samples.
    discriminator (tensorflow.keras.Model): Scores samples.

    Returns:
    tensorflow.keras.Model: Compiled model (Adam, binary cross-entropy)
    mapping noise vectors to discriminator outputs.
    """
    discriminator.trainable = False
    # Take the noise shape from the generator itself instead of the notebook
    # global `latent_dim`, which other cells assign with different values.
    gan_input = Input(shape=generator.input_shape[1:])
    gan_output = discriminator(generator(gan_input))
    gan = Model(gan_input, gan_output)
    gan.compile(optimizer=Adam(), loss='binary_crossentropy')
    return gan

# Define hyperparameters
latent_dim = 100  # Dimensionality of the latent space
output_shape = X_train_augmented.shape[1:]  # Shape of generated samples

# Build and compile the generator
generator = build_generator(latent_dim, output_shape)
generator.compile(optimizer=Adam(), loss='binary_crossentropy')

# Build and compile the discriminator
discriminator = build_discriminator(output_shape)
discriminator.compile(optimizer=Adam(), loss='binary_crossentropy')

# Build and compile the GAN
gan = build_gan(generator, discriminator)

# Train the GAN.
# Note: each "epoch" below is a single update on one random batch, not a
# full pass over the training data.
epochs = 100
batch_size = 64
for epoch in range(epochs):
    # Generate random noise as input for the generator
    noise = np.random.normal(0, 1, (batch_size, latent_dim))

    # Generate synthetic samples using the generator.
    # verbose=0 keeps predict() from printing a progress bar on every iteration.
    generated_samples = generator.predict(noise, verbose=0)

    # Combine real and synthetic samples.
    # Stored under its own name: calling this `X` would overwrite the
    # notebook-level feature frame created when the data was split.
    real_samples = X_train_scaled[np.random.randint(0, X_train_scaled.shape[0], batch_size)]
    discriminator_batch = np.concatenate([real_samples, generated_samples])

    # Labels for the discriminator
    y_discriminator = np.zeros(2 * batch_size)
    y_discriminator[:batch_size] = 1  # Labeling real samples as 1

    # Train the discriminator
    discriminator_loss = discriminator.train_on_batch(discriminator_batch, y_discriminator)

    # Generate new noise for the GAN
    noise = np.random.normal(0, 1, (batch_size, latent_dim))

    # Labels for the generator (tricking the discriminator)
    y_gan = np.ones(batch_size)

    # Train the GAN (only the generator)
    gan_loss = gan.train_on_batch(noise, y_gan)

    # Print progress
    print(f"Epoch {epoch + 1}/{epochs} - Discriminator Loss: {discriminator_loss}, GAN Loss: {gan_loss}")


In [None]:
# Pass test data through the trained autoencoder to obtain reconstruction errors.
# The reconstruction is reshaped to the input's shape first so that an extra
# singleton axis in the model output cannot silently broadcast the subtraction.
reconstructed_sequences = autoencoder.predict(X_test_scaled)
reconstruction_errors = np.mean(
    np.square(X_test_scaled - reconstructed_sequences.reshape(X_test_scaled.shape)),
    axis=1,
)

# The contrastive model returns one embedding vector per row, not one score.
# Reduce it to a single value per row: the mean squared distance to the
# centroid of the training embeddings. (Flattening the embeddings instead
# yields N * latent_dim values, which cannot be added to the other scores.)
# NOTE(review): distance-to-centroid is one reasonable choice of score;
# confirm it matches the intended use of the embeddings.
test_embeddings = contrastive_model.predict(X_test_scaled)
test_embeddings = test_embeddings.reshape(len(X_test_scaled), -1)
train_embeddings = contrastive_model.predict(X_train_scaled)
embedding_centroid = train_embeddings.reshape(len(X_train_scaled), -1).mean(axis=0)
contrastive_scores = np.mean(np.square(test_embeddings - embedding_centroid), axis=1)

# Draw one synthetic sample per test row from the trained generator
noise = np.random.normal(0, 1, (len(X_test_scaled), latent_dim))
generated_samples = generator.predict(noise)

# Compute distances between original and generated samples.
# NOTE(review): every test row is compared with an unrelated random sample,
# so this term is noisy; scoring with the discriminator may be more
# informative - confirm the intended design.
gan_distances = np.mean(np.square(X_test_scaled - generated_samples), axis=1)

# Combine anomaly scores from the three models. Each component is
# standardized first; the raw values live on unrelated scales, so a plain
# average would be dominated by whichever component is numerically largest.
score_components = [reconstruction_errors, contrastive_scores, gan_distances]
anomaly_scores = np.mean(
    [(scores - scores.mean()) / (scores.std() + 1e-12) for scores in score_components],
    axis=0,
)

# Set threshold for anomaly detection
threshold = np.percentile(anomaly_scores, 95)  # Adjust percentile as needed

# Detect anomalies based on the threshold
anomalies = (anomaly_scores > threshold).astype(int)

# Print number of anomalies detected
print("Number of anomalies detected:", np.sum(anomalies))


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Ground truth for the held-out rows comes from the test split of the target
# column (0 for normal, 1 for anomaly).
ground_truth_labels = y_test

# Precision, recall and F1 all compare the labels with the thresholded
# 0/1 predictions, so they are computed in one pass.
precision, recall, f1 = (
    score_fn(ground_truth_labels, anomalies)
    for score_fn in (precision_score, recall_score, f1_score)
)

# ROC AUC is threshold-free and is computed from the continuous scores.
roc_auc = roc_auc_score(ground_truth_labels, anomaly_scores)

# Report the metrics
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("ROC AUC:", roc_auc)
