In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the full transaction log, then work on a reproducible 10% random
# sample of its rows (fixed seed) for the rest of the notebook.
data1 = pd.read_csv(
    '/content/PS_20174392719_1491204439457_log.csv'
)
data = data1.sample(random_state=42, frac=0.1)

In [None]:
# Explore the dataset: show the first ten sampled rows
data.head(10)

In [None]:
# Summary statistics for the sampled data (displayed as the cell output)
data.describe()

In [None]:
# One 50-bin histogram per column that DataFrame.hist can plot
data.hist(
    bins=50,
    figsize=(15, 15),
    color='green',
)
plt.show()

In [None]:
# Distribution of transactions with respect to the source account:
# histogram of how many sampled transactions each `nameOrig` value has.
(
    data['nameOrig']
    .value_counts()
    .hist(bins=500, figsize=(15, 5), color='blue')
)
plt.show()

In [None]:
# Summary statistics of the number of sampled transactions per source account
data['nameOrig'].value_counts().describe()

In [None]:
# Distribution of transactions with respect to the destination account:
# histogram of how many sampled transactions each `nameDest` value has.
(
    data['nameDest']
    .value_counts()
    .hist(bins=500, figsize=(15, 5), color='purple')
)
plt.show()

In [None]:
# Explore the class distribution: share of each `isFraud` value as a pie chart,
# with the second wedge pulled out slightly.
data['isFraud'].value_counts().plot.pie(
    autopct='%.2f',
    figsize=(5, 5),
    colors=["purple", "cyan"],
    explode=[0, .1],
)
plt.title('Class Distribution')
plt.tight_layout()

In [None]:
# Data-quality checks.
# A notebook cell only displays the value of its LAST expression, so both
# results are printed explicitly; otherwise the null count is computed and
# silently thrown away.

# Check if there are any null values (total count over every column)
print("Total missing values:", data.isna().sum().sum())

# Check for duplicate rows (the first occurrence of a row is not counted)
print("Any duplicated rows:", data.duplicated(keep='first').any())

In [None]:
# Filter data by the labels: safe (isFraud == 0) and fraud (isFraud == 1).
safe = data[data['isFraud'] == 0]
fraud = data[data['isFraud'] == 1]

# Overlay the distribution of `step` for each class on the same axes.
# sns.distplot is deprecated; histplot(stat='density', kde=True) is its
# documented replacement and draws the same normalised histogram + KDE.
# Because each class is normalised separately, the y-axis is a density,
# not a raw transaction count, and is labelled accordingly.
fig, ax = plt.subplots(figsize=(10, 3))
sns.histplot(safe['step'], stat='density', kde=True, color='tab:blue',
             label="Safe Transaction", ax=ax)
sns.histplot(fraud['step'], stat='density', kde=True, color='tab:orange',
             label='Fraud Transaction', ax=ax)
ax.set_xlabel('Hour')
ax.set_ylabel('Density')
ax.set_title('Distribution of Transactions over the Time')
ax.legend()
plt.show()

In [None]:
# Transaction types that occur among the fraudulent rows, with their counts
fraud['type'].value_counts()

In [None]:
# Keep only TRANSFER and CASH_OUT rows.
# NOTE(review): the feature matrix in the following cell is built from `data`,
# not from `data_by_type` -- confirm whether this filter was meant to be used.
is_transfer_or_cash_out = data['type'].isin(['TRANSFER', 'CASH_OUT'])
data_by_type = data[is_transfer_or_cash_out]

In [None]:
# Target: the fraud label. Features: every other column except the two
# account-identifier columns.
y = data['isFraud']
X = data.drop(columns=['isFraud', 'nameOrig', 'nameDest'])

In [None]:
# One-hot encode the categorical `type` column, dropping the first level
X = pd.get_dummies(
    X,
    drop_first=True,
    columns=['type'],
)

In [None]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

# stratify=y keeps the fraud/non-fraud ratio the same in both splits. With a
# rare positive class, an unstratified random split can leave the test set
# with a noticeably different (or very small) number of fraud rows, which
# makes precision/recall unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:

from imblearn.under_sampling import RandomUnderSampler



In [None]:
# Random under-sampling of the TRAINING split only (seeded); the test split
# is not resampled.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X_train, y_train)

In [None]:
import matplotlib.pyplot as plt

# Class counts of the training labels before and after random under-sampling
counts_before = y_train.value_counts()
counts_after = pd.Series(y_resampled).value_counts()

print("Class Distribution before RUS:")
print(counts_before)

print("\nClass Distribution after RUS:")
print(counts_after)

# Side-by-side bar charts: left = before RUS, right = after RUS
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

panels = (
    (axes[0], counts_before, 'Class Distribution before RUS'),
    (axes[1], counts_after, 'Class Distribution after RUS'),
)
for ax, counts, panel_title in panels:
    ax.bar(counts.index, counts.values, color=['blue', 'green'])
    ax.set_title(panel_title)
    ax.set_xlabel('Class')
    ax.set_ylabel('Count')

plt.tight_layout()
plt.show()

In [None]:
import xgboost as xgb
from xgboost import XGBClassifier

In [None]:
# Gradient-boosted classifier (seeded), trained on the under-sampled training data
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_resampled, y_resampled)

In [None]:
# Predictions on the test set (X_test itself is not resampled)
y_pred = xgb_model.predict(X_test)

In [None]:
# Model Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Print each headline metric for the current `y_pred` against the test labels
for metric_label, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, scorer(y_test, y_pred))

# Confusion matrix of test labels vs. predictions
conf_matrix = confusion_matrix(y_test, y_pred)

# Render the matrix as an annotated heatmap
class_names = ['Non-Fraud', 'Fraud']
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


In [None]:

from sklearn.neighbors import KNeighborsClassifier

# KNN (k=5) fitted on the original, non-resampled training split,
# with no feature scaling applied.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predictions on the test set
y_pred = knn_model.predict(X_test)

# Model Evaluation
metric_table = [
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
]
for metric_label, scorer in metric_table:
    print(metric_label, scorer(y_test, y_pred))

# Confusion matrix, drawn as an annotated heatmap
conf_matrix = confusion_matrix(y_test, y_pred)
tick_names = ['Non-Fraud', 'Fraud']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=tick_names, yticklabels=tick_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# KNN (k=5) fitted on the randomly under-sampled training data
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_resampled, y_resampled)

# Predictions on the test set
y_pred = knn_model.predict(X_test)

# Model Evaluation
metric_table = [
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
]
for metric_label, scorer in metric_table:
    print(metric_label, scorer(y_test, y_pred))

# Confusion matrix, drawn as an annotated heatmap
conf_matrix = confusion_matrix(y_test, y_pred)
tick_names = ['Non-Fraud', 'Fraud']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=tick_names, yticklabels=tick_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest (100 trees, seeded) trained on the under-sampled training data
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model = rf_model.fit(X_resampled, y_resampled)

# Predictions on the test set
y_pred = rf_model.predict(X_test)

# Model Evaluation
for metric_label, scorer in zip(
    ("Accuracy:", "Precision:", "Recall:", "F1 Score:"),
    (accuracy_score, precision_score, recall_score, f1_score),
):
    print(metric_label, scorer(y_test, y_pred))

# Confusion matrix, drawn as an annotated heatmap
conf_matrix = confusion_matrix(y_test, y_pred)
heatmap_options = dict(annot=True, fmt='d', cmap='Blues', cbar=False,
                       xticklabels=['Non-Fraud', 'Fraud'],
                       yticklabels=['Non-Fraud', 'Fraud'])
sns.heatmap(conf_matrix, **heatmap_options)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


In [None]:
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Distance-to-nearest-centroid outlier detection on the full feature matrix X.

# Step 1: Use k-means to partition the data into clusters.
# random_state is fixed (like the other models in this notebook) so that the
# clustering -- and therefore every metric below -- is reproducible; n_init is
# given explicitly so the result does not depend on the library's default.
n_clusters = 5  # Adjust the number of clusters as needed
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
kmeans.fit(X)

# Step 2: Measure how far every row is from its closest cluster centre
cluster_centers = kmeans.cluster_centers_
cluster_labels = kmeans.labels_

neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(cluster_centers)
distances, _ = neigh.kneighbors(X)
# kneighbors returns an (n_samples, 1) array for n_neighbors=1; flatten it so
# the thresholding below works on a plain 1-D vector.
distances = distances.ravel()

# Rows whose distance is above the 95th percentile are treated as outliers
threshold = np.percentile(distances, 95)

# Step 3: Detect outliers based on distance threshold
outlier_indices = np.flatnonzero(distances > threshold)

# Print the (positional) indices of outliers
print("Indices of outliers:", outlier_indices)

# Binary prediction vector: 1 = outlier, 0 = inlier (integer labels, to match y)
y_pred_binary = np.zeros(len(X), dtype=int)
y_pred_binary[outlier_indices] = 1

# Compare the outlier flags with the fraud labels
precision = precision_score(y, y_pred_binary)
recall = recall_score(y, y_pred_binary)
f1 = f1_score(y, y_pred_binary)
accuracy = accuracy_score(y, y_pred_binary)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)

In [None]:
pip install pyod
from pyod.models.cblof import CBLOF


# Assuming X contains your dataset and y contains true labels for outliers

# Instantiate the CBLOF model
cblof_model = CBLOF(contamination=0.1)  # Adjust contamination parameter as needed

# Fit the model
cblof_model.fit(X)

# Predict outliers
y_pred_binary = cblof_model.predict(X)

# Calculate evaluation metrics
precision = precision_score(y, y_pred_binary)
recall = recall_score(y, y_pred_binary)
f1 = f1_score(y, y_pred_binary)
accuracy = accuracy_score(y, y_pred_binary)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)


In [None]:
from pyod.models.abod import ABOD

# Angle-Based Outlier Detection on the full feature matrix X, scored against
# the fraud labels y.
abod_model = ABOD(contamination=0.1)  # Adjust contamination parameter as needed
abod_model.fit(X)

# Outlier flags for the same rows the detector was fitted on
y_pred_binary = abod_model.predict(X)

# Evaluation metrics, computed in the order precision, recall, F1, accuracy
precision, recall, f1, accuracy = [
    scorer(y, y_pred_binary)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
]

print("Evaluation Metrics:")
for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
    ("Accuracy:", accuracy),
):
    print(metric_label, metric_value)


In [None]:
from pyod.models.hbos import HBOS

# Histogram-Based Outlier Score detector on the full feature matrix X, scored
# against the fraud labels y.
hbos_model = HBOS(contamination=0.1)  # Adjust contamination parameter as needed
hbos_model.fit(X)

# Outlier flags for the same rows the detector was fitted on
y_pred_binary = hbos_model.predict(X)

# Evaluation metrics, computed in the order precision, recall, F1, accuracy
precision, recall, f1, accuracy = [
    scorer(y, y_pred_binary)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
]

print("Evaluation Metrics:")
for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
    ("Accuracy:", accuracy),
):
    print(metric_label, metric_value)


In [None]:
from sklearn.ensemble import IsolationForest

# Isolation Forest on the full feature matrix X, evaluated against the fraud
# labels y.

# Instantiate the Isolation Forest model. random_state is fixed so the random
# trees -- and therefore the metrics below -- are reproducible across runs.
isolation_forest_model = IsolationForest(contamination=0.1, random_state=42)  # Adjust contamination parameter as needed

# Fit the model
isolation_forest_model.fit(X)

# Predict outliers. scikit-learn returns +1 for inliers and -1 for outliers;
# map that to 0/1 in a single step instead of two order-dependent in-place
# assignments.
raw_pred = isolation_forest_model.predict(X)
y_pred_binary = np.where(raw_pred == -1, 1, 0)

# Calculate evaluation metrics
precision = precision_score(y, y_pred_binary)
recall = recall_score(y, y_pred_binary)
f1 = f1_score(y, y_pred_binary)
accuracy = accuracy_score(y, y_pred_binary)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)

# Calculate confusion matrix
conf_matrix = confusion_matrix(y, y_pred_binary)

# Plot confusion matrix
plt.figure(figsize=(4, 3))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=['Normal', 'Outlier'],
            yticklabels=['Normal', 'Outlier'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()



In [None]:
pip install keras

In [None]:
pip install tensorflow

In [None]:
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler

# Autoencoder-based outlier detection: rows that the network reconstructs
# poorly are flagged as outliers and compared with the fraud labels y.

# The decoder ends in a sigmoid, so it can only emit values in [0, 1]. Rescale
# every feature into that range first; trained on the raw features, the network
# could never reconstruct large values and the "reconstruction error" would
# mostly measure raw magnitude.
X_ae = MinMaxScaler().fit_transform(X.astype('float32'))

# Define the autoencoder architecture
input_dim = X_ae.shape[1]  # Number of features
encoding_dim = 32  # Number of neurons in the hidden layer

input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation='relu')(input_layer)
decoder = Dense(input_dim, activation='sigmoid')(encoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)

# Compile the model
autoencoder.compile(optimizer='adam', loss='mse')

# Fit the model to reproduce its own (scaled) input
autoencoder.fit(X_ae, X_ae, epochs=10, batch_size=32, shuffle=True, validation_split=0.1)

# Use the autoencoder to reconstruct the data
reconstructed_X = autoencoder.predict(X_ae)

# Per-row mean squared reconstruction error, in the scaled feature space
mse = np.mean(np.square(X_ae - reconstructed_X), axis=1)

# Rows with error above the 95th percentile are treated as outliers
threshold = np.percentile(mse, 95)

# Predict outliers based on the reconstruction errors (1 = outlier)
y_pred_binary = (mse > threshold).astype(int)

# Calculate evaluation metrics
precision = precision_score(y, y_pred_binary)
recall = recall_score(y, y_pred_binary)
f1 = f1_score(y, y_pred_binary)
accuracy = accuracy_score(y, y_pred_binary)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)

# Calculate confusion matrix
conf_matrix = confusion_matrix(y, y_pred_binary)

# Plot confusion matrix
plt.figure(figsize=(4, 3))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=['Normal', 'Outlier'],
            yticklabels=['Normal', 'Outlier'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


In [None]:
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# Standardize the features; the scaler is fitted on the full feature matrix X
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the Generator and Discriminator Networks
def make_generator_model():
    """Build the generator network.

    Takes a 100-dimensional input and passes it through Dense(128, relu) and
    Dense(256, relu) to a final Dense layer with one unit per column of the
    notebook-level ``X`` (no activation on the output).
    """
    dense = tf.keras.layers.Dense
    layer_stack = [
        dense(128, activation='relu', input_shape=(100,)),
        dense(256, activation='relu'),
        dense(X.shape[1]),
    ]
    return tf.keras.Sequential(layer_stack)

def make_discriminator_model():
    """Build the discriminator network.

    Takes one input per column of the notebook-level ``X`` and passes it
    through Dense(256, relu) and Dense(128, relu) to a single sigmoid unit.
    """
    dense = tf.keras.layers.Dense
    layer_stack = [
        dense(256, activation='relu', input_shape=(X.shape[1],)),
        dense(128, activation='relu'),
        dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(layer_stack)

# Define the GAN Model
generator = make_generator_model()
discriminator = make_discriminator_model()

# Compile the stand-alone discriminator BEFORE make_gan() freezes it. Compiling
# it after `trainable = False` has been set leaves the discriminator frozen in
# its own training step, so it would never learn anything.
discriminator.compile(optimizer='adam', loss='binary_crossentropy')

def make_gan(generator, discriminator):
    """Stack generator -> discriminator into the model used to update the generator.

    The discriminator is marked non-trainable before the stacked model is
    compiled, so that training the stack is meant to update only the generator.

    Args:
        generator: model mapping a 100-dim noise vector to a feature row.
        discriminator: model mapping a feature row to a single sigmoid score.

    Returns:
        The compiled stacked model (adam, binary cross-entropy).
    """
    discriminator.trainable = False
    gan_input = tf.keras.Input(shape=(100,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = tf.keras.Model(gan_input, gan_output)
    gan.compile(optimizer='adam', loss='binary_crossentropy')
    return gan

gan = make_gan(generator, discriminator)

# The generator is only ever updated through `gan` and otherwise used via
# predict(), so it is not compiled on its own.

# Train the GAN
batch_size = 32
epochs = 100

for epoch in range(epochs):
    # ---- Discriminator step: real rows are labelled 1, generated rows 0 ----
    noise = np.random.normal(0, 1, (batch_size, 100))
    generated_data = generator.predict(noise, verbose=0)

    real_data = X_scaled[np.random.randint(0, X_scaled.shape[0], batch_size)]

    combined_data = np.concatenate([real_data, generated_data])
    labels = np.concatenate([np.ones((batch_size, 1)), np.zeros((batch_size, 1))])

    # Re-assert the intended trainable state around each update so the
    # discriminator is updated only in its own step, whether or not the
    # installed Keras snapshots `trainable` at compile time.
    discriminator.trainable = True
    discriminator_loss = discriminator.train_on_batch(combined_data, labels)

    # ---- Generator step ----
    # The generator is rewarded when the discriminator calls its output "real",
    # so the targets must be 1 (the real label). Zeros here would train the
    # generator to be detected.
    noise = np.random.normal(0, 1, (batch_size, 100))
    misleading_targets = np.ones((batch_size, 1))

    discriminator.trainable = False
    gan_loss = gan.train_on_batch(noise, misleading_targets)

# Generate Synthetic Data
synthetic_data = generator.predict(np.random.normal(0, 1, (1000, 100)), verbose=0)

# Identify outliers with the trained discriminator: its output is the estimated
# probability that a row is "real", so the rows with the lowest scores (bottom
# 5%, mirroring the 95th-percentile rule used by the other detectors in this
# notebook) are flagged as outliers.
realness = discriminator.predict(X_scaled, verbose=0).ravel()
gan_threshold = np.percentile(realness, 5)

# Evaluate against the actual fraud labels. Comparing two all-zero arrays would
# report a perfect accuracy regardless of what the model learned.
y_true = y.to_numpy()
y_pred = (realness < gan_threshold).astype(int)

# zero_division=0: report 0 instead of warning if no row ends up flagged
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)
accuracy = accuracy_score(y_true, y_pred)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping


# Standardize the features; the scaler is fitted on the full feature matrix X
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split of the scaled features only (no labels are split here).
# NOTE(review): this reassigns X_train / X_test, replacing the feature frames
# produced by the earlier supervised train/test split; y_train / y_test are
# left as they were, so the two no longer correspond after this cell runs.
X_train, X_test = train_test_split(X_scaled, test_size=0.2, random_state=42)

# Define the Generator and Discriminator Networks with dropout
def make_generator_model():
    """Build the generator network with dropout.

    100-dim input -> Dense(128, relu) -> Dropout(0.2) -> Dense(256, relu)
    -> Dropout(0.2) -> Dense with one unit per column of the notebook-level
    ``X`` (no activation on the output).
    """
    layers = tf.keras.layers
    layer_stack = [
        layers.Dense(128, activation='relu', input_shape=(100,)),
        layers.Dropout(0.2),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(X.shape[1]),
    ]
    return tf.keras.Sequential(layer_stack)

def make_discriminator_model():
    """Build the discriminator network with dropout.

    One input per column of the notebook-level ``X`` -> Dense(256, relu)
    -> Dropout(0.2) -> Dense(128, relu) -> Dropout(0.2) -> a single sigmoid unit.
    """
    layers = tf.keras.layers
    layer_stack = [
        layers.Dense(256, activation='relu', input_shape=(X.shape[1],)),
        layers.Dropout(0.2),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(layer_stack)

# Define the GAN Model
generator = make_generator_model()
discriminator = make_discriminator_model()

def make_gan(generator, discriminator):
    """Stack generator -> discriminator into one compiled model.

    Sets ``discriminator.trainable = False`` before compiling the stacked
    model (adam, binary cross-entropy) and returns that stacked model.
    """
    discriminator.trainable = False
    gan_input = tf.keras.Input(shape=(100,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = tf.keras.Model(gan_input, gan_output)
    gan.compile(optimizer='adam', loss='binary_crossentropy')
    return gan

gan = make_gan(generator, discriminator)

# Compile the Generator and Discriminator models
# NOTE(review): make_gan() has already set discriminator.trainable = False by
# the time the discriminator is compiled here.
generator.compile(optimizer='adam', loss='binary_crossentropy')
discriminator.compile(optimizer='adam', loss='binary_crossentropy')

# Early stopping callback: stop after 5 epochs without val_loss improvement
# and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the GAN with early stopping
batch_size = 32
epochs = 100

# The stacked model is fitted on Gaussian noise with all-ones targets; the
# validation set is likewise fresh noise with all-ones targets.
# NOTE(review): X_train / X_test contribute only their row counts here, and
# nothing in this cell fits the discriminator on real rows -- confirm that a
# generator-only fit against an unfitted discriminator is intended.
history = gan.fit(np.random.normal(0, 1, (X_train.shape[0], 100)), np.ones((X_train.shape[0], 1)),
                  validation_data=(np.random.normal(0, 1, (X_test.shape[0], 100)), np.ones((X_test.shape[0], 1))),
                  epochs=epochs, batch_size=batch_size, callbacks=[early_stopping])

# Generate Synthetic Data: one generated row per row of X_scaled
synthetic_data = generator.predict(np.random.normal(0, 1, (X_scaled.shape[0], 100)))

# Identify Outliers (Example: comparing real data with synthetic data)
# You can use a threshold or other anomaly detection techniques to identify outliers
# NOTE(review): no outlier detection is implemented here; synthetic_data is not
# used by anything below.

# Example evaluation metrics
# NOTE(review): both arrays are all zeros, so the metrics below do not depend
# on the model or on the fraud labels -- they are placeholders only.
y_true = np.zeros((X_scaled.shape[0], 1))  # Assuming all data is normal
y_pred = np.zeros((X_scaled.shape[0], 1))  # Assuming all data is normal

precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)



In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping


# Standardize the features; the scaler is fitted on the full feature matrix X
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply data augmentation: a jittered copy of every row, made by adding
# Gaussian noise (mean 0, std 0.1) drawn from the unseeded global NumPy RNG
X_augmented = X_scaled + np.random.normal(0, 0.1, size=X_scaled.shape)

# Combine original and augmented data (original rows first, then jittered rows)
X_combined = np.concatenate([X_scaled, X_augmented])

# Train/test split of the combined features only (no labels are split here).
# NOTE(review): this reassigns X_train / X_test, replacing whatever an earlier
# cell stored under those names.
X_train, X_test = train_test_split(X_combined, test_size=0.2, random_state=42)

# Define the Generator and Discriminator Networks with dropout
def make_generator_model():
    """Build the generator network with dropout.

    100-dim input -> Dense(128, relu) -> Dropout(0.2) -> Dense(256, relu)
    -> Dropout(0.2) -> Dense with one unit per column of the notebook-level
    ``X`` (no activation on the output).
    """
    layers = tf.keras.layers
    layer_stack = [
        layers.Dense(128, activation='relu', input_shape=(100,)),
        layers.Dropout(0.2),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(X.shape[1]),
    ]
    return tf.keras.Sequential(layer_stack)

def make_discriminator_model():
    """Build the discriminator network with dropout.

    One input per column of the notebook-level ``X`` -> Dense(256, relu)
    -> Dropout(0.2) -> Dense(128, relu) -> Dropout(0.2) -> a single sigmoid unit.
    """
    layers = tf.keras.layers
    layer_stack = [
        layers.Dense(256, activation='relu', input_shape=(X.shape[1],)),
        layers.Dropout(0.2),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(layer_stack)

# Define the GAN Model
# NOTE(review): the stacked model below uses RMSprop with a small learning
# rate, but the loss it is compiled with is binary_crossentropy, not a
# Wasserstein loss.
generator = make_generator_model()
discriminator = make_discriminator_model()

def make_gan(generator, discriminator):
    """Stack generator -> discriminator into one compiled model.

    Sets ``discriminator.trainable = False`` before compiling the stacked
    model (RMSprop, learning rate 0.00005, binary cross-entropy) and returns
    that stacked model.
    """
    discriminator.trainable = False
    gan_input = tf.keras.Input(shape=(100,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = tf.keras.Model(gan_input, gan_output)
    gan.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.00005), loss='binary_crossentropy')
    return gan

gan = make_gan(generator, discriminator)

# Compile the Generator and Discriminator models
# NOTE(review): make_gan() has already set discriminator.trainable = False by
# the time the discriminator is compiled here.
generator.compile(optimizer='adam', loss='binary_crossentropy')
discriminator.compile(optimizer='adam', loss='binary_crossentropy')

# Early stopping callback: stop after 5 epochs without val_loss improvement
# and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the GAN with early stopping
batch_size = 32
epochs = 100

# The stacked model is fitted on Gaussian noise with all-ones targets; the
# validation set is likewise fresh noise with all-ones targets.
# NOTE(review): the augmented X_train / X_test contribute only their row
# counts here, and nothing in this cell fits the discriminator on real rows --
# confirm that this is intended.
history = gan.fit(np.random.normal(0, 1, (X_train.shape[0], 100)), np.ones((X_train.shape[0], 1)),
                  validation_data=(np.random.normal(0, 1, (X_test.shape[0], 100)), np.ones((X_test.shape[0], 1))),
                  epochs=epochs, batch_size=batch_size, callbacks=[early_stopping])

# Generate Synthetic Data: one generated row per row of X_scaled
synthetic_data = generator.predict(np.random.normal(0, 1, (X_scaled.shape[0], 100)))

# Identify Outliers (Example: comparing real data with synthetic data)
# You can use a threshold or other anomaly detection techniques to identify outliers
# NOTE(review): no outlier detection is implemented here; synthetic_data is not
# used by anything below.

# Example evaluation metrics
# NOTE(review): both arrays are all zeros, so the metrics below do not depend
# on the model or on the fraud labels -- they are placeholders only.
y_true = np.zeros((X_scaled.shape[0], 1))  # Assuming all data is normal
y_pred = np.zeros((X_scaled.shape[0], 1))  # Assuming all data is normal

precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)

print("Evaluation Metrics:")
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Accuracy:", accuracy)


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Define Generator and Discriminator architectures
def make_generator_model():
    """Build the generator network.

    100-dim input -> Dense(128, relu) -> Dense(256, relu) -> Dense with one
    sigmoid unit per column of the notebook-level ``X``.
    """
    dense = tf.keras.layers.Dense
    layer_stack = [
        dense(128, activation='relu', input_shape=(100,)),
        dense(256, activation='relu'),
        dense(X.shape[1], activation='sigmoid'),  # Output layer
    ]
    return tf.keras.Sequential(layer_stack)

def make_discriminator_model():
    """Build the discriminator network.

    One input per column of the notebook-level ``X`` -> Dense(256, relu)
    -> Dense(128, relu) -> a single sigmoid unit.
    """
    dense = tf.keras.layers.Dense
    layer_stack = [
        dense(256, activation='relu', input_shape=(X.shape[1],)),
        dense(128, activation='relu'),
        dense(1, activation='sigmoid'),  # Output layer
    ]
    return tf.keras.Sequential(layer_stack)

# Define MO-GAAL model
def make_mogaal_model(generator, discriminator):
    """Chain generator -> discriminator behind a 100-dim input and compile it.

    Args:
        generator: model applied to the 100-dim input.
        discriminator: model applied to the generator's output.

    Returns:
        The stacked model, compiled with adam and binary cross-entropy.
    """
    noise_input = tf.keras.Input(shape=(100,))
    verdict = discriminator(generator(noise_input))
    stacked = tf.keras.Model(noise_input, verdict)
    stacked.compile(optimizer='adam', loss='binary_crossentropy')
    return stacked

# Define active learning strategy
class ActiveLearningStrategy:
    """Picks rows from a pool of unlabeled samples.

    The current strategy is a uniform random draw without replacement; the
    pool is stored as given and is not modified by selection.
    """

    def __init__(self, X_unlabeled):
        # Pool to draw from; it is indexed with an integer index array below.
        self.X_unlabeled = X_unlabeled

    def select_samples(self, num_samples):
        """Return `num_samples` distinct rows drawn at random from the pool.

        Uses the global NumPy random state. Placeholder for a real strategy
        (for example, selecting the samples with the highest uncertainty).
        """
        pool = self.X_unlabeled
        picked_rows = np.random.choice(len(pool), num_samples, replace=False)
        return pool[picked_rows]

# Load data (labeled and unlabeled)
# NOTE(review): this reads a relative path, "data1.csv", which is not the CSV
# path used at the top of the notebook -- confirm the file exists and has the
# columns dropped/selected below.
paysim_data = pd.read_csv("data1.csv")
X_labeled = paysim_data.drop(['isFraud', 'nameOrig', 'nameDest', 'type'], axis=1).values
y_labeled = paysim_data['isFraud'].values
# Built by the same expression as X_labeled, so the "unlabeled" pool holds the
# same rows as the labeled set.
X_unlabeled = paysim_data.drop(['isFraud', 'nameOrig', 'nameDest', 'type'], axis=1).values

# Train/test split for labeled data (for evaluation)
X_train_labeled, X_test_labeled, y_train_labeled, y_test_labeled = train_test_split(X_labeled, y_labeled, test_size=0.2, random_state=42)

# Define models
# NOTE(review): the generator/discriminator widths come from the notebook-level
# X (X.shape[1]), not from X_labeled -- verify the column counts agree.
generator = make_generator_model()
discriminator = make_discriminator_model()
mogaal_model = make_mogaal_model(generator, discriminator)

# Active learning strategy
active_learning_strategy = ActiveLearningStrategy(X_unlabeled)

# Training loop
num_epochs = 100
batch_size = 32
num_active_samples = 100  # Number of samples to label at each iteration

for epoch in range(num_epochs):
    # Active learning: select informative samples from unlabeled data
    # NOTE(review): selected_samples is not used by anything after this line.
    selected_samples = active_learning_strategy.select_samples(num_active_samples)
    # Label selected samples (you need to implement this)
    # X_labeled, y_labeled = ...
    # Update labeled and unlabeled data sets
    # X_unlabeled = ...
    # Train GAN with labeled and unlabeled data
    # NOTE(review): every iteration passes ALL rows of X_labeled as one batch
    # (batch_size above is unused), and mogaal_model was built with a 100-dim
    # input -- this call presumably needs X_labeled to have 100 columns;
    # confirm against the CSV schema.
    mogaal_model.train_on_batch(X_labeled, y_labeled)

# Generate synthetic data samples: one per row of the labeled training split
noise_samples = np.random.normal(0, 1, (X_train_labeled.shape[0], 100))
synthetic_data = generator.predict(noise_samples)

# Evaluate performance (you need to implement this)
# For example, you can use synthetic data for anomaly detection and evaluate accuracy
# accuracy = ...

# Print evaluation metrics
# print("Accuracy:", accuracy)
