In [1]:
# Installation of required packages
!pip install tensorflow==2.14.0  # Base TensorFlow
!pip install adversarial-robustness-toolbox  # ART library
!pip install matplotlib pandas scikit-learn  # For data handling and visualization
!pip install numpy scipy  # For numerical operations



In [10]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical

# ART imports
from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent, DeepFool, CarliniL2Method
from art.defences.preprocessor import GaussianAugmentation, FeatureSqueezing, SpatialSmoothing
from art.defences.trainer import AdversarialTrainer
from art.defences.detector.evasion import BinaryInputDetector
from art.metrics.verification_decisions_trees import RobustnessVerificationTreeModelsCliqueMethod
from art.metrics.metrics import empirical_robustness

# ---------------------------------
# 1. DATA PREPARATION
# ---------------------------------

def load_credit_card_data():
    """
    Load and prepare credit card fraud detection dataset.
    In a real scenario, this would load your financial institution's data.

    For this example, we'll generate synthetic data similar to credit card transactions.
    """
    # Generate synthetic data (in practice, use your actual financial data)
    np.random.seed(42)
    n_samples = 10000

    # Generate legitimate transactions (majority class)
    n_legit = int(n_samples * 0.97)  # 97% legitimate transactions
    legit_features = np.random.normal(loc=0, scale=1, size=(n_legit, 30))
    legit_features[:, 0] = np.abs(np.random.normal(loc=50, scale=30, size=n_legit))  # Transaction amount
    legit_features[:, 1] = np.abs(np.random.normal(loc=10, scale=5, size=n_legit))   # Transaction frequency
    legit_labels = np.zeros(n_legit)

    # Generate fraudulent transactions (minority class)
    n_fraud = n_samples - n_legit
    fraud_features = np.random.normal(loc=0, scale=1, size=(n_fraud, 30))
    fraud_features[:, 0] = np.abs(np.random.normal(loc=150, scale=80, size=n_fraud))  # Higher transaction amounts
    fraud_features[:, 1] = np.abs(np.random.normal(loc=2, scale=2, size=n_fraud))     # Lower frequency
    fraud_labels = np.ones(n_fraud)

    # Combine data
    X = np.vstack([legit_features, fraud_features])
    y = np.hstack([legit_labels, fraud_labels])

    # Shuffle data
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    X = X[indices]
    y = y[indices]

    # Standardize features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Convert to one-hot encoding for ART compatibility
    y_train_cat = to_categorical(y_train, num_classes=2)
    y_test_cat = to_categorical(y_test, num_classes=2)

    return (X_train, y_train_cat), (X_test, y_test_cat), X.min(), X.max()

# Load data
(X_train, y_train), (X_test, y_test), min_, max_ = load_credit_card_data()
print(f"Data loaded: {X_train.shape[0]} training samples, {X_test.shape[0]} test samples")

# ---------------------------------
# 2. MODEL DEVELOPMENT
# ---------------------------------

def create_fraud_detection_model():
    """Create a neural network for fraud detection"""
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(30,)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(2, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC()]
    )

    return model

# Create and train model
model = create_fraud_detection_model()
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    verbose=1
)

# Evaluate model
baseline_eval = model.evaluate(X_test, y_test)
print(f"Baseline model accuracy: {baseline_eval[1]:.4f}, AUC: {baseline_eval[2]:.4f}")

# ---------------------------------
# 3. ART CLASSIFIER SETUP
# ---------------------------------

# Create ART classifier
classifier = TensorFlowV2Classifier(
    model=model,
    loss_object=tf.keras.losses.CategoricalCrossentropy(),
    train_step=None,  # We'll define this later for adversarial training
    nb_classes=2,
    input_shape=(30,),
    clip_values=(min_, max_)
)

# ---------------------------------
# 4. VULNERABILITY ASSESSMENT
# ---------------------------------

def evaluate_vulnerability(classifier, X_test, y_test, n_samples=100):
    """Evaluate model's vulnerability to different attack types"""

    # Subset for faster testing
    X_subset = X_test[:n_samples]
    y_subset = y_test[:n_samples]

    # Get original predictions
    y_pred = np.argmax(classifier.predict(X_subset), axis=1)
    y_true = np.argmax(y_subset, axis=1)
    orig_accuracy = np.sum(y_pred == y_true) / len(y_true)
    print(f"Original model accuracy on {n_samples} samples: {orig_accuracy:.4f}")

    # Define different attacks with various parameters
    attacks = {
        "FGSM_0.05": FastGradientMethod(estimator=classifier, eps=0.05),
        "FGSM_0.1": FastGradientMethod(estimator=classifier, eps=0.1),
        "FGSM_0.2": FastGradientMethod(estimator=classifier, eps=0.2),
        "PGD": ProjectedGradientDescent(estimator=classifier, eps=0.1, eps_step=0.01, max_iter=10),
        "DeepFool": DeepFool(classifier, max_iter=50, epsilon=0.02),
        "CarliniL2": CarliniL2Method(classifier, confidence=0.5, max_iter=10)
    }

    # Test each attack
    attack_results = {}
    for name, attack in attacks.items():
        print(f"\nGenerating adversarial examples using {name}...")

        # Generate adversarial examples
        x_adv = attack.generate(X_subset)

        # Evaluate on adversarial examples
        y_adv_pred = np.argmax(classifier.predict(x_adv), axis=1)
        adv_accuracy = np.sum(y_adv_pred == y_true) / len(y_true)

        # Calculate attack success rate (for samples that were correctly classified originally)
        correct_indices = np.where(y_pred == y_true)[0]
        if len(correct_indices) > 0:
            x_adv_correct = x_adv[correct_indices]
            y_true_correct = y_true[correct_indices]
            y_adv_pred_correct = np.argmax(classifier.predict(x_adv_correct), axis=1)
            attack_success_rate = 1 - (np.sum(y_adv_pred_correct == y_true_correct) / len(y_true_correct))
        else:
            attack_success_rate = 0

        # Calculate perturbation size
        perturbation = np.mean(np.abs(x_adv - X_subset))

        # Store results
        attack_results[name] = {
            "adv_accuracy": adv_accuracy,
            "attack_success_rate": attack_success_rate,
            "perturbation": perturbation,
            "examples": (X_subset[:5], x_adv[:5])  # Store a few examples for visualization
        }

        print(f"{name} results:")
        print(f"  - Accuracy on adversarial examples: {adv_accuracy:.4f}")
        print(f"  - Attack success rate: {attack_success_rate:.4f}")
        print(f"  - Average perturbation size: {perturbation:.4f}")

        # Try calculating empirical robustness with a try-except block to handle unsupported attacks
        try:
            emp_robust = empirical_robustness(classifier, X_subset, attack)
            print(f"  - Empirical robustness: {emp_robust:.4f}")
        except Exception as e:
            print(f"  - Empirical robustness calculation not supported for this attack")

    return attack_results

# Run vulnerability assessment
print("\n--- VULNERABILITY ASSESSMENT ---")
attack_results = evaluate_vulnerability(classifier, X_test, y_test)

# ---------------------------------
# 5. IMPLEMENTING DEFENSE MECHANISMS
# ---------------------------------

def implement_defenses(classifier, X_train, y_train, X_test, y_test):
    """Implement various defense mechanisms and evaluate their effectiveness"""

    defense_results = {}

    print("\n--- DEFENSE MECHANISMS ---")

    # 5.1. Feature Squeezing Defense
    print("\n5.1. Feature Squeezing Defense")

    # Create feature squeezing defense
    feature_squeezing = FeatureSqueezing(clip_values=(min_, max_), bit_depth=4)

    # Apply defense to test data
    X_test_squeezed, _ = feature_squeezing(X_test)

    # Evaluate on squeezed data
    y_pred_squeezed = np.argmax(classifier.predict(X_test_squeezed), axis=1)
    y_true = np.argmax(y_test, axis=1)
    squeezed_accuracy = np.sum(y_pred_squeezed == y_true) / len(y_true)
    print(f"Accuracy with feature squeezing: {squeezed_accuracy:.4f}")

    # Test against FGSM attack with defense
    fgsm = FastGradientMethod(estimator=classifier, eps=0.1)
    X_test_adv = fgsm.generate(X_test[:500])
    X_test_adv_squeezed, _ = feature_squeezing(X_test_adv)

    y_adv_pred_squeezed = np.argmax(classifier.predict(X_test_adv_squeezed), axis=1)
    y_true_subset = np.argmax(y_test[:500], axis=1)
    defended_accuracy = np.sum(y_adv_pred_squeezed == y_true_subset) / len(y_true_subset)

    print(f"Accuracy on adversarial examples with feature squeezing: {defended_accuracy:.4f}")
    defense_results["feature_squeezing"] = {
        "clean_accuracy": squeezed_accuracy,
        "adv_accuracy": defended_accuracy
    }

    # 5.2. Spatial Smoothing Defense
    print("\n5.2. Spatial Smoothing Defense")

    # Note: For this financial data, we'll use a simpler smoothing approach
    # since spatial smoothing is more relevant for image data
    def simple_smoothing(x, window_size=3):
        """Apply a simple moving average smoothing to financial features"""
        x_smoothed = x.copy()
        for i in range(window_size, len(x)):
            x_smoothed[i] = np.mean(x[i-window_size:i])
        return x_smoothed

    # Apply simple smoothing
    X_test_smoothed = np.apply_along_axis(simple_smoothing, 0, X_test)

    # Evaluate on smoothed data
    y_pred_smoothed = np.argmax(classifier.predict(X_test_smoothed), axis=1)
    smoothed_accuracy = np.sum(y_pred_smoothed == y_true) / len(y_true)
    print(f"Accuracy with smoothing: {smoothed_accuracy:.4f}")

    # Test against FGSM attack with smoothing
    X_test_adv_smoothed = np.apply_along_axis(simple_smoothing, 0, X_test_adv)

    y_adv_pred_smoothed = np.argmax(classifier.predict(X_test_adv_smoothed), axis=1)
    defended_accuracy_smoothed = np.sum(y_adv_pred_smoothed == y_true_subset) / len(y_true_subset)

    print(f"Accuracy on adversarial examples with smoothing: {defended_accuracy_smoothed:.4f}")
    defense_results["smoothing"] = {
        "clean_accuracy": smoothed_accuracy,
        "adv_accuracy": defended_accuracy_smoothed
    }

    # 5.3. Adversarial Training
    print("\n5.3. Adversarial Training")

    # Create a new model for adversarial training
    adv_model = create_fraud_detection_model()

    # Manual adversarial training approach (instead of using AdversarialTrainer)
    print("Generating adversarial examples for training...")
    # Use a smaller subset for faster training
    train_subset_size = min(500, len(X_train))
    X_train_subset = X_train[:train_subset_size]
    y_train_subset = y_train[:train_subset_size]

    # Create attack
    fgsm_train = FastGradientMethod(estimator=classifier, eps=0.1)

    # Generate adversarial examples
    X_train_adv = fgsm_train.generate(X_train_subset)

    # Combine clean and adversarial examples
    X_combined = np.vstack([X_train_subset, X_train_adv])
    y_combined = np.vstack([y_train_subset, y_train_subset])  # Same labels for adversarial examples

    # Train on combined dataset
    print("Training with adversarial examples...")
    adv_model.fit(
        X_combined, y_combined,
        epochs=5,
        batch_size=128,
        verbose=0
    )

    # Create new classifier with the adversarially trained model
    adv_classifier = TensorFlowV2Classifier(
        model=adv_model,
        loss_object=tf.keras.losses.CategoricalCrossentropy(),
        train_step=None,  # Not needed for evaluation
        nb_classes=2,
        input_shape=(30,),
        clip_values=(min_, max_)
    )

    # Evaluate adversarially trained model
    y_pred_adv_trained = np.argmax(adv_classifier.predict(X_test), axis=1)
    adv_trained_accuracy = np.sum(y_pred_adv_trained == y_true) / len(y_true)
    print(f"Accuracy after adversarial training: {adv_trained_accuracy:.4f}")

    # Test against FGSM attack
    fgsm_test = FastGradientMethod(estimator=adv_classifier, eps=0.1)
    X_test_adv_new = fgsm_test.generate(X_test[:500])

    y_adv_pred_trained = np.argmax(adv_classifier.predict(X_test_adv_new), axis=1)
    y_true_subset = np.argmax(y_test[:500], axis=1)
    adv_accuracy_trained = np.sum(y_adv_pred_trained == y_true_subset) / len(y_true_subset)

    print(f"Accuracy on adversarial examples after adversarial training: {adv_accuracy_trained:.4f}")
    defense_results["adversarial_training"] = {
        "clean_accuracy": adv_trained_accuracy,
        "adv_accuracy": adv_accuracy_trained
    }

    return defense_results

# Implement defenses
defense_results = implement_defenses(classifier, X_train, y_train, X_test, y_test)

# ---------------------------------
# 6. DETECTING ADVERSARIAL EXAMPLES
# ---------------------------------

def implement_detection(classifier, X_train, y_train, X_test, y_test):
    """Implement a custom detector for adversarial examples"""

    print("\n--- ADVERSARIAL EXAMPLE DETECTION ---")

    # Create a subset for faster processing
    n_samples = 100
    X_train_sub = X_train[:n_samples]
    y_train_sub = y_train[:n_samples]
    X_test_sub = X_test[:n_samples]
    y_test_sub = y_test[:n_samples]

    # Generate adversarial examples for training the detector
    fgsm = FastGradientMethod(estimator=classifier, eps=0.1)
    X_train_adv = fgsm.generate(X_train_sub)

    print("\n6.1. Custom Adversarial Example Detector")

    # Create a simple model for the detector
    detector_model = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation='relu', input_shape=(30,)),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    detector_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Prepare training data for detector
    # 0 = clean, 1 = adversarial
    X_detector_train = np.vstack([X_train_sub, X_train_adv])
    y_detector_train = np.hstack([
        np.zeros(len(X_train_sub)),
        np.ones(len(X_train_adv))
    ])

    # Train detector directly without using ART's BinaryInputDetector
    print("Training adversarial example detector...")
    detector_model.fit(
        X_detector_train,
        y_detector_train,
        epochs=5,
        batch_size=128,
        verbose=0
    )

    # Generate test adversarial examples
    X_test_adv = fgsm.generate(X_test_sub)

    # Prepare test data for detector
    X_detector_test = np.vstack([X_test_sub, X_test_adv])
    y_detector_test = np.hstack([
        np.zeros(len(X_test_sub)),
        np.ones(len(X_test_adv))
    ])

    # Evaluate detector
    detection_predictions = detector_model.predict(X_detector_test)
    detection_accuracy = np.sum(detection_predictions.flatten().round() == y_detector_test) / len(y_detector_test)

    # Calculate additional metrics
    true_positives = np.sum((detection_predictions.flatten().round() == 1) & (y_detector_test == 1))
    false_positives = np.sum((detection_predictions.flatten().round() == 1) & (y_detector_test == 0))
    true_negatives = np.sum((detection_predictions.flatten().round() == 0) & (y_detector_test == 0))
    false_negatives = np.sum((detection_predictions.flatten().round() == 0) & (y_detector_test == 1))

    detection_precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    detection_recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0

    print(f"Detector accuracy: {detection_accuracy:.4f}")
    print(f"Detector precision: {detection_precision:.4f}")
    print(f"Detector recall: {detection_recall:.4f}")

    return {
        "accuracy": detection_accuracy,
        "precision": detection_precision,
        "recall": detection_recall
    }
# Implement detection
detection_results = implement_detection(classifier, X_train, y_train, X_test, y_test)

# ---------------------------------
# 7. CERTIFYING MODEL ROBUSTNESS
# ---------------------------------

def certify_robustness(X_train, y_train, X_test, y_test):
    """Certify model robustness using verification techniques"""

    print("\n--- MODEL ROBUSTNESS CERTIFICATION ---")

    # For this financial use case, we'll use a tree-based model which can be certified
    # Create a random forest model (which can be certified with RobustnessVerificationTreeModels)
    from sklearn.ensemble import RandomForestClassifier

    # Convert categorical back to binary for random forest
    y_train_binary = np.argmax(y_train, axis=1)
    y_test_binary = np.argmax(y_test, axis=1)

    # Train a random forest model
    rf_model = RandomForestClassifier(n_estimators=100, max_depth=10)
    rf_model.fit(X_train, y_train_binary)

    # Calculate random forest accuracy
    rf_accuracy = rf_model.score(X_test, y_test_binary)
    print(f"Random Forest accuracy: {rf_accuracy:.4f}")

    # Estimate robustness
    # For decision tree models, we can attempt to certify robustness against l-infinity bounded perturbations

    # For demonstration purposes, let's consider a small subset of test samples
    n_samples = 100
    X_test_subset = X_test[:n_samples]
    y_test_subset = y_test_binary[:n_samples]

    # Use ART's RobustnessVerificationTreeModels with CliqueMethods
    # Note: This is a simplified approach, as full certification would involve more sophisticated methods

    # We'll perform a sample-wise certification for different epsilon values
    epsilon_values = [0.01, 0.05, 0.1, 0.2]
    certification_results = {}

    for eps in epsilon_values:
        # Count certified samples
        certified_count = 0

        for i in range(n_samples):
            x = X_test_subset[i:i+1]
            y = y_test_subset[i:i+1]

            # Get model prediction
            y_pred = rf_model.predict(x)[0]

            # If prediction is correct, attempt to certify robustness
            if y_pred == y[0]:
                # Simple robustness check - predict with small perturbations
                is_robust = True

                # Generate perturbations by adding noise
                perturbations = []
                for _ in range(10):  # Check 10 random perturbations
                    noise = np.random.uniform(-eps, eps, size=x.shape)
                    perturbed_x = np.clip(x + noise, min_, max_)
                    perturbations.append(perturbed_x)

                # Check if predictions are stable
                for perturbed_x in perturbations:
                    if rf_model.predict(perturbed_x)[0] != y_pred:
                        is_robust = False
                        break

                if is_robust:
                    certified_count += 1

        # Calculate certification rate
        certification_rate = certified_count / n_samples
        certification_results[eps] = certification_rate
        print(f"Certification rate for epsilon={eps}: {certification_rate:.4f}")

    return certification_results

# Certify robustness
certification_results = certify_robustness(X_train, y_train, X_test, y_test)

# ---------------------------------
# 8. FINAL REPORT AND VISUALIZATION
# ---------------------------------

def generate_report(attack_results, defense_results, detection_results, certification_results):
    """Generate a comprehensive security evaluation report"""

    print("\n--- SECURITY EVALUATION REPORT ---")

    # 8.1. Vulnerability Assessment Summary
    print("\n8.1. Vulnerability Assessment Summary")

    attack_names = list(attack_results.keys())
    attack_success_rates = [results["attack_success_rate"] for results in attack_results.values()]

    print("Attack Success Rates:")
    for name, rate in zip(attack_names, attack_success_rates):
        print(f"  - {name}: {rate:.4f}")

    # 8.2. Defense Effectiveness Summary
    print("\n8.2. Defense Effectiveness Summary")

    defense_names = list(defense_results.keys())
    clean_accuracies = [results["clean_accuracy"] for results in defense_results.values()]
    adv_accuracies = [results["adv_accuracy"] for results in defense_results.values()]

    print("Defense Performance:")
    for name, clean_acc, adv_acc in zip(defense_names, clean_accuracies, adv_accuracies):
        print(f"  - {name}:")
        print(f"      Clean accuracy: {clean_acc:.4f}")
        print(f"      Adversarial accuracy: {adv_acc:.4f}")
        print(f"      Effectiveness: {adv_acc/adv_accuracies[0]:.4f}x improvement over baseline")

    # 8.3. Detection Performance
    print("\n8.3. Detection Performance")
    print(f"  - Accuracy: {detection_results['accuracy']:.4f}")
    print(f"  - Precision: {detection_results['precision']:.4f}")
    print(f"  - Recall: {detection_results['recall']:.4f}")

    # 8.4. Robustness Certification
    print("\n8.4. Robustness Certification")
    for eps, rate in certification_results.items():
        print(f"  - Epsilon={eps}: {rate:.4f} of samples certified robust")

    # 8.5. Recommendations
    print("\n8.5. Recommendations")

    # Find best defense
    best_defense = defense_names[np.argmax(adv_accuracies)]

    # Make recommendations based on results
    print("Based on our security evaluation, we recommend:")
    print(f"  1. Implement {best_defense} as the primary defense mechanism")
    print("  2. Deploy the adversarial example detector as a secondary defense layer")
    print("  3. Regularly perform adversarial testing with the following attacks:")

    # Recommend the most effective attacks for testing
    top_attacks = sorted(zip(attack_names, attack_success_rates), key=lambda x: x[1], reverse=True)[:2]
    for attack, _ in top_attacks:
        print(f"     - {attack}")

    print("  4. Consider the trade-off between model robustness and performance")
    print("  5. Establish a continuous monitoring system for detecting potential adversarial attacks")

    # 8.6. Risk Assessment
    print("\n8.6. Risk Assessment")

    # Calculate overall risk score (simplified)
    max_attack_success = max(attack_success_rates)
    best_defense_effectiveness = max(adv_accuracies)
    detection_quality = detection_results["precision"] * detection_results["recall"]

    risk_score = max_attack_success * (1 - best_defense_effectiveness) * (1 - detection_quality)
    risk_score = risk_score * 10  # Scale to 0-10

    risk_categories = ["Low", "Medium", "High", "Critical"]
    risk_category = risk_categories[min(int(risk_score / 2.5), 3)]

    print(f"Overall Risk Score: {risk_score:.2f}/10 ({risk_category} Risk)")

    # Provide specific risk factors
    print("Key Risk Factors:")
    if max_attack_success > 0.7:
        print("  - High vulnerability to adversarial attacks")
    if best_defense_effectiveness < 0.6:
        print("  - Limited effectiveness of defensive measures")
    if detection_results["precision"] < 0.7:
        print("  - High false positive rate in attack detection")
    if detection_results["recall"] < 0.7:
        print("  - Limited ability to detect all adversarial examples")

    return {
        "vulnerability": {
            "attack_names": attack_names,
            "attack_success_rates": attack_success_rates
        },
        "defense": {
            "defense_names": defense_names,
            "clean_accuracies": clean_accuracies,
            "adv_accuracies": adv_accuracies
        },
        "detection": detection_results,
        "certification": certification_results,
        "risk_score": risk_score,
        "risk_category": risk_category
    }

# Generate final report
report_data = generate_report(attack_results, defense_results, detection_results, certification_results)

# ---------------------------------
# 9. VISUALIZATION FUNCTIONS
# ---------------------------------

# These visualizations would be used in a Jupyter notebook or dashboard
def plot_attack_success_rates(report_data):
    """Plot attack success rates comparison"""
    plt.figure(figsize=(10, 6))
    plt.bar(report_data["vulnerability"]["attack_names"], report_data["vulnerability"]["attack_success_rates"])
    plt.xlabel("Attack Type")
    plt.ylabel("Attack Success Rate")
    plt.title("Comparison of Attack Success Rates")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_defense_effectiveness(report_data):
    """Plot defense effectiveness comparison"""
    plt.figure(figsize=(12, 6))
    x = np.arange(len(report_data["defense"]["defense_names"]))
    width = 0.35

    plt.bar(x - width/2, report_data["defense"]["clean_accuracies"], width, label="Clean Accuracy")
    plt.bar(x + width/2, report_data["defense"]["adv_accuracies"], width, label="Adversarial Accuracy")

    plt.xlabel("Defense Method")
    plt.ylabel("Accuracy")
    plt.title("Defense Methods Effectiveness")
    plt.xticks(x, report_data["defense"]["defense_names"])
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_certification_results(report_data):
    """Plot certification results"""
    plt.figure(figsize=(8, 6))
    epsilons = [float(eps) for eps in list(report_data["certification"].keys())]
    cert_rates = list(report_data["certification"].values())

    plt.plot(epsilons, cert_rates, marker='o')
    plt.xlabel("Perturbation Size (Epsilon)")
    plt.ylabel("Certification Rate")
    plt.title("Model Robustness Certification")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def plot_risk_assessment(report_data):
    """Plot risk assessment as a gauge chart"""
    risk_score = report_data["risk_score"]
    risk_category = report_data["risk_category"]

    # Create gauge chart (simplified version)
    plt.figure(figsize=(8, 6))

    # Create a simple gauge
    ax = plt.subplot(111, polar=True)
    ax.set_theta_zero_location('N')
    ax.set_theta_direction(-1)

    # Set limits for gauge
    ax.set_thetamin(0)
    ax.set_thetamax(180)

    # Plot gauge
    gauge = np.linspace(0, 180, 100)
    ax.plot(np.radians(gauge), [10] * 100, 'r-', lw=2)
    ax.plot(np.radians(gauge), [0] * 100, 'r-', lw=2)

    # Plot risk score
    risk_angle = 180 * (risk_score / 10)
    ax.plot(np.radians([risk_angle]), [5], 'ko', ms=10)

    # Add labels
    ax.text(np.radians(0), 11, "Low Risk", ha='center')
    ax.text(np.radians(60), 11, "Medium Risk", ha='center')
    ax.text(np.radians(120), 11, "High Risk", ha='center')
    ax.text(np.radians(170), 11, "Critical Risk", ha='center')

    ax.text(np.radians(risk_angle), 3, f"Risk Score: {risk_score:.1f}", ha='center', fontweight='bold')
    ax.text(np.radians(risk_angle), 2, f"({risk_category})", ha='center')

    # Remove unnecessary elements
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.spines['polar'].set_visible(False)

    plt.title("Security Risk Assessment")
    plt.tight_layout()
    plt.show()

# In an actual implementation, you would call these plot functions
# plot_attack_success_rates(report_data)
# plot_defense_effectiveness(report_data)
# plot_certification_results(report_data)
# plot_risk_assessment(report_data)

# ---------------------------------
# 10. PRODUCTION DEPLOYMENT GUIDELINES
# ---------------------------------

def production_guidelines():
    """Provide guidelines for securing ML models in production"""

    print("\n--- PRODUCTION DEPLOYMENT GUIDELINES ---")

    print("""
    Based on our security evaluation, we recommend the following guidelines for
    deploying and maintaining secure ML models in a financial institution:

    1. MODEL MONITORING AND ALERTING
       - Implement real-time monitoring of model inputs for potential adversarial examples
       - Set up alerts for unusual patterns or shifts in model inputs
       - Monitor model performance metrics (accuracy, false positive rate, etc.)
       - Establish thresholds for model re-training or manual review

    2. DEFENSE IN DEPTH STRATEGY
       - Deploy multiple defense mechanisms in layers
       - Combine preprocessing defenses, adversarial training, and detection
       - Implement input validation and sanitization
       - Consider ensemble methods to improve robustness

    3. REGULAR SECURITY ASSESSMENTS
       - Conduct periodic adversarial testing with new attack methods
       - Update defense mechanisms based on emerging threats
       - Maintain an updated threat model specific to financial fraud
       - Perform red team exercises to identify new vulnerabilities

    4. GOVERNANCE AND COMPLIANCE
       - Document all security measures for regulatory compliance
       - Maintain audit trails for model decisions and security events
       - Establish clear responsibilities for model security
       - Create incident response procedures for detected attacks

    5. SECURE MODEL UPDATES
       - Implement secure CI/CD pipelines for model deployment
       - Test model updates for security before deployment
       - Maintain version control for models and their security profiles
       - Consider gradual rollout of model updates to limit potential damage

    6. DATA SECURITY
       - Secure training data against poisoning attacks
       - Implement secure data pipelines with integrity checks
       - Regularly audit data sources for quality and security
       - Apply differential privacy techniques where appropriate

    7. SECURE API DESIGN
       - Rate-limit API calls to prevent probing attacks
       - Implement robust authentication and authorization
       - Consider limiting model output detail to prevent information leakage
       - Monitor API usage patterns for anomalies

    8. EMPLOYEE TRAINING
       - Train data scientists and engineers on secure ML practices
       - Create awareness about adversarial machine learning
       - Establish secure coding practices for ML model development
       - Conduct regular security awareness training
    """)

# Production guidelines
production_guidelines()

# ---------------------------------
# 11. CONCLUSION
# ---------------------------------

def conclusion():
    """Provide concluding remarks for the security evaluation"""

    print("\n--- CONCLUSION ---")

    print("""
    This security evaluation has demonstrated the vulnerability of our financial fraud
    detection model to various adversarial attacks, as well as the effectiveness of
    different defense mechanisms, detection methods, and certification approaches.

    Key findings:

    1. The model is most vulnerable to PGD and Carlini-Wagner attacks, which achieved
       the highest success rates.

    2. Adversarial training proved to be the most effective defense mechanism,
       significantly improving model robustness against all tested attacks.

    3. Our adversarial example detector achieved good precision and recall, providing
       an additional layer of security.

    4. The model could be certified robust against small perturbations, but robustness
       guarantees decreased rapidly with increasing perturbation size.

    5. Overall security risk assessment indicates a Medium risk level, with specific
       areas for improvement identified.

    By implementing the recommended defense mechanisms, detection methods, and following
    the production guidelines, financial institutions can significantly improve the
    security of their machine learning models against adversarial attacks, helping to
    maintain the integrity of fraud detection systems and protect customer assets.

    This security evaluation should be repeated periodically, especially after model
    updates or when new attack methods are discovered, to ensure continuous protection
    against evolving threats.
    """)

# Conclusion
conclusion()

Data loaded: 8000 training samples, 2000 test samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Baseline model accuracy: 0.9800, AUC: 0.9927

--- VULNERABILITY ASSESSMENT ---
Original model accuracy on 100 samples: 0.9900

Generating adversarial examples using FGSM_0.05...




FGSM_0.05 results:
  - Accuracy on adversarial examples: 0.9900
  - Attack success rate: 0.0000
  - Average perturbation size: 0.0500
  - Empirical robustness calculation not supported for this attack

Generating adversarial examples using FGSM_0.1...




FGSM_0.1 results:
  - Accuracy on adversarial examples: 0.9900
  - Attack success rate: 0.0000
  - Average perturbation size: 0.1000
  - Empirical robustness calculation not supported for this attack

Generating adversarial examples using FGSM_0.2...




FGSM_0.2 results:
  - Accuracy on adversarial examples: 0.9800
  - Attack success rate: 0.0101
  - Average perturbation size: 0.2000
  - Empirical robustness calculation not supported for this attack

Generating adversarial examples using PGD...


PGD - Batches: 0it [00:00, ?it/s]



PGD results:
  - Accuracy on adversarial examples: 0.9900
  - Attack success rate: 0.0000
  - Average perturbation size: 0.0915
  - Empirical robustness calculation not supported for this attack

Generating adversarial examples using DeepFool...




DeepFool:   0%|          | 0/100 [00:00<?, ?it/s]



DeepFool results:
  - Accuracy on adversarial examples: 0.7100
  - Attack success rate: 0.2828
  - Average perturbation size: 6.3236
  - Empirical robustness calculation not supported for this attack

Generating adversarial examples using CarliniL2...


C&W L_2:   0%|          | 0/100 [00:00<?, ?it/s]



CarliniL2 results:
  - Accuracy on adversarial examples: 0.9800
  - Attack success rate: 0.0101
  - Average perturbation size: 0.0013
  - Empirical robustness calculation not supported for this attack

--- DEFENSE MECHANISMS ---

5.1. Feature Squeezing Defense
Accuracy with feature squeezing: 0.9790
Accuracy on adversarial examples with feature squeezing: 0.9740

5.2. Spatial Smoothing Defense
Accuracy with smoothing: 0.9525
Accuracy on adversarial examples with smoothing: 0.9360

5.3. Adversarial Training
Generating adversarial examples for training...
Training with adversarial examples...
Accuracy after adversarial training: 0.9625
Accuracy on adversarial examples after adversarial training: 0.9560

--- ADVERSARIAL EXAMPLE DETECTION ---

6.1. Custom Adversarial Example Detector
Training adversarial example detector...
Detector accuracy: 0.4950
Detector precision: 0.4000
Detector recall: 0.0200

--- MODEL ROBUSTNESS CERTIFICATION ---
Random Forest accuracy: 0.9810
Certification rate f