In [None]:
# Lab 6: Adversarial Machine Learning for Cybersecurity
# Student Interactive Jupyter Notebook

# CELL 1: Introduction and Setup
"""
Course: AI in Cybersecurity
Lab Duration: 60-75 minutes

INSTRUCTIONS:
1. Run each cell in order (Shift+Enter)
2. Read the markdown cells between code cells for guidance
3. Fill in observations when prompted
4. Don't skip sections - each builds on the previous
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import warnings
import seaborn as sns
warnings.filterwarnings('ignore')

# Reproducibility: give TensorFlow and NumPy the same fixed seed
tf.random.set_seed(42)
np.random.seed(42)

# Lab-wide settings (used as default arguments by the cells below)
EPSILON = 0.1         # FGSM perturbation size (attack strength)
EPOCHS_STANDARD = 10  # epochs for the standard model
EPOCHS_ROBUST = 5     # epochs for the adversarially trained model

# Single print call; sep="\n" yields the same three output lines
print(
    "✓ All libraries imported successfully!",
    "✓ Random seeds set for reproducible results",
    f"✓ Configuration set: ε={EPSILON}, Standard epochs={EPOCHS_STANDARD}, Robust epochs={EPOCHS_ROBUST}",
    sep="\n",
)


## Part 1: Understanding the Threat

### What are adversarial attacks?
Adversarial attacks are small, carefully crafted modifications to input data that cause 
machine learning models to make incorrect predictions while appearing normal to humans.

### Why do they matter in cybersecurity?
- Malware can be modified to evade detection systems
- Facial recognition can be fooled with special patterns  
- Network intrusion detection can be bypassed
- Any ML-based security system is potentially vulnerable

### Your Task:
Write your hypothesis about what makes ML models vulnerable:

**HYPOTHESIS:** _______________________________________________


In [None]:
# CELL 2: Demo Class Definition
class AdversarialMLDemo:
    """Container for the lab's CIFAR-10 data and its two CNN classifiers.

    Attributes are created empty in ``__init__`` and filled in later:
    the data splits by ``load_and_preprocess_data`` and ``model`` /
    ``training_history`` by ``train_model``. ``robust_model`` and
    ``robust_training_history`` are not set by any method of this class.
    """

    def __init__(self):
        # Classifiers: the standard one and the adversarially trained one
        self.model = None
        self.robust_model = None

        # Dataset splits (images scaled to [0, 1], labels one-hot)
        self.x_train, self.y_train = None, None
        self.x_test, self.y_test = None, None

        # Human-readable names indexed by class id
        self.class_names = [
            'airplane', 'automobile', 'bird', 'cat', 'deer',
            'dog', 'frog', 'horse', 'ship', 'truck',
        ]

        # Per-epoch metric dictionaries
        self.training_history = None
        self.robust_training_history = None

    def load_and_preprocess_data(self):
        """Load CIFAR-10, scale and one-hot encode it, and plot the first 10 training images."""
        print("Loading CIFAR-10 dataset...")
        (raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = cifar10.load_data()

        # Pixel values are divided by 255 so they land in [0, 1]
        self.x_train = raw_train_x.astype('float32') / 255.0
        self.x_test = raw_test_x.astype('float32') / 255.0

        # Integer labels -> one-hot vectors of length 10
        self.y_train = to_categorical(raw_train_y, 10)
        self.y_test = to_categorical(raw_test_y, 10)

        print(f"✓ Training data shape: {self.x_train.shape}")
        print(f"✓ Test data shape: {self.x_test.shape}")

        # 2x5 grid; axes.flat walks it row by row, matching sample order 0..9
        fig, axes = plt.subplots(2, 5, figsize=(12, 5))
        fig.suptitle('Sample CIFAR-10 Images', fontsize=16)
        for sample_idx, ax in enumerate(axes.flat):
            class_id = np.argmax(self.y_train[sample_idx])
            ax.imshow(self.x_train[sample_idx])
            ax.set_title(f'{self.class_names[class_id]}')
            ax.axis('off')
        plt.tight_layout()
        plt.show()

    def create_simple_cnn(self):
        """Build and compile a small 3-conv-layer CNN for 32x32x3 inputs and 10 classes.

        Returns:
            A compiled Keras ``Sequential`` model (Adam optimizer,
            categorical cross-entropy loss, accuracy metric).
        """
        feature_extractor = [
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
        ]
        classifier_head = [
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ]
        network = models.Sequential(feature_extractor + classifier_head)

        network.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        return network

    def train_model(self, epochs=EPOCHS_STANDARD):
        """Train a fresh standard (undefended) CNN and store it on ``self.model``.

        The test split doubles as the validation set during fitting, so the
        reported validation and test accuracies are measured on the same data.

        Args:
            epochs: number of training epochs.

        Returns:
            Accuracy of the trained model on the test split.
        """
        print("🏗️ Training standard model...")
        print("This represents any ML model used in cybersecurity!")

        self.model = self.create_simple_cnn()

        fit_result = self.model.fit(
            self.x_train,
            self.y_train,
            epochs=epochs,
            batch_size=32,
            validation_data=(self.x_test, self.y_test),
            verbose=1,
        )
        metrics_by_epoch = fit_result.history
        self.training_history = metrics_by_epoch

        # evaluate() returns [loss, accuracy]; only the accuracy is reported
        _, test_acc = self.model.evaluate(self.x_test, self.y_test, verbose=0)

        print(f"\n📊 TRAINING COMPLETE!")
        print(f"Final Training Accuracy: {metrics_by_epoch['accuracy'][-1]:.3f}")
        print(f"Final Validation Accuracy: {metrics_by_epoch['val_accuracy'][-1]:.3f}")
        print(f"Test Accuracy: {test_acc:.3f}")

        # The scalar accuracy is returned rather than the Keras History object
        return test_acc

# Create the shared demo object used by every later cell
demo = AdversarialMLDemo()

# CELL 3: Load Data
# One print call with sep="\n" emits the same three banner lines
print("=" * 60, "PART 2: LOADING DATA", "=" * 60, sep="\n")

# Populates demo.x_train / y_train / x_test / y_test and plots sample images
demo.load_and_preprocess_data()

## Part 2: Building a Vulnerable Model

Now we'll create a "normal" CNN that we can attack. This represents any 
ML model used in cybersecurity (malware detector, intrusion detection, etc.).

**Observation:** Look at the sample images above. They are examples drawn from the 10 classes our model will learn to recognize.


In [None]:
# CELL 4: Train Model
print("=" * 60, "PART 2: TRAINING VULNERABLE MODEL", "=" * 60, sep="\n")

# Fit the standard CNN (expect this to take a few minutes);
# train_model returns the accuracy it measured on the test split
final_accuracy = demo.train_model(epochs=EPOCHS_STANDARD)

## 🤔 OBSERVATION CHECKPOINT - Part 2

Record your results:
- **Final Training Accuracy:** _______%
- **Final Test Accuracy:** _______%
- **Training time:** _______ minutes

This model will now be our "victim" - let's see how easily we can fool it!


In [None]:
# CELL 5: FGSM Attack Function and Quick Demo
def fgsm_attack(model, image, label, epsilon=EPSILON):
    """Craft adversarial images with the Fast Gradient Sign Method.

    Each input is moved by ``epsilon`` in the direction of the sign of the
    loss gradient with respect to the input, then clipped back into [0, 1].

    Args:
        model: callable classifier whose output is compared to ``label`` with
            categorical cross-entropy.
        image: batch of input images (cast to float32); callers in this
            notebook pass slices of ``demo.x_test`` / ``demo.x_train``.
        label: one-hot labels matching ``image``.
        epsilon: perturbation magnitude applied to every input element.

    Returns:
        A float32 tensor of perturbed images with values in [0, 1].
    """
    x = tf.cast(image, tf.float32)

    with tf.GradientTape() as grad_tape:
        # x is a plain tensor, so it must be watched explicitly
        grad_tape.watch(x)
        loss_value = tf.keras.losses.categorical_crossentropy(label, model(x))

    # Only the direction of the input gradient is used, not its magnitude
    step_direction = tf.sign(grad_tape.gradient(loss_value, x))

    # Take one epsilon-sized step and keep pixels inside the valid range
    return tf.clip_by_value(x + epsilon * step_direction, 0, 1)

print("✓ FGSM attack function defined")

# Quick demonstration with one image
print("🎯 Testing the attack function with one image...")

# Pick one random test image; slicing (idx:idx+1) keeps the batch dimension
test_idx = np.random.randint(0, len(demo.x_test))
test_image = demo.x_test[test_idx:test_idx+1]
test_label = demo.y_test[test_idx:test_idx+1]

# Prediction on the clean image
original_pred = demo.model.predict(test_image, verbose=0)
original_class = np.argmax(original_pred)

# Craft the adversarial version and classify it
adversarial_image = fgsm_attack(demo.model, test_image, test_label, EPSILON)
adv_pred = demo.model.predict(adversarial_image, verbose=0)
adv_class = np.argmax(adv_pred)

# Here the attack "succeeds" when the predicted class changes at all.
# Decide this BEFORE plotting so the title and messages report what actually happened.
attack_success = (original_class != adv_class)

# Show the clean and adversarial images side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

ax1.imshow(test_image[0])
ax1.set_title(f'Original\nPrediction: {demo.class_names[original_class]}\nConfidence: {original_pred[0][original_class]:.3f}')
ax1.axis('off')

ax2.imshow(adversarial_image[0])
ax2.set_title(f'Adversarial\nPrediction: {demo.class_names[adv_class]}\nConfidence: {adv_pred[0][adv_class]:.3f}')
ax2.axis('off')

# Only claim success when the prediction really changed
if attack_success:
    plt.suptitle('First Adversarial Attack Success!', fontsize=14)
else:
    plt.suptitle('First Adversarial Attack: Prediction Unchanged', fontsize=14)
plt.tight_layout()
plt.show()

print(f"Attack successful: {attack_success}")
if attack_success:
    print(f"Changed prediction from '{demo.class_names[original_class]}' to '{demo.class_names[adv_class]}'")
else:
    print(f"Prediction stayed '{demo.class_names[original_class]}' - this image resisted the attack at ε={EPSILON}")
print("Now let's see more examples...")

# CELL 6: Generate First Adversarial Examples
def generate_and_show_attacks(demo, num_examples=5, epsilon=EPSILON):
    """Attack random test images with FGSM, plot before/after, and report the success rate.

    An attack counts as successful only when the model classified the clean
    image correctly AND misclassifies the adversarial one; images the model
    already got wrong are counted as unsuccessful.

    Args:
        demo: object exposing ``model``, ``x_test``, ``y_test`` (one-hot) and
            ``class_names``.
        num_examples: number of distinct test images to attack (>= 1).
        epsilon: perturbation magnitude passed to ``fgsm_attack``.

    Returns:
        A list with one dict per image, holding the class names under
        'original', 'adversarial' and 'true', plus a boolean 'success'.
    """
    # Function-scope import keeps the cell self-contained; it is hoisted out
    # of the plotting loop so it is not re-executed per image.
    from matplotlib.patches import Rectangle

    print(f"🎯 Generating {num_examples} adversarial examples with epsilon={epsilon}...")

    # Select random test examples
    indices = np.random.choice(len(demo.x_test), num_examples, replace=False)

    # squeeze=False keeps `axes` two-dimensional even for num_examples == 1,
    # so axes[row, col] indexing below always works.
    fig, axes = plt.subplots(2, num_examples, figsize=(15, 6), squeeze=False)
    fig.suptitle('Your First Adversarial Attack Results! 🚨', fontsize=16)

    attack_results = []

    for i, idx in enumerate(indices):
        # Slicing (idx:idx+1) keeps the batch dimension the model expects
        original_img = demo.x_test[idx:idx+1]
        true_label = demo.y_test[idx:idx+1]
        true_class = np.argmax(true_label)

        # Generate adversarial example
        adv_img = fgsm_attack(demo.model, original_img, true_label, epsilon)

        # Get predictions
        orig_pred = demo.model.predict(original_img, verbose=0)
        adv_pred = demo.model.predict(adv_img, verbose=0)

        orig_pred_class = np.argmax(orig_pred)
        adv_pred_class = np.argmax(adv_pred)

        # Success = a correct prediction was turned into an incorrect one
        attack_success = (orig_pred_class == true_class and adv_pred_class != true_class)
        attack_results.append({
            'original': demo.class_names[orig_pred_class],
            'adversarial': demo.class_names[adv_pred_class],
            'true': demo.class_names[true_class],
            'success': attack_success
        })

        # Original image
        axes[0, i].imshow(original_img[0])
        axes[0, i].set_title(f'Original\nTrue: {demo.class_names[true_class]}\nPred: {demo.class_names[orig_pred_class]}')
        axes[0, i].axis('off')

        # Adversarial image
        axes[1, i].imshow(adv_img[0])
        success_marker = "✓" if attack_success else "✗"
        axes[1, i].set_title(f'Adversarial {success_marker}\nTrue: {demo.class_names[true_class]}\nPred: {demo.class_names[adv_pred_class]}')
        axes[1, i].axis('off')

        # Highlight successful attacks with a red border. The rectangle is
        # drawn in axes coordinates with clipping disabled so all four edges
        # are visible (a data-space 32x32 box overshoots the image extent).
        if attack_success:
            for ax in (axes[0, i], axes[1, i]):
                ax.add_patch(Rectangle((0, 0), 1, 1, fill=False, edgecolor='red',
                                       linewidth=2, transform=ax.transAxes, clip_on=False))

    plt.tight_layout()
    plt.show()

    # Show statistics
    success_count = sum(r['success'] for r in attack_results)
    success_rate = success_count / len(attack_results) * 100
    print(f"\n🎯 Attack Success Rate: {success_rate:.1f}%")
    print(f"💀 Successfully fooled the model {success_count} out of {len(attack_results)} times!")

    return attack_results

# Launch the multi-image attack demo
print("=" * 60, "PART 3: YOUR FIRST ADVERSARIAL ATTACK", "=" * 60, sep="\n")

# attack_results is a list of per-image dicts (predicted/true class names and a success flag)
attack_results = generate_and_show_attacks(demo, num_examples=5, epsilon=EPSILON)

## 🤔 OBSERVATION CHECKPOINT - Part 3

Fill in your attack results:

| Image | Original Prediction | Adversarial Prediction | Successful Attack? |
|-------|--------------------|-----------------------|-------------------|
| 1.    |                    |                       |                   |
| 2.    |                    |                       |                   |
| 3.    |                    |                       |                   |
| 4.    |                    |                       |                   |
| 5.    |                    |                       |                   |

**CRITICAL QUESTION:** Can you visually tell the difference between original and adversarial images?

**YOUR ANSWER:** ___________________________________________


In [None]:
# CELL 7: Visualize Attack Mechanics
def visualize_attack_mechanics(demo, num_examples=3, epsilon=EPSILON, amplification=10):
    """Plot original image, amplified FGSM perturbation, and adversarial result.

    Draws a 3-row grid with one column per randomly chosen test image:
    row 0 is the clean image titled with its true class, row 1 the
    perturbation (scaled and shifted to mid-gray so it is visible), and
    row 2 the adversarial image titled with the model's prediction for it.

    Args:
        demo: object exposing ``model``, ``x_test``, ``y_test`` (one-hot) and
            ``class_names``.
        num_examples: number of distinct test images to show (>= 1).
        epsilon: perturbation magnitude passed to ``fgsm_attack``.
        amplification: factor the perturbation is multiplied by for display
            only; the default of 10 reproduces the original figure.
    """
    print("🔬 Analyzing attack mechanics...")

    indices = np.random.choice(len(demo.x_test), num_examples, replace=False)

    # squeeze=False keeps `axes` two-dimensional even for num_examples == 1
    fig, axes = plt.subplots(3, num_examples, figsize=(12, 9), squeeze=False)
    fig.suptitle(f'How FGSM Attacks Work (ε={epsilon})', fontsize=16)

    for i, idx in enumerate(indices):
        original_img = demo.x_test[idx:idx+1]
        true_label = demo.y_test[idx:idx+1]

        # Generate the adversarial example; the perturbation is what was added
        adv_img = fgsm_attack(demo.model, original_img, true_label, epsilon)
        perturbation = np.asarray(adv_img[0]) - original_img[0]

        # Only the adversarial prediction is displayed, so the clean image
        # is not sent through the model here.
        adv_pred = demo.model.predict(adv_img, verbose=0)

        # Original image
        axes[0, i].imshow(original_img[0])
        axes[0, i].set_title(f'Original\n{demo.class_names[np.argmax(true_label)]}')
        axes[0, i].axis('off')

        # Perturbation: amplify, center on 0.5 (gray = no change), clip for imshow
        pert_vis = np.clip(perturbation * amplification + 0.5, 0, 1)
        axes[1, i].imshow(pert_vis)
        axes[1, i].set_title(f'Perturbation\n({amplification}x amplified)')
        axes[1, i].axis('off')

        # Adversarial result
        axes[2, i].imshow(adv_img[0])
        axes[2, i].set_title(f'Result\n{demo.class_names[np.argmax(adv_pred)]}')
        axes[2, i].axis('off')

    plt.tight_layout()
    plt.show()

# Section banner, emitted as three lines by a single print call
print("=" * 60, "PART 4: UNDERSTANDING ATTACK MECHANICS", "=" * 60, sep="\n")

# Draw original / perturbation / adversarial rows for three random test images
visualize_attack_mechanics(demo, num_examples=3, epsilon=EPSILON)

## 🤔 OBSERVATION CHECKPOINT - Part 4

**PERTURBATION ANALYSIS:**
- The perturbations are amplified **10x** for visibility
- Without amplification, would you notice them? ______
- Do they look random or structured? ________________

**LEARNING CHECK:** Explain FGSM in your own words:
___________________________________________________________________
___________________________________________________________________


In [None]:
# CELL 8: Confidence Analysis
def analyze_confidence_changes(demo, num_examples=3, epsilon=EPSILON):
    """Plot per-class probabilities before and after an FGSM attack.

    For each randomly chosen test image, draws a grouped bar chart of the
    model's 10 class probabilities on the clean image (blue) and on the
    adversarial image (red). The true class's bars are recolored green
    (clean) and dark red (adversarial).

    Args:
        demo: object exposing ``model``, ``x_test``, ``y_test`` (one-hot) and
            ``class_names``.
        num_examples: number of distinct test images to analyze (>= 1).
        epsilon: perturbation magnitude passed to ``fgsm_attack``.
    """
    print("📊 Analyzing confidence changes...")

    indices = np.random.choice(len(demo.x_test), num_examples, replace=False)

    # squeeze=False always returns a 2-D array; without it a single example
    # yields a bare Axes object and axes[i] fails.
    fig, axes_grid = plt.subplots(1, num_examples, figsize=(15, 5), squeeze=False)
    axes = axes_grid[0]
    fig.suptitle('Model Confidence: Original vs Adversarial', fontsize=16)

    # Bar layout is the same for every subplot, so compute it once
    x = np.arange(len(demo.class_names))
    width = 0.35

    for i, idx in enumerate(indices):
        original_img = demo.x_test[idx:idx+1]
        true_label = demo.y_test[idx:idx+1]
        true_class = np.argmax(true_label)

        # Generate adversarial example
        adv_img = fgsm_attack(demo.model, original_img, true_label, epsilon)

        # Class probabilities for the single image in each batch
        orig_probs = demo.model.predict(original_img, verbose=0)[0]
        adv_probs = demo.model.predict(adv_img, verbose=0)[0]

        # Paired bars: clean to the left of each tick, adversarial to the right
        bars1 = axes[i].bar(x - width/2, orig_probs, width, label='Original', alpha=0.7, color='blue')
        bars2 = axes[i].bar(x + width/2, adv_probs, width, label='Adversarial', alpha=0.7, color='red')

        # Highlight true class
        bars1[true_class].set_color('green')
        bars2[true_class].set_color('darkred')

        axes[i].set_ylabel('Confidence')
        axes[i].set_title(f'True: {demo.class_names[true_class]}')
        axes[i].set_xticks(x)
        axes[i].set_xticklabels(demo.class_names, rotation=45, ha='right')
        axes[i].legend()
        axes[i].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Section banner, emitted as three lines by a single print call
print("=" * 60, "PART 5: MODEL CONFIDENCE ANALYSIS", "=" * 60, sep="\n")

# Compare class probabilities on clean vs adversarial versions of three test images
analyze_confidence_changes(demo, num_examples=3, epsilon=EPSILON)

## 🤔 OBSERVATION CHECKPOINT - Part 5

**CONFIDENCE ANALYSIS:**
- Does the model become more or less confident in wrong predictions? ________________
- Which class does the model most often predict for the adversarial images? ________________

**What epsilon provides a good balance between effectiveness and imperceptibility?**

**YOUR ANSWER:** ___________________________________________


In [None]:
# CELL 9: Adversarial Training (Fixed)
def train_robust_model(demo, epochs=EPOCHS_ROBUST, epsilon=EPSILON):
    """Adversarially train a new CNN and store it on ``demo.robust_model``.

    Every training step uses a doubled batch: the 32 clean images plus a copy
    of the same batch in which a random half of the images has been replaced
    by FGSM examples. The adversarial examples are crafted against the
    already-trained standard model ``demo.model``, not against the model
    being trained.

    After training, the robust model is scored on the clean test set and on
    FGSM examples built (again against ``demo.model``) from the first 500
    test images. This therefore measures resistance to attacks transferred
    from the standard model, not white-box attacks on the robust model.

    Args:
        demo: object exposing ``model`` (trained), ``create_simple_cnn()``,
            ``x_train``/``y_train`` and ``x_test``/``y_test`` (one-hot labels).
        epochs: number of passes over the training data.
        epsilon: perturbation magnitude passed to ``fgsm_attack``.

    Returns:
        Tuple ``(clean_acc, adv_accuracy)``, both as fractions in [0, 1].
    """
    print("🛡️ Training robust model with adversarial examples...")
    print("This is like training a security system to recognize attack patterns!")

    # Create new model
    demo.robust_model = demo.create_simple_cnn()

    batch_size = 32
    # Trailing samples that do not fill a whole batch are skipped
    num_batches = len(demo.x_train) // batch_size

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        epoch_loss = 0
        epoch_acc = 0

        for batch in range(num_batches):
            start_idx = batch * batch_size
            end_idx = start_idx + batch_size

            # Get batch
            x_batch = demo.x_train[start_idx:end_idx]
            y_batch = demo.y_train[start_idx:end_idx]

            # Pick half of the batch positions to replace with adversarial images
            adv_indices = np.random.choice(batch_size, batch_size // 2, replace=False)

            # fgsm_attack accepts a whole batch, so the selected images are
            # perturbed in one gradient pass instead of one call per image.
            x_adv_batch = x_batch.copy()
            x_adv_batch[adv_indices] = np.asarray(
                fgsm_attack(demo.model, x_batch[adv_indices], y_batch[adv_indices], epsilon)
            )

            # Doubled batch: clean images followed by the half-adversarial copy
            x_mixed = np.concatenate([x_batch, x_adv_batch])
            y_mixed = np.concatenate([y_batch, y_batch])

            # One optimizer step; returns loss and the compiled accuracy metric
            loss, acc = demo.robust_model.train_on_batch(x_mixed, y_mixed)
            epoch_loss += loss
            epoch_acc += acc

        avg_loss = epoch_loss / num_batches
        avg_acc = epoch_acc / num_batches
        print(f"  Loss: {avg_loss:.4f}, Accuracy: {avg_acc:.4f}")

    # Clean-data accuracy of the robust model on the full test set
    test_loss, clean_acc = demo.robust_model.evaluate(demo.x_test, demo.y_test, verbose=0)
    print(f"\n✓ Robust model clean accuracy: {clean_acc:.4f}")

    # Adversarial accuracy against attacks crafted on the ORIGINAL model
    print("Testing adversarial robustness against original model attacks...")
    # Cap at the available test-set size so the accuracy denominator is correct
    test_samples = min(500, len(demo.x_test))
    x_eval = demo.x_test[:test_samples]
    y_eval = demo.y_test[:test_samples]

    # Craft all adversarial test images in one batch, then score the ROBUST model on them
    adv_imgs = fgsm_attack(demo.model, x_eval, y_eval, epsilon)
    adv_preds = np.argmax(demo.robust_model.predict(adv_imgs, verbose=0), axis=1)
    adv_correct = int(np.sum(adv_preds == np.argmax(y_eval, axis=1)))

    adv_accuracy = adv_correct / test_samples
    print(f"✓ Robust model adversarial accuracy: {adv_accuracy:.4f} ({adv_accuracy*100:.1f}%)")
    print(f"Defense improvement: Successfully resists {adv_accuracy*100:.1f}% of attacks!")

    return clean_acc, adv_accuracy

# Section banner, emitted as three lines by a single print call
print("=" * 60, "PART 6: BUILDING DEFENSES - ADVERSARIAL TRAINING", "=" * 60, sep="\n")

# Note: despite its name, robust_accuracy holds the (clean_acc, adv_accuracy) tuple
robust_accuracy = train_robust_model(demo, epochs=EPOCHS_ROBUST, epsilon=EPSILON)

In [None]:
# CELL 10: Fixed Final Comparison
def compare_models_final(demo, epsilon=EPSILON, num_samples=200):
    """Compare the standard and robust models on clean and FGSM-perturbed test images.

    A random sample of test images is attacked once with FGSM, crafted
    against the standard model ``demo.model``. Both models are then scored on
    the clean images and on that same set of adversarial images. The robust
    model's adversarial accuracy therefore reflects attacks transferred from
    the standard model, not attacks crafted against the robust model itself.

    Prints the four accuracies and the trade-off, draws a 2x2 summary figure,
    and prints a text summary with a conclusion tier based on the gain.

    Args:
        demo: object exposing trained ``model`` and ``robust_model``, plus
            ``x_test`` and ``y_test`` (one-hot).
        epsilon: perturbation magnitude passed to ``fgsm_attack``.
        num_samples: number of distinct test images to evaluate.

    Returns:
        Tuple ``(std_clean_acc, std_adv_acc, rob_clean_acc, rob_adv_acc)``
        in percent (0-100).
    """
    print("⚖️ Comparing model performance...")

    indices = np.random.choice(len(demo.x_test), num_samples, replace=False)
    x_eval = demo.x_test[indices]
    y_eval = demo.y_test[indices]
    true_classes = np.argmax(y_eval, axis=1)

    # Adversarial examples are generated ONLY against the standard model and
    # shared by both evaluations. fgsm_attack accepts batches, so all samples
    # are perturbed in a single gradient pass.
    adv_eval = fgsm_attack(demo.model, x_eval, y_eval, epsilon)

    def accuracy_percent(model, images):
        """Percentage of `images` whose predicted class matches the true class."""
        predicted = np.argmax(model.predict(images, verbose=0), axis=1)
        return int(np.sum(predicted == true_classes)) / num_samples * 100

    std_clean_acc = accuracy_percent(demo.model, x_eval)
    std_adv_acc = accuracy_percent(demo.model, adv_eval)
    rob_clean_acc = accuracy_percent(demo.robust_model, x_eval)
    rob_adv_acc = accuracy_percent(demo.robust_model, adv_eval)

    # Display results
    print(f"\n📊 FINAL COMPARISON RESULTS (ε={epsilon})")
    print("=" * 50)
    print(f"Standard Model:")
    print(f"  Clean Accuracy:       {std_clean_acc:.1f}%")
    print(f"  Adversarial Accuracy: {std_adv_acc:.1f}%")
    print(f"\nRobust Model:")
    print(f"  Clean Accuracy:       {rob_clean_acc:.1f}%")
    print(f"  Adversarial Accuracy: {rob_adv_acc:.1f}%")
    print(f"\nTrade-offs:")
    print(f"  Clean Accuracy Lost:  {std_clean_acc - rob_clean_acc:.1f}%")
    print(f"  Adversarial Gain:     {rob_adv_acc - std_adv_acc:.1f}%")

    # 2x2 figure: accuracy comparison, attack resistance, effectiveness, trade-off
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Adversarial Training Results: Security vs Performance Trade-off', fontsize=16, fontweight='bold')

    # 1. Side-by-side accuracy comparison
    # (named model_labels so the imported keras `models` module is not shadowed)
    model_labels = ['Standard\nModel', 'Robust\nModel']
    x = np.arange(len(model_labels))
    width = 0.35

    clean_scores = [std_clean_acc, rob_clean_acc]
    adv_scores = [std_adv_acc, rob_adv_acc]

    ax1.bar(x - width/2, clean_scores, width, label='Clean Accuracy', color='lightblue', edgecolor='blue')
    ax1.bar(x + width/2, adv_scores, width, label='Adversarial Accuracy', color='lightcoral', edgecolor='red')

    ax1.set_ylabel('Accuracy (%)', fontsize=12)
    ax1.set_title('Performance Comparison', fontsize=14, fontweight='bold')
    ax1.set_xticks(x)
    ax1.set_xticklabels(model_labels)
    ax1.legend(fontsize=11)
    ax1.grid(True, alpha=0.3)
    ax1.set_ylim(0, 100)

    # Value labels just above each bar
    for i, (clean, adv) in enumerate(zip(clean_scores, adv_scores)):
        ax1.text(i - width/2, clean + 2, f'{clean:.1f}%', ha='center', va='bottom', fontweight='bold')
        ax1.text(i + width/2, adv + 2, f'{adv:.1f}%', ha='center', va='bottom', fontweight='bold')

    # 2. Adversarial accuracy of each model
    # Floor the axis scale at 1 so two 0% scores do not produce a zero-height axis
    adv_top = max(max(adv_scores), 1.0)
    ax2.bar(model_labels, adv_scores, color=['darkred', 'darkgreen'], alpha=0.8)
    ax2.set_ylabel('Adversarial Accuracy (%)', fontsize=12)
    ax2.set_title('Adversarial Attack Resistance', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim(0, adv_top * 1.2)

    for i, score in enumerate(adv_scores):
        ax2.text(i, score + adv_top*0.03, f'{score:.1f}%', ha='center', va='bottom',
                fontweight='bold', fontsize=12)

    # Callout for the gain, shown only when it exceeds 5 percentage points
    improvement = rob_adv_acc - std_adv_acc
    if improvement > 5:
        ax2.annotate(f'{improvement:.1f}%\nImprovement!',
                    xy=(1, rob_adv_acc), xytext=(0.5, rob_adv_acc * 0.7),
                    arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.3', color='gold', lw=3),
                    fontsize=14, fontweight='bold', ha='center',
                    bbox=dict(boxstyle="round,pad=0.5", facecolor='yellow', alpha=0.9, edgecolor='orange'))

    # 3. Security effectiveness: same numbers grouped by condition instead of by model
    categories = ['Normal\nOperation', 'Under\nAttack']
    standard_performance = [std_clean_acc, std_adv_acc]
    robust_performance = [rob_clean_acc, rob_adv_acc]

    x = np.arange(len(categories))
    ax3.bar(x - width/2, standard_performance, width, label='Standard Model',
           color=['lightblue', 'darkred'], alpha=0.7)
    ax3.bar(x + width/2, robust_performance, width, label='Robust Model',
           color=['blue', 'green'], alpha=0.7)

    ax3.set_ylabel('Accuracy (%)', fontsize=12)
    ax3.set_title('Security Effectiveness', fontsize=14, fontweight='bold')
    ax3.set_xticks(x)
    ax3.set_xticklabels(categories)
    ax3.legend(fontsize=11)
    ax3.grid(True, alpha=0.3)
    ax3.set_ylim(0, 100)

    # Add value labels
    for i, (std, rob) in enumerate(zip(standard_performance, robust_performance)):
        ax3.text(i - width/2, std + 2, f'{std:.1f}%', ha='center', va='bottom', fontweight='bold')
        ax3.text(i + width/2, rob + 2, f'{rob:.1f}%', ha='center', va='bottom', fontweight='bold')

    # Flag the standard model when its accuracy under attack drops below 20%
    if std_adv_acc < 20:
        ax3.annotate('VULNERABLE!', xy=(1 - width/2, std_adv_acc),
                    xytext=(0.3, 40), fontsize=12, fontweight='bold', color='red',
                    arrowprops=dict(arrowstyle='->', color='red', lw=2),
                    bbox=dict(boxstyle="round,pad=0.3", facecolor='red', alpha=0.2))

    # 4. Trade-off analysis (both values in percentage points)
    trade_off_data = {
        'Security Gain': improvement,
        'Accuracy Cost': std_clean_acc - rob_clean_acc
    }

    colors = ['green' if improvement > 0 else 'red', 'orange' if std_clean_acc > rob_clean_acc else 'green']
    bars = ax4.bar(trade_off_data.keys(), trade_off_data.values(), color=colors, alpha=0.7)
    ax4.set_ylabel('Percentage Points', fontsize=12)
    ax4.set_title('Trade-off Analysis', fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3)
    ax4.axhline(y=0, color='black', linestyle='-', alpha=0.3)

    # Labels sit above positive bars and below negative ones
    for bar, value in zip(bars, trade_off_data.values()):
        ax4.text(bar.get_x() + bar.get_width()/2, value + (abs(value)*0.05 if value >= 0 else -abs(value)*0.05),
                f'+{value:.1f}%' if value >= 0 else f'{value:.1f}%',
                ha='center', va='bottom' if value >= 0 else 'top',
                fontweight='bold', fontsize=11)

    # Verdict box: "excellent" when the gain is more than twice the accuracy cost
    if improvement > abs(std_clean_acc - rob_clean_acc) * 2:
        ax4.text(0.5, max(trade_off_data.values()) * 0.5, 'EXCELLENT\nTRADE-OFF!',
                ha='center', va='center', fontsize=14, fontweight='bold',
                bbox=dict(boxstyle="round,pad=0.5", facecolor='lightgreen', alpha=0.8))
    elif improvement > 10:
        ax4.text(0.5, max(trade_off_data.values()) * 0.5, 'GOOD\nTRADE-OFF',
                ha='center', va='center', fontsize=12, fontweight='bold',
                bbox=dict(boxstyle="round,pad=0.5", facecolor='lightyellow', alpha=0.8))

    plt.tight_layout()
    plt.show()

    # Text summary
    print("\n" + "="*60)
    print("CYBERSECURITY IMPACT SUMMARY")
    print("="*60)
    print(f"Standard Model Security Failure: {std_adv_acc:.1f}% accuracy under attack")
    print(f"Robust Model Security Success: {rob_adv_acc:.1f}% accuracy under attack")
    print(f"Security Improvement: {improvement:.1f} percentage points")
    print(f"Accuracy Trade-off: {std_clean_acc - rob_clean_acc:.1f}% clean performance cost")
    print(f"Net Benefit: {improvement:.1f}% security gain for {abs(std_clean_acc - rob_clean_acc):.1f}% accuracy cost")

    # Conclusion tier is chosen by the size of the adversarial-accuracy gain
    if improvement > 30:
        print("\nCONCLUSION: Adversarial training provides SIGNIFICANT security benefits!")
    elif improvement > 15:
        print("\nCONCLUSION: Adversarial training provides meaningful security improvements.")
    elif improvement > 5:
        print("\nCONCLUSION: Modest security improvement - consider stronger defenses.")
    else:
        print("\nCONCLUSION: Limited improvement - may need different defense strategies.")

    print("="*60)

    return std_clean_acc, std_adv_acc, rob_clean_acc, rob_adv_acc
# Section banner, emitted as three lines by a single print call
print("=" * 60, "PART 7: FINAL MODEL COMPARISON", "=" * 60, sep="\n")

# results = (std_clean_acc, std_adv_acc, rob_clean_acc, rob_adv_acc), all in percent
results = compare_models_final(demo, epsilon=EPSILON, num_samples=200)

## 🤔 FINAL OBSERVATION CHECKPOINT - Parts 6 & 7

**DEFENSE RESULTS:**
| Model Type | Clean Accuracy | Adversarial Accuracy | Robustness Gain |
|------------|----------------|---------------------|----------------|
| Standard   | ______%        | ______%             | N/A            |
| Robust     | ______%        | ______%             | +______%       |

**TRADE-OFF ANALYSIS:**
- Clean accuracy lost: ______%  
- Adversarial accuracy gained: ______%
- Is this trade-off acceptable for cybersecurity? Why?

**YOUR ANSWER:** ___________________________________________

## 🎓 Key Takeaways

**Three most important things you learned:**
1. ___________________________________________________________
2. ___________________________________________________________  
3. ___________________________________________________________

**For your capstone project, consider:**
- How will you test against adversarial attacks?
- What defense strategies will you implement?
- Is the accuracy/robustness trade-off acceptable?
