# Interactive Exploration: Adversarial Attacks and Defenses

This notebook lets you run, visualize, and experiment with adversarial attacks and defenses on image and text classifiers.

In [ ]:
# Run this cell to import dependencies
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB


## Image Classifier: Adversarial Example Demo
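Train a logistic-regression detector for the digit 3, apply a sign-of-gradient (FGSM-style) perturbation of size `epsilon` to one image of a 3, and display the original next to the perturbed image.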

In [ ]:
# Load the digit images and rescale the pixel intensities by 1/16.
data = load_digits()
X = data.data / 16.0
y = data.target

# Binary task: label 1 where the digit is a 3, label 0 otherwise.
y_binary = (y == 3).astype(int)

# Fit the logistic-regression detector on the whole dataset.
clf = LogisticRegression(max_iter=500)
clf.fit(X, y_binary)
# FGSM-style attack on one image of the positive class (a true 3).
epsilon = 0.3  # per-pixel perturbation budget
grad = clf.coef_[0]  # weight vector w, i.e. the gradient of the decision score w.x + b w.r.t. x
idx = np.where(y_binary == 1)[0][0]  # first sample labelled as a 3
x_orig = X[idx]
y_true = y_binary[idx]

# For logistic regression the cross-entropy gradient w.r.t. the input is
# (p - y_true) * w, so its sign is -sign(w) when y_true == 1 and +sign(w) when
# y_true == 0. Stepping along +sign(w) for a true 3 would only raise the
# classifier's confidence in the correct label; the attack must step along the
# sign of the loss gradient instead.
attack_direction = (1 - 2 * y_true) * np.sign(grad)

# Clip so the perturbed image stays inside the [0, 1] range.
x_adv = np.clip(x_orig + epsilon * attack_direction, 0, 1)
# Draw both images with the same fixed grey scale. Without vmin/vmax, imshow
# rescales each panel to its own min/max, so the two panels would not be
# visually comparable. x_adv is clipped to [0, 1]; x_orig is assumed to lie in
# [0, 1] as well after the 1/16 rescaling.
fig, axes = plt.subplots(1, 2)
for ax, image, title in zip(axes, (x_orig, x_adv), ('Original', 'Adversarial')):
    ax.imshow(image.reshape(8, 8), cmap='gray', vmin=0, vmax=1)
    ax.set_title(title)
    ax.axis('off')
plt.show()

# Report the classifier's probability for the positive class (digit 3) on both
# images, so the effect of the perturbation can be read off directly.
prob_orig, prob_adv = clf.predict_proba(np.vstack([x_orig, x_adv]))[:, 1]
print(f'P(digit is 3) - original: {prob_orig:.3f}, adversarial: {prob_adv:.3f}')

## Text Classifier: Adversarial Example Demo
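Train a tiny Naive Bayes text classifier on four labelled phrases, obfuscate each phrase with character substitutions (`e` → `3`, `o` → `0`), and observe the classifier's predictions on the obfuscated text.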

In [ ]:
# Toy corpus: the first two phrases carry label 1, the last two label 0.
texts = ['free money', 'urgent offer', 'hello friend', 'see you']
labels = [1, 1, 0, 0]

# Learn the bag-of-words vocabulary from the training phrases.
vectorizer = CountVectorizer()
vectorizer.fit(texts)

# Fit a multinomial Naive Bayes classifier on the word-count features.
clf = MultinomialNB()
clf.fit(vectorizer.transform(texts), labels)
def adv_text(t):
    """Return `t` with every 'e' replaced by '3' and every 'o' replaced by '0'.

    Only the lowercase letters are substituted; all other characters are
    left unchanged.
    """
    substitutions = str.maketrans({'e': '3', 'o': '0'})
    return t.translate(substitutions)
# Compare the prediction on each original phrase with the prediction on its
# obfuscated version; printing only the latter gives no baseline to judge
# whether the substitution changed the classifier's decision.
# NOTE(review): the obfuscated tokens are most likely absent from the fitted
# vocabulary, in which case their count vectors are all zeros - confirm.
adv_texts = [adv_text(t) for t in texts]
preds_orig = clf.predict(vectorizer.transform(texts))
preds_adv = clf.predict(vectorizer.transform(adv_texts))
for t, t_adv, pred_orig, pred_adv in zip(texts, adv_texts, preds_orig, preds_adv):
    print(f'Original: {t}, Pred: {pred_orig} | Adv: {t_adv}, Pred: {pred_adv}')