# HW12 - Adversarial Examples


In this problem, we will first visualize the decision boundaries of different models, and then study the robustness of the linear models as well as the kernel ridge regression model.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
import torch.nn.functional as F

import matplotlib.pyplot as plt
import seaborn
import matplotlib.patches as patches
%matplotlib inline
seaborn.set(font_scale=2)
seaborn.set_style("white")

from sklearn.preprocessing import normalize
import numpy as np
import ipywidgets as widgets
from ipywidgets import interactive


## Part (I). 2D Toy Example


### Generate Data

We first generate data points with labels $y\in \{+1, -1\}$ such that any two distinct points $x_i$, $x_j$ are more than $r^{\prime}$ apart, i.e.,

$$\| x_i - x_j \|_{2} > r^{\prime}, \quad \text{for}\, i \neq j.$$


In [None]:
def cal_radius(x):
    """Return the Euclidean length of the 2-D vector ``x``.

    Only ``x[0]`` and ``x[1]`` are read; any further components are ignored.
    """
    first, second = x[0], x[1]
    squared_length = first ** 2 + second ** 2
    return np.sqrt(squared_length)


# Rejection-sample m points in the unit square that are pairwise more than
# 1.1 * r apart, so circles of radius r / 2 around them never overlap.
m = 50
r = 0.1
epsilon = r/2

np.random.seed(221)
x_train = [np.random.uniform(size=(2))]
while len(x_train) < m:
    p = np.random.uniform(size=(2))
    # Distance from the candidate to its closest already-accepted point.
    nearest = min(cal_radius(p - other) for other in x_train)
    if nearest > 1.1*r:
        x_train.append(p)

X_train = torch.Tensor(np.stack(x_train))
torch.manual_seed(1)
# These random labels are replaced on the next line; the draw is kept because
# it advances torch's RNG state, which later torch random draws depend on.
y_train = (torch.rand(m)+0.5).long()
# Label 1 for points above the diagonal (second coordinate > first), else 0.
y_train = (X_train[:, 1] - X_train[:, 0] > 0).long()


**Visualize the data points with $\|\cdot\|_{2}$ perturbation balls around training samples.**


In [None]:
# Scatter the training points coloured by label and outline a circle of
# radius r / 2 (the perturbation ball) around each of them.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap="coolwarm", s=70)
ax.axis("equal")
ax.axis([0, 1, 0, 1])
for a in x_train:
    ball = patches.Circle((a[0], a[1]), r*0.5, fill=False, edgecolor='black')
    ax.add_patch(ball)


### Training a Neural Network
We first train a network with two hidden layers of width 200 each, and then visualize the decision boundary of the learned NN.


In [None]:
def standard_train(X, y, n_iters=1000, lr=1e-3, hidden=200):
    """Train a small ReLU MLP classifier on 2-D inputs with full-batch Adam.

    The network is ``2 -> hidden -> hidden -> 2`` (two hidden layers) and is
    trained with cross-entropy loss on the whole of ``X`` at every step.

    Args:
        X: float tensor of inputs with two features per row.
        y: long tensor of class indices (0 or 1), one per row of ``X``.
        n_iters: number of optimisation steps (default 1000).
        lr: Adam learning rate (default 1e-3).
        hidden: width of both hidden layers (default 200).

    Returns:
        The trained ``nn.Sequential`` model, switched to eval mode.

    Raises:
        ValueError: if ``n_iters`` is smaller than 1.
    """
    if n_iters < 1:
        raise ValueError("n_iters must be at least 1")

    net = nn.Sequential(
        nn.Linear(2, hidden),
        nn.ReLU(),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
        nn.Linear(hidden, 2)
    )

    # The loss module is stateless, so build it once instead of every step.
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(net.parameters(), lr=lr)
    for _ in range(n_iters):
        # Tensors track gradients themselves; the old Variable wrapper is
        # a deprecated no-op and is not needed.
        out = net(X)
        loss = criterion(out, y)
        opt.zero_grad()
        loss.backward()
        opt.step()

    # Loss and error are those of the forward pass made just before the
    # final optimiser step.
    err = (out.argmax(dim=1) != y).float().mean()
    print('loss: ', loss.item(), 'training error: ', err.item())
    return net.eval()


def visualize_dnn(net, X, y, x):
    """Plot the decision regions of a two-class network on the unit square.

    The network is evaluated on a 100 x 100 grid over [0, 1]^2 and the sign
    of (class-1 logit - class-0 logit) is drawn as two filled regions. The
    training points and a circle of radius ``r / 2`` around each one are
    drawn on top (``r`` is the module-level separation radius).

    Args:
        net: model mapping an (n, 2) float tensor to (n, 2) logits.
        X: tensor of training inputs; columns 0 and 1 are the plot axes.
        y: tensor of training labels, used to colour the points.
        x: iterable of 2-D points around which circles are drawn.
    """
    XX, YY = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
    grid = torch.Tensor(np.stack([np.ravel(XX), np.ravel(YY)]).T)
    # Pure inference: no autograd graph is needed for plotting.
    with torch.no_grad():
        logits = net(grid)
    # reshape replaces the deprecated Tensor.resize; positive values mean
    # the network prefers class 1.
    ZZ = (logits[:, 1] - logits[:, 0]).reshape(100, 100).numpy()

    fig, ax = plt.subplots(figsize=(8, 8))
    plt.title('Deep Neural Network', pad=20)
    # Three levels give two bands split at 0, i.e. the decision boundary.
    ax.contourf(XX, YY, ZZ, cmap="coolwarm", levels=np.linspace(-1000, 1000, 3))
    ax.scatter(X.numpy()[:, 0], X.numpy()[:, 1], c=y.numpy(), cmap="coolwarm", s=70)
    ax.axis("equal")
    ax.axis([0, 1, 0, 1])

    for a in x:
        ax.add_patch(patches.Circle((a[0], a[1]), r*0.5, fill=False))

# Fit the network on the toy data, then draw its decision regions together
# with the training points and their perturbation circles.
net = standard_train(X_train, y_train)
visualize_dnn(net, X_train, y_train, x_train)


### Random fourier features
We consider first transforming the input $x_i$ to fourier features $z_i$ with dimension $2d$, i.e., $z_i \in\mathbb{R}^{2d}$,

\begin{align*}
z_{i}^{2k} &= \text{cos}(0.1 \cdot 2 \cdot \pi \cdot k \cdot h_{k}^{\top}x_{i}),\\
z_{i}^{2k+1} &= \text{sin}(0.1 \cdot 2 \cdot \pi \cdot k  \cdot h_{k}^{\top}x_{i}),
\end{align*}

where $h_{k}\in \mathbb{R}^{2\times 1}$ is a random vector and $\|h_{k}\|_{2}=1$.


**Process data.**


In [None]:
# Map the {0, 1} class labels to {-1, +1} for the regression models below.
# The guard keeps this cell idempotent: without it, running the cell a second
# time would map the already-converted labels {-1, +1} to {-3, +1}.
if y_train.min() >= 0:
    y_train = y_train * 2.0 - 1.0
# NumPy copies of the data for the NumPy / scikit-learn code that follows.
X_train_np = X_train.numpy()
y_train_np = y_train.numpy()


**Define functions**


In [None]:
def featurization(X_train, H):
    """Transform data to Fourier features.

    For each row ``h_k`` of ``H`` (k = 0, 1, ...), two features are produced:
    ``cos(0.1 * 2 * pi * k * h_k . x)`` and ``sin(0.1 * 2 * pi * k * h_k . x)``.
    They are interleaved as ``[cos_0, sin_0, cos_1, sin_1, ...]``.

    Args:
        X_train: (n, p) array-like of inputs. Anything ``np.asarray`` can
            convert is accepted (NumPy arrays, nested lists, CPU tensors).
        H: (d, p) array-like whose rows are the projection directions.

    Returns:
        (n, 2 * d) float NumPy array of Fourier features.
    """
    # Coerce up front so callers may pass non-NumPy containers safely.
    X = np.asarray(X_train, dtype=float)
    H = np.asarray(H, dtype=float)
    n_dirs = H.shape[0]

    # Frequency multiplier for each direction: 0.1 * 2 * pi * k.
    freqs = 0.1 * 2 * np.pi * np.arange(n_dirs)
    angles = (X @ H.T) * freqs  # (n, d): one phase per sample and direction

    # Stack on a trailing axis so the reshape interleaves cos/sin per k.
    pairs = np.stack([np.cos(angles), np.sin(angles)], axis=2)
    return pairs.reshape(X.shape[0], 2 * n_dirs)

def ridge_solver(X, y, lambda_reg):
    """Return the closed-form ridge regression weights.

    Solves ``(X^T X + lambda_reg * I) w = X^T y`` for ``w``.

    Args:
        X: (n, d) design matrix.
        y: (n,) or (n, k) targets.
        lambda_reg: ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        Weight array of shape (d,) or (d, k), matching ``y``.
    """
    gram = X.T @ X + lambda_reg * np.eye(X.shape[1])
    # Solving the linear system is cheaper and numerically more stable than
    # forming the explicit inverse, which matters for tiny lambda_reg.
    return np.linalg.solve(gram, X.T @ y)

def visualize_ridge_fourier(H, X_feature, X, y, x):
    """Fit ridge regression on Fourier features and plot its decision regions.

    Args:
        H: projection directions used to featurize the plotting grid.
        X_feature: Fourier features of the training inputs.
        X: training inputs; columns 0 and 1 are the plot axes.
        y: training targets, used for fitting and for colouring the points.
        x: iterable of 2-D points around which circles of radius ``r / 2``
            are drawn (``r`` is the module-level separation radius).
    """
    XX, YY = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
    grid = np.stack([np.ravel(XX), np.ravel(YY)]).T

    # Featurize the grid the same way as the training data, fit the linear
    # model on the training features, then score every grid point.
    grid_feature = featurization(grid, H)
    weights = ridge_solver(X_feature, y, lambda_reg=1e-5)
    ZZ = (grid_feature @ weights).reshape(100, 100)

    fig, ax = plt.subplots(figsize=(8, 8))
    plt.title('Ridge Regression Using Fourier Features', pad=20)
    # Three levels give two bands split at 0, i.e. the decision boundary.
    ax.contourf(XX, YY, ZZ, cmap="coolwarm", levels=np.linspace(-1000, 1000, 3))
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap="coolwarm", s=70)
    ax.axis("equal")
    ax.axis([0, 1, 0, 1])

    for centre in x:
        ax.add_patch(patches.Circle((centre[0], centre[1]), r*0.5, fill=False))


In [None]:
def generate_fourier_dimension_widget():
    """Build the slider that selects ``d``, the number of random directions.

    The slider runs from 5 to 200 in steps of 5 and starts at 10. With
    ``continuous_update=False`` the value only changes when the handle is
    released.
    """
    return widgets.IntSlider(
        value=10,
        min=5,
        max=200,
        step=5,
        description='number of Fourier features',
        # The label is longer than the default description width and would
        # be truncated; let it take its natural width instead.
        style={'description_width': 'initial'},
        continuous_update=False)


In [None]:
def visualize_fourier(d):
    """Draw ridge-regression decision regions for ``d`` random directions.

    Samples ``d`` Gaussian directions in the plane, normalises each to unit
    l2 norm, featurizes the training data and plots the fitted model.

    Args:
        d: number of random directions (the feature dimension is 2 * d).
    """
    H = normalize(np.random.randn(d, 2), axis=1, norm='l2')
    # Use the NumPy copy of the inputs: the featurization is NumPy code, and
    # the remaining arguments below are NumPy arrays as well.
    X_feature_train = featurization(X_train_np, H)
    visualize_ridge_fourier(H, X_feature_train, X_train_np, y_train_np, x_train)


**Visualize the decision boundary of the learned ridge regression on Fourier features**


In [None]:
# Bind the slider to visualize_fourier: each new slider value is passed as d.
interactive_plot = interactive(visualize_fourier, d=generate_fourier_dimension_widget())
# Last expression of the cell, so the notebook displays the widget.
interactive_plot


### Kernel Ridge Regression


Recall the rbf kernel,

$$K_{\text{rbf}}(x_i, x_j) = \exp(-\gamma\|x_i - x_j\|_{2}^2),$$

we visualize the decision boundary of kernel ridge regression model with different $\gamma$ parameter.


In [None]:
from sklearn.kernel_ridge import KernelRidge


def visualize_kernel_ridge(X, y, x, gamma):
    """Fit RBF kernel ridge regression and plot its decision regions.

    Args:
        X: training inputs; columns 0 and 1 are the plot axes.
        y: training targets, used for fitting and for colouring the points.
        x: iterable of 2-D points around which circles of radius ``r / 2``
            are drawn (``r`` is the module-level separation radius).
        gamma: RBF kernel parameter passed to ``KernelRidge``.
    """
    model = KernelRidge(alpha=1e-5, kernel='rbf', gamma=gamma)
    model.fit(X, y)

    # Score the model on a 100 x 100 grid covering the unit square.
    XX, YY = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
    grid = np.stack([np.ravel(XX), np.ravel(YY)]).T
    ZZ = model.predict(grid).reshape(100, 100)

    fig, ax = plt.subplots(figsize=(8, 8))
    plt.title('Kernel Ridge Regression', pad=20)
    # Three levels give two bands split at 0, i.e. the decision boundary.
    ax.contourf(XX, YY, ZZ, cmap="coolwarm", levels=np.linspace(-1000, 1000, 3))
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap="coolwarm", s=70)
    ax.axis("equal")
    ax.axis([0, 1, 0, 1])

    for centre in x:
        ax.add_patch(patches.Circle((centre[0], centre[1]), r*0.5, fill=False))


In [None]:
def generate_gamma_widget():
    """Build the log-scale slider that selects the RBF kernel's gamma.

    ``min`` and ``max`` are exponents of ``base``, so the slider covers
    10**-3 to 10**6 in whole decades and starts at 1.0. With
    ``continuous_update=False`` the value only changes when the handle is
    released.
    """
    return widgets.FloatLogSlider(
        value=1.0,
        base=10.0,
        min=-3,
        max=6,
        step=1,
        # Raw string: "\g" is not a valid escape sequence, and a non-raw
        # literal triggers an invalid-escape warning on recent Pythons.
        description=r'$\gamma$:',
        continuous_update=False)


In [None]:
def visualize_kernel_gamma(gamma):
    """Plot kernel ridge regression on the toy training data for ``gamma``.

    Thin wrapper so the interactive slider only has to supply ``gamma``; the
    data comes from the module-level ``X_train_np``, ``y_train_np`` and
    ``x_train``.
    """
    visualize_kernel_ridge(X_train_np, y_train_np, x_train, gamma)


**Visualize the decision boundary of the learned kernel ridge regression**


In [None]:
# Bind the slider to visualize_kernel_gamma: each new value is passed as gamma.
interactive_plot = interactive(visualize_kernel_gamma, gamma=generate_gamma_widget())
# Last expression of the cell, so the notebook displays the widget.
interactive_plot


## Part (II). MNIST Binary Classification - digit '1' and digit '3'


Now we study the binary classification problem on a subset of the MNIST dataset: we are distinguishing digit `1` and digit `3`.


**Load training data**


In [None]:
# Load the pre-extracted train/test splits from .npy files in the current
# working directory.
X_train_mnist = np.load('X_mnist_1_3_train.npy')
y_train_mnist = np.load('y_mnist_1_3_train.npy')
X_test_mnist = np.load('X_mnist_1_3_test.npy')
y_test_mnist = np.load('y_mnist_1_3_test.npy')

# transform label to {+1, -1}
# NOTE(review): this assumes the stored labels are in {0, 1} - confirm.
y_train_mnist = y_train_mnist * 2.0 - 1.0
y_test_mnist = y_test_mnist * 2.0 - 1.0


Learn linear model via logistic regression.


In [None]:
from sklearn.linear_model import LogisticRegression
# Fit a linear classifier on the training split (library defaults apart from
# the fixed random_state) and report its accuracy on the clean test split.
clf_LR = LogisticRegression(random_state=0).fit(X_train_mnist, y_train_mnist)
print('Accuracy on Original Test Examples', clf_LR.score(X_test_mnist, y_test_mnist))


Now you need to apply the results you derived in Part (a) to construct the adversarial perturbation and write the code.

(Hint (1): You only need to construct one perturbation $\delta$ for all examples.)

(Hint (2): You could use the weights ($\theta$) of the learned linear model.)


In [None]:
# Learned weights of the linear model (the theta from the hint above).
weights_lr = clf_LR.coef_
# Perturbation budget; the random baseline later in the notebook also uses it.
epsilon = 0.1
# TODO: You need to write code to construct the adversarial perturbation
#       Name the perturbation as 'delta_adv'
#       The constructed adversarial perturbation is R^{d}
#       NOTE: the tiling code below produces an (n, d) matrix only when
#       'delta_adv' is a column vector of shape (d, 1).

### start compute_delta_adv ###

### end compute_delta_adv ###

# Repeat the perturbation for n times and construct the perturbation matrix with same dimension as X_test_mnist
# For each data point, multiply the computed 'delta_adv' with label y_i \in {+1, -1}
delta_adv_mtx =  np.tile(delta_adv, (1,y_test_mnist.shape[0])).transpose()
sign_y_test_mnist_mtx = np.tile(y_test_mnist, (delta_adv.shape[0], 1)).transpose()
delta_adv_mtx = np.multiply(delta_adv_mtx, sign_y_test_mnist_mtx)

print('Accuracy on Adversarial Perturbed Test Examples', clf_LR.score(X_test_mnist+delta_adv_mtx, y_test_mnist))


To see whether this is good or bad, we randomly perturb the inputs and see what happens.


In [None]:
# Random baseline: every entry of every test example is shifted by +epsilon
# or -epsilon with a random sign. The shape is taken from X_test_mnist rather
# than hard-coded, so the cell keeps working if the test split changes size.
delta_random_mtx = epsilon * np.sign(np.random.randn(*X_test_mnist.shape))
print('Accuracy on Random Perturbed Test Examples', clf_LR.score(X_test_mnist+delta_random_mtx, y_test_mnist))


**Next, we take one particular test example, and visualize the original as well as the perturbed images.**


In [None]:
# Compare the label of the first test example with the logistic-regression
# prediction on its clean, adversarially perturbed and randomly perturbed
# versions. Each line predicts on the full test matrix and keeps entry 0.
print('True Label: ', y_test_mnist[0])
print('Prediction on Original Sample: ', clf_LR.predict(X_test_mnist)[0])
print('Prediction on Adversarial Perturbed Sample: ', clf_LR.predict(X_test_mnist+delta_adv_mtx)[0])
print('Prediction on Random Perturbed Sample: ', clf_LR.predict(X_test_mnist+delta_random_mtx)[0])


In [None]:
# Show the first test example side by side: clean, adversarially perturbed
# and randomly perturbed, each arranged as a 28 x 28 grayscale image.
fig, axs = plt.subplots(1, 3, figsize=(35, 10))
panels = [
    ('Original Image', X_test_mnist[0]),
    ('Adversarial Perturbed Image', X_test_mnist[0]+delta_adv_mtx[0]),
    ('Random Perturbed Image', X_test_mnist[0]+delta_random_mtx[0]),
]
for panel_ax, (panel_title, pixels) in zip(axs, panels):
    panel_ax.set_title(panel_title, fontsize=35, pad=20)
    panel_ax.matshow(np.resize(pixels, (28, 28)), cmap="gray")


Finally, we evaluate the kernel ridge regression model using the adversarial perturbation (constructed on the linear model).

Try different $\gamma$, say $\gamma \in \{0.1, 0.01, 0.001, 0.0001\}$.


In [None]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import accuracy_score

# Fit RBF kernel ridge regression on the +/-1 labels. gamma is the value to
# vary, as suggested in the text above.
clf_kernel_ridge = KernelRidge(alpha=1e-5, kernel='rbf', gamma=0.001)
clf_kernel_ridge.fit(X_train_mnist, y_train_mnist)

# The regressor outputs real values; their sign is used as the predicted
# class. Accuracy is reported on clean, adversarial and random inputs, reusing
# the perturbation matrices built for the linear model.
y_predict = np.sign(clf_kernel_ridge.predict(X_test_mnist))
print('Accuracy on Original Test Examples: ', accuracy_score(y_test_mnist, y_predict))
y_predict_adv = np.sign(clf_kernel_ridge.predict(X_test_mnist + delta_adv_mtx))
print('Accuracy on Adversarial Perturbed Examples: ', accuracy_score(y_test_mnist, y_predict_adv))
y_predict_random = np.sign(clf_kernel_ridge.predict(X_test_mnist + delta_random_mtx))
print('Accuracy on Random Perturbed Examples: ', accuracy_score(y_test_mnist, y_predict_random))


### About finding adversarial examples of the nonlinear model

In general, it is not easy to find a closed-form solution for nonlinear models, unlike for the linear model we considered above. For example, to construct adversarial examples for deep neural networks, we could iteratively perform projected gradient ascent in the input space (for more information, refer to this [paper](https://arxiv.org/pdf/1706.06083.pdf)).


**Congrats! Hope you learned something from this Jupyter notebook.**
