# Stability on CIFAR

In order to train for stability, we leverage two methods in this research: 
1. Numerical estimation of Lyapunov exponent
2. Runge kutta divergence

1 requires adaptation of the solver, where in 2 we only need parameters from the RK4 integrator. The code adaptation to the base (see notebook 0) are shown below:

In [2]:
# 1. In the solver, we perform the numerical estimation of the Lyapunov exponent:
class FixedStepNumericalLyapunovSolverV2():
    def __init__(self, f, t0, t1, method, steps=None, step_size=None, verbose=False, eps=1, n=2):
        self.name = "FixedStepNumericalLyapunovSolverV2"
        self.f = f
        self.t0 = t0
        self.t1 = t1
        self.method = method
        self.eps, self.n = eps, n
        self.vecs = [None]
        self.loops = 3
        self.diff = [None for _ in range(n)]
        self.lyapunov = True

        # Define the step sizes h to go from t0 to t1
        assert steps or step_size, "Either steps or step size should be defined!"
        if steps:
            self.hs = [(t1 - t0) / steps for s in range(steps)]
        else:
            assert step_size <= (t1 - t0), "Step size should be smaller than integration time!"
            self.hs = [step_size for _ in range(int((t1 - t0) / step_size))]
            # Add the residual in the last step, if required
            if (t1 - t0) % step_size != 0:
                self.hs.append((t1 - t0) - sum(self.hs))
        if verbose:
            print("This solver will be using the following time deltas:", self.hs)
            print("This solver will require", self.method.order * len(self.hs), "gradient evaluations")

    def batch_normalize(self, p):
        return p / torch.norm(p.view(p.size(0), -1), p=2, dim=1)[:, None, None, None]

    def reset_vec(self, y):
        # Random vector and its norm along the batch size
        ps = [torch.randn_like(y) for _ in range(self.n)]
        self.vecs = [self.batch_normalize(p) for p in ps]

    def integrate(self, y, reset=False):
        # First we integrate the original system y
        t = self.t0
        for h in self.hs:
            y = self.method.step(self.f, t, h, y)
            t += h

        if self.lyapunov:
            # Then we iteratively determine the Lyapunov vector for the batch
            self.exp = 0
            
            # First we make N versions of y by randomly mutating
            self.reset_vec(y)
            self.diff = []
            for l in range(self.loops):
                # Re-do the orbit seperation each loop
                ys = [y + self.eps * self.vecs[i] for i in range(self.n)]

                # Concatenate the y to batch the vectors
                yb = torch.cat(ys, dim=0)
                
                # Integrate the system for both y and ys
                t = self.t0
                for h in self.hs:
                    yb = self.method.step(self.f, t, h, yb)
                    t += h
                yl = torch.chunk(yb, self.n, dim=0)
                
                # Calculate the seperation
                for i in range(self.n):
                    # Calculate the difference minus the projection of earlier vectors (ealier vector, scaled by the dot product of yl and earlier vectors)
                    diff = yl[i] - y
                    proj = sum([self.vecs[j] * (diff * self.vecs[j]).sum([1,2,3])[:, None, None, None] for j in range(0, i)])
                    diff = (diff - proj)
                
                if l < self.loops - 1:
                    # The Lyapunov is the normalized vector in the difference direction
                    self.vecs[i] = self.batch_normalize(diff).detach()
                else:
                    self.diff.append(diff)
                    # Calculate exponent at last divergence
                    self.exp += 1 / t * torch.log(torch.norm(diff.view(diff.size(0), -1), p=2, dim=1) / self.eps + 1e-10).mean()
        else:
            self.exp = None
        return y


# 2. Besides the estimation of the next point, this integrator also returns the (batch_mean) L2 norm:
class K2K3L2DistRungeKutta4():
    name = "K2K3L2DistRungeKutta4"
    order = 4
    def step(f, t, dt, y):
        k1 = f(t, y)
        k2 = f(t + dt / 2,  y + dt * k1 / 2)
        k3 = f(t + dt / 2,  y + dt * k2 / 2)
        k4 = f(t + dt,      y + dt * k3)
        return y + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6, ((k2 - k3) ** 2).mean(0).sum().sqrt()

## Adversarial Stability

Adversarial attacks are one way to test for stability in Neural networks. In the paper, we use the following adversarial attacks (code defined in utils.attacks)
1. Fast Gradient Sign Method (FGSM)
2. Projected Gradient Descent (PGD)
3. DeepFool
4. Gaussian noise
5. Salt and pepper noise
6. Simple black box adversarial attack (SIMBA)

# CIFAR10

In [3]:
import sys, time, torch
from utils.training.training_ops import Average, accuracy, loader_accuracy

# Make the training / testing loaders
from utils.training.datasets import get_cifar10
train_loader, test_loader = get_cifar10(batch_size=128)
mu, std = [0.4914, 0.4822, 0.4465], [0.247, 0.243, 0.261]

# Define the model
from utils.solvers.fixed_step import FixedStepSolver, FixedStepRK4RegSolver, FixedStepNumericalLyapunovSolver, FixedStepNumericalLyapunovSolverV2
from utils.integrators.simple import Euler, ModifiedEuler, RungeKutta4, K2K3L1DistRungeKutta4, K2K3L2DistRungeKutta4, K2K3CosSimRungeKutta4
from utils.networks.odenet_2 import OdeNet

# Load Adversarial attacks
from utils.attacks.fgsm import fgsm_attack
from utils.attacks.deepfool import deepfool_attack
from utils.attacks.pgd import pgd_attack
from utils.attacks.attacks import gaussian_noise_attack, salt_and_pepper_attack, simba_attack

# Define experiments
experiments = [
    # Unregularized baseline
    (Euler, FixedStepSolver, 2, None, None),
    (Euler, FixedStepSolver, 2, None, None),
    (Euler, FixedStepSolver, 2, None, None),
    # Lyapunov exponents models
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 5e-2, 1),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 5e-2, 1),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 5e-2, 1),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 2),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 2),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 2),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 3),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 3),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 3),
    # Runge Kutta divergence models
    (K2K3L2DistRungeKutta4, FixedStepRK4RegSolver, 1, 1e-1, None),
    (K2K3L2DistRungeKutta4, FixedStepRK4RegSolver, 1, 1e-1, None),
    (K2K3L2DistRungeKutta4, FixedStepRK4RegSolver, 1, 1e-1, None),
]

for intr, sol, steps, le_reg, vectors in experiments:
    for i in range(3):
        # Define the model
        model = OdeNet(solver=sol, integrator=intr, t0=0.0, t1=1.0, steps=steps, in_channels=3, channels=[64, 128, 256, 512], classes=10, mu=mu, std=std)
        model.solver1.n, model.solver2.n, model.solver3.n, model.solver4.n = vectors, vectors, vectors, vectors

        # Naming for saving
        post_fix = "%sStep-eps1-leReg%s-Vecs%s-Run%s" % (str(steps), str(le_reg), str(vectors), str(i))
        print("\n" + model.solver1.name + "-" + model.solver1.method.name + "-" + post_fix)

        # Define the convenient average calculators
        time_keeper, train_acc, train_exps, test_acc, test_exps = Average(), Average(), Average(), Average(), Average()

        epochs = 20
        best_acc = 0
        for e in range(epochs):
            # Train loop
            model.train()
            for i, (data, target) in enumerate(train_loader):
                s = time.time()
                # Convert data proper device, forward pass and calculate loss
                data, target = data.to(model.device), target.to(model.device)
                pred = model(data)
                ce_loss = model.loss_module(pred, target)
                
                #Take optimizer step
                model.optimizer.zero_grad()
                if le_reg and model.exps:
                    (ce_loss + le_reg * model.exps).backward()
                else:
                    ce_loss.backward()
                model.optimizer.step()

                # Update metrics
                time_keeper.update(time.time() - s)
                train_acc.update(accuracy(pred, target))
                if le_reg and model.exps:
                    train_exps.update(model.exps.item())
            
            # Evaluation loop
            with torch.no_grad():
                for i, (data, target) in enumerate(test_loader):
                    data, target = data.to(model.device), target.to(model.device)
                    pred = model(data)
                    ce_loss = model.loss_module(pred, target)
                    test_acc.update(accuracy(pred, target))
                    if le_reg and model.exps:
                        test_exps.update(model.exps.item())
            print('Epoch: %d / %d | Time per iter %.3f | Train acc: %.3f | Train exps: %.3f | Test acc: %.3f | Test exps: %.3f' % (e + 1, epochs, time_keeper.eval(), 100 * train_acc.eval(), train_exps.eval(), 100 * test_acc.eval(), test_exps.eval()))

            # Save the model
            if test_acc.eval() >= best_acc:
                torch.save({'state_dict': model.state_dict()}, 'checkpoint/cifar10/' + model.solver1.name + "-" + model.solver1.method.name + "-" + post_fix + '.pth')
                best_acc = test_acc.eval()

            # Reset statistics each epoch:
            time_keeper.reset(), train_acc.reset(), train_exps.reset(), test_acc.reset(), test_exps.reset()

            # Decay Learning Rate
            model.scheduler.step()

        # Perform FGSM tests
        # Check baseline accuracy
        gaussian_noise_attack(model, test_loader, std=0.0)

        # Run the random gaussian attack
        gaussian_noise_attack(model, test_loader, std=35/255.)

        # Run salt and pepper attack
        salt_and_pepper_attack(model, test_loader, 0.1)

        # Run test for each epsilon
        fgsm_attack(model, test_loader, 0.2/255.)

        # Run the PGD attack
        pgd_attack(model, test_loader, epsilon=4/255., alpha=1/255., iters=20)

        # Run DeepFool tests
        deepfool_attack(model, test_loader, 10)

        # Run black box attack
        simba_attack(model, test_loader)

Files already downloaded and verified
Files already downloaded and verified
This solver will be using the following time deltas: [0.5, 0.5]
This solver will require 2 gradient evaluations
This model is using 7847946 parameters
This model is using 12 Conv layers

FixedStepSolver-Euler-2Step-eps1-leRegNone-VecsNone-Run0
Epoch: 1 / 20 | Time per iter 0.028 | Train acc: 49.115 | Train exps: 0.000 | Test acc: 63.001 | Test exps: 0.000


FileNotFoundError: [Errno 2] No such file or directory: 'checkpoints/cifar10/FixedStepSolver-Euler-2Step-eps1-leRegNone-VecsNone-Run0.pth'

In [None]:
# Code for plotting
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import random

import torch
from torchvision.transforms import transforms
import torchvision.transforms.functional as TF

checkpoints = [
    ("FixedStepSolver-Euler-2Step-Run0.pth", Euler, 2),
    ("FixedStepNumericalLyapunovSolverV2-Euler-2Step-leReg0.1-Vecs2-Run0.pth", Euler, 2),
    ("FixedStepRK4RegSolver-K2K3L2DistRungeKutta4-2Step-leReg0.01-Vecs0-Run0.pth", RungeKutta4, 1),
]

idxs = [0, 160, 40, 190, 215] #[random.randint(0, 256) for _ in range(5)]
size = 128

width, height = (1 + len(checkpoints)) * size, size * len(idxs) 
grid = Image.new('RGB', (width, height))

with torch.no_grad():
    data, target = next(iter(test_loader))

    for (path, intr, steps) in checkpoints:
        model = OdeNet(solver=FixedStepNumericalLyapunovSolverV2, integrator=intr, t0=0.0, t1=1.0, steps=steps, in_channels=3, channels=[64, 128, 256, 512], classes=10,  mu=mu, std=std)

        print("\nCheckpoint: ", path)
        model.load_state_dict(torch.load("checkpoint/cifar10/"+path, map_location=torch.device('cpu'))["state_dict"])

        # Set loops higher to converge
        model.solver1.loops, model.solver2.loops, model.solver3.loops, model.solver4.loops = 10, 10, 10, 10

        # Convert data proper device, forward pass and calculate loss
        data, target = data.to(model.device), target.to(model.device)
        pred = model(data)
        for i in idxs:
            image = data[i].to('cpu').clone()
            img = TF.to_pil_image(image).resize((size, size), Image.ANTIALIAS)
            grid.paste(img, (0, idxs.index(i) * size))

            # Get vectors
            diff = model.solver2.diff[0][i, :, :, :].abs().mean(0).to('cpu')
            print(i, diff.sum(), diff.max())

            # Scale diff 0 - 1
            diff = diff / 1.3

            # Make heatmap
            heatmap = torch.zeros(size=[3] + list(diff.size()))
            heatmap[0, :, :] = 2 * diff
            heatmap[1, :, :] = 1 - 2 * torch.abs(diff - 0.5)
            heatmap[2, :, :] = 1 - 2 * diff
            heatmap = heatmap.clamp(0, 1)

            h_img = TF.to_pil_image(heatmap).resize((size, size), Image.ANTIALIAS)
            res = Image.blend(img, h_img, alpha=0.6)
            grid.paste(res, (size + checkpoints.index((path, intr, steps)) * size, idxs.index(i) * size))

    grid.show()
    grid.save('figs/grid_test.jpg')

# CIFAR100

In [None]:
import sys, time, torch
from utils.training.training_ops import Average, accuracy, loader_accuracy

# Make the training / testing loaders
from utils.training.datasets import get_cifar100
train_loader, test_loader = get_cifar100(batch_size=128)
mu, std = [n/255. for n in [129.3, 124.1, 112.4]], [n/255. for n in [68.2,  65.4,  70.4]]

# Define the model
from utils.solvers.fixed_step import FixedStepSolver, FixedStepRK4RegSolver, FixedStepNumericalLyapunovSolver, FixedStepNumericalLyapunovSolverV2
from utils.integrators.simple import Euler, ModifiedEuler, RungeKutta4, K2K3L2DistRungeKutta4
from utils.networks.odenet_2 import OdeNet

# Load Adversarial attacks
from utils.attacks.fgsm import fgsm_attack
from utils.attacks.deepfool import deepfool_attack
from utils.attacks.pgd import pgd_attack
from utils.attacks.attacks import gaussian_noise_attack, salt_and_pepper_attack, simba_attack

# Define experiments
experiments = [
    # Unregularized baseline
    (Euler, FixedStepSolver, 2, None, None),
    (Euler, FixedStepSolver, 2, None, None),
    (Euler, FixedStepSolver, 2, None, None),
    # Lyapunov exponents models
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 1),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 5e-2, 1),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-2, 1),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 2),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 5e-2, 2),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-2, 2),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-1, 3),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 5e-2, 3),
    (Euler, FixedStepNumericalLyapunovSolverV2, 2, 1e-2, 3),
    # Runge Kutta divergence models
    (K2K3L2DistRungeKutta4, FixedStepRK4RegSolver, 1, 1e-1, None),
    (K2K3L2DistRungeKutta4, FixedStepRK4RegSolver, 1, 5e-2, None),
    (K2K3L2DistRungeKutta4, FixedStepRK4RegSolver, 1, 1e-2, None),
]

for intr, sol, steps, le_reg, vectors in experiments:
    for i in range(3):
        # Define the model
        model = OdeNet(solver=sol, integrator=intr, t0=0.0, t1=1.0, steps=steps, in_channels=3, channels=[64, 128, 256, 512], classes=100,  mu=mu, std=std)
        model.solver1.n, model.solver2.n, model.solver3.n, model.solver4.n = vectors, vectors, vectors, vectors

        # Naming for saving
        post_fix = "%sStep-leReg%s-Vecs%s-Run%s" % (str(steps), str(le_reg), str(vectors), str(i))
        print("\n" + model.solver1.name + "-" + model.solver1.method.name + "-" + post_fix)

        # Define the convenient average calculators
        time_keeper, train_acc, train_exps, test_acc, test_exps = Average(), Average(), Average(), Average(), Average()

        epochs = 20
        best_acc = 0
        for e in range(epochs):
            # Train loop
            model.train()
            for i, (data, target) in enumerate(train_loader):
                s = time.time()
                # Convert data proper device, forward pass and calculate loss
                data, target = data.to(model.device), target.to(model.device)
                pred = model(data)
                ce_loss = model.loss_module(pred, target)
                
                #Take optimizer step
                model.optimizer.zero_grad()
                if le_reg and model.exps:
                    (ce_loss + le_reg * model.exps).backward()
                else:
                    ce_loss.backward()
                model.optimizer.step()

                # Update metrics
                time_keeper.update(time.time() - s)
                train_acc.update(accuracy(pred, target))
                if le_reg and model.exps:
                    train_exps.update(model.exps.item())
            
            # Evaluation loop
            with torch.no_grad():
                for i, (data, target) in enumerate(test_loader):
                    data, target = data.to(model.device), target.to(model.device)
                    pred = model(data)
                    ce_loss = model.loss_module(pred, target)
                    test_acc.update(accuracy(pred, target))
                    if le_reg and model.exps:
                        test_exps.update(model.exps.item())
            print('Epoch: %d / %d | Time per iter %.3f | Train acc: %.3f | Train exps: %.3f | Test acc: %.3f | Test exps: %.3f' % (e + 1, epochs, time_keeper.eval(), 100 * train_acc.eval(), train_exps.eval(), 100 * test_acc.eval(), test_exps.eval()))

            # Save the model
            if test_acc.eval() >= best_acc:
                torch.save({'state_dict': model.state_dict()}, 'checkpoint/cifar100/' + model.solver1.name + "-" + model.solver1.method.name + "-" + post_fix + '.pth')
                best_acc = test_acc.eval()

            # Reset statistics each epoch:
            time_keeper.reset(), train_acc.reset(), train_exps.reset(), test_acc.reset(), test_exps.reset()

            # Decay Learning Rate
            model.scheduler.step()

        # Perform FGSM tests
        # Check baseline accuracy
        gaussian_noise_attack(model, test_loader, std=0.0)

        # Run the random gaussian attack
        gaussian_noise_attack(model, test_loader, std=35/255.)

        # Run salt and pepper attack
        salt_and_pepper_attack(model, test_loader, 0.1)

        # Run test for each epsilon
        fgsm_attack(model, test_loader, 0.2/255.)

        # Run the PGD attack
        pgd_attack(model, test_loader, epsilon=4/255., alpha=1/255., iters=20)

        # Run DeepFool tests
        deepfool_attack(model, test_loader, 100)

        # Run black box attack
        simba_attack(model, test_loader)