In [None]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Prepare data
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms

# Preprocessing: convert images to tensors, then normalise with mean 0.5 and std 1.0.
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1.0,)),
])

# MNIST is downloaded into `root` when it is not already there.
root = "/tmp"
batch_size = 1000
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

# Only the first batch of each loader is kept for the rest of the notebook.
X_train, y_train = next(iter(train_loader))
X_val, y_val = next(iter(test_loader))

# Inputs are cast to double precision.
X_train = X_train.double()
X_val = X_val.double()
    
# Move the working batches to the GPU when one is present.
# The held-out batch is named X_val / y_val in this notebook; the previous
# X_test / y_test names were never defined and raised NameError on CUDA hosts.
if torch.cuda.is_available():
    X_train = X_train.cuda()
    y_train = y_train.cuda()
    X_val = X_val.cuda()
    y_val = y_val.cuda()

In [None]:
# Build model: instantiate the LeNet wrapper and move it to the GPU if one is available.
from tda.models import pytorch_lenet
lenet = pytorch_lenet.LeNet()
# NOTE(review): train_or_load() is called here without arguments and again in
# the next cell with explicit data -- confirm this first call is intended.
lenet.train_or_load()
if torch.cuda.is_available():
    lenet = lenet.cuda()

In [None]:
# Load or train model.
# The "loader" is a one-element list holding the single training batch kept
# above; validation uses the single held-out batch.
lenet.train_or_load(train_loader=[(X_train, y_train)], val_data=(X_val, y_val),
                    num_epochs=50)

In [None]:
# Build architecture (tda pipeline terminology)
import tda.models.architectures.parser as parser
# `imp` is deprecated and was removed in Python 3.12; importlib provides reload().
from importlib import reload
reload(parser)

# A single training sample is handed to the parser together with the model.
x = X_train[0]
lenet_arch = parser.model_to_architecture(lenet, name="mnist_lenet",
                                          x=x)

In [None]:
from tda.graph import Graph

# Build the graph of the sample `x` and list the shape of every stored edge matrix.
graph = Graph.from_architecture_and_data_point(lenet_arch, x)
for layer_matrix in graph._edge_dict.values():
    print(layer_matrix.shape)

In [None]:
# Compute thresholds
from tda.thresholds import process_thresholds

class Dataset:
    """Minimal in-memory dataset: a name plus paired train and test/val samples."""

    def __init__(self, name, X_train, y_train, X_test, y_test):
        """Pair each input with its label.

        `name` is what `str()` returns for the dataset. The train and test
        sequences are zipped into lists of (input, label) tuples; a pairing
        stops at the shorter of its two sequences.
        """
        self.name = name
        self.train_dataset = [(sample, label) for sample, label in zip(X_train, y_train)]
        self.test_and_val_dataset = [(sample, label) for sample, label in zip(X_test, y_test)]

    def __str__(self):
        """Return the dataset name."""
        return self.name
    
# The same raw threshold is repeated once per architecture layer and joined
# with "_" into a single string.
threshold = 0.1
raw_thresholds = "_".join([str(threshold)] * len(lenet_arch.layers))
# The validation batch fills the dataset's test slot.
dataset = Dataset("mnist", X_train, y_train, X_val, y_val)
thresholds = process_thresholds(architecture=lenet_arch,
                                dataset=dataset,
                                raw_thresholds=raw_thresholds,
                                dataset_size=10)

In [None]:
# Build tda dataset (i.e activation graphs for clean and adversarial inputs)
from tda.protocol import get_protocolar_datasets

# Value range of the training batch, forwarded to the dataset builder as `lims`.
lims = (X_train.min(), X_train.max())
dataset_size = 200
all_epsilons = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4]

# Clean and adversarial splits, in the order returned by the builder.
train_clean, test_clean, train_adv, test_adv = get_protocolar_datasets(
    dataset=dataset,
    succ_adv=True,
    dataset_size=dataset_size,
    noise=0.,
    all_epsilons=all_epsilons,
    attack_type="FGSM_art",
    archi=lenet_arch,
    lims=lims,
)

In [None]:
# Compute embeddings for test-set adversarial inputs
from tda.embeddings import get_embedding, EmbeddingType, KernelType, ThresholdStrategy
from joblib import delayed, Parallel


def embedding_getter(line):
    """Return the persistent-diagram embedding of one dataset line.

    Relies on the notebook-level `lenet_arch` and `thresholds`. A "." is
    printed (without newline) after each embedding as a progress tick.
    """
    result = get_embedding(
        line=line,
        architecture=lenet_arch,
        embedding_type=EmbeddingType.PersistentDiagram,
        threshold_strategy=ThresholdStrategy.ActivationValue,
        thresholds=thresholds,
        dataset=None,
        edges_to_keep=None,
    )
    print(".", end="")
    return result


n_jobs = 1

# One list of embeddings per perturbation size, keyed by epsilon.
embeddings = {}
for eps, adv_lines in test_adv.items():
    print("\nComputing test adversarial embeddings for eps=%.3f" % eps)
    jobs = (delayed(embedding_getter)(line) for line in adv_lines)
    embeddings[eps] = Parallel(n_jobs=n_jobs)(jobs)

In [None]:
# Embeddings of the clean test inputs are stored under key 0, next to the
# per-epsilon adversarial entries.
embeddings[0] = Parallel(n_jobs=n_jobs)(
        delayed(embedding_getter)(line)
        for line in test_clean)

In [None]:
# Every entry except the clean one (key 0) is an adversarial test embedding.
embeddings_test_adv = {eps: emb for eps, emb in embeddings.items() if eps != 0}

In [None]:
# Compute other adversarial examples
def _embed_all(lines):
    """Run `embedding_getter` over `lines` through joblib and return the results."""
    return Parallel(n_jobs=n_jobs)(delayed(embedding_getter)(line) for line in lines)


embeddings_train_adv = {}
for eps, adv_lines in train_adv.items():
    print("\nComputing train adversarial embeddings for eps=%.3f" % eps)
    embeddings_train_adv[eps] = _embed_all(adv_lines)

print("\nComputing train clean embeddings")
embeddings_train_clean = _embed_all(train_clean)

# NOTE(review): this repeats the computation stored earlier as `embeddings[0]`.
print("\nComputing test clean embeddings")
embeddings_test_clean = _embed_all(test_clean)

In [None]:
# The real deal: try to detect adversarial examples from normal examples
from tda.protocol import evaluate_embeddings

# Hyper-parameter grid: M is fixed at 20 and sigma takes 7 log-spaced values
# from 1e-3 to 1e3.
param_space = [{"M": 20, "sigma": sigma} for sigma in np.logspace(-3, 3, 7)]
kernel_type = KernelType.SlicedWasserstein
# Clean/adversarial train and test embeddings are passed positionally.
evaluation_results = evaluate_embeddings(embeddings_train_clean,
                                         embeddings_test_clean,
                                         embeddings_train_adv,
                                         embeddings_test_adv,
                                         kernel_type=kernel_type,
                                         param_space=param_space)

In [None]:
# visualize embeddings
import matplotlib as mpl
import matplotlib.cm as cm

import seaborn as sns

# Colour per perturbation size; key 0 holds the clean test embeddings.
colors = {0: "b",
          0.1: "c",
          0.2: "m",
          0.3: "r"}
# Only these perturbation sizes are drawn.
shown_eps = [0., 0.1, 0.3]

_, (ax1, ax3, ax4) = plt.subplots(1, 3, figsize=(15, 5))
for eps in embeddings:
    if eps not in shown_eps:
        continue
    color = colors[eps]
    # The loop variable is `diagram`, not `x`, so the notebook-level sample
    # `x` used to build the architecture and graph is not overwritten.
    for diagram in embeddings[eps]:
        # Each diagram is transposed and unpacked into birth and death values.
        birth, death = np.transpose(diagram)
        age = death - birth
        ax1.plot(birth, c=color)
        ax3.plot(age, c=color)
        ax4.scatter(birth, death, c=color)

# Axis labels do not depend on the data, so they are set once.
ax1.set_xlabel("points")
ax1.set_ylabel("birth")
ax3.set_xlabel("points")
ax3.set_ylabel("age (death - birth)")
ax4.set_xlabel("birth")
ax4.set_ylabel("death")
plt.tight_layout()

In [None]:
# Plot performance of the detector
import pandas as pd
import seaborn as sns

# One record per (metric family, epsilon); `sup` flags the supervised family.
records = []
for key in ["supervised_metrics", "unsupervised_metrics"]:
    metrics = evaluation_results[key]
    sup = key != "unsupervised_metrics"
    records.extend([
        dict(sup=sup, eps=eps, auc=metrics[eps]["auc"]["upper_bound"],
             method="PersistentDiagram",
             arch=lenet_arch.name)
        for eps in metrics
    ])
df = pd.DataFrame(records)

In [None]:
# AUC against epsilon, one series per value of the `sup` column.
sns.pointplot(data=df, x="eps", y="auc", hue="sup");

In [None]:
# Display the metrics table built above.
df

In [None]:
# `layers` alone is not defined anywhere in the notebook (NameError on a fresh
# kernel); the layers live on the architecture object.
lenet_arch.layers

In [None]:
# Inspect what get_matrix() returns for the second-to-last layer.
lenet_arch.layers[-2].get_matrix()

In [None]:
import scipy.sparse.linalg as slinalg

In [None]:
# Leading singular triplet (k=1) of the first layer's matrix.
# The previous empty subscript `[]` was a syntax error. `-1` is the key used on
# the same object in the plotting cell below.
# NOTE(review): confirm -1 is the intended entry.
# svds returns (u, s, vh), so `V` holds the right singular vectors as rows.
U, s, V = slinalg.svds(lenet_arch.layers[0].get_matrix()[-1], k=1)

In [None]:
# Image of the first layer's matrix (entry -1): densified, cut to its first
# 2000 rows and transposed. Exact zeros are masked before plotting.
_, ax = plt.subplots(1, 1, figsize=(15, 8))
data = lenet_arch.layers[0].get_matrix()[-1].todense()[:2000].T
data = np.ma.masked_where(data == 0, data)
ax.matshow(data, cmap=plt.cm.RdBu);
ax.axis("off")

In [None]:
# Keys of the graph's edge dictionary.
graph._edge_dict.keys()

In [None]:
# Inspect what get_matrix() returns for the layer at index 4.
lenet_arch.layers[4].get_matrix()

In [None]:
from tda.graph import Graph

# NOTE(review): the result is not assigned, so the cell only displays it and
# leaves the existing `graph` variable untouched.
Graph.from_architecture_and_data_point(lenet_arch, X_train[0])

In [None]:
# Display the full edge dictionary of the graph.
graph._edge_dict

In [None]:
from ripser.ripser import Rips

# Rips object from ripser, configured with maxdim=1.
rips = Rips(maxdim=1)

In [None]:
# Feed the graph's adjacency matrix to the Rips object.
rips.fit_transform(graph.get_adjacency_matrix())

In [None]:
# The cell held an unfinished attribute access (`graph.`), which is a syntax
# error. List the public attributes of the object instead.
[name for name in dir(graph) if not name.startswith("_")]

In [None]:
def get_edge_list(graph):
    """
    Lazily enumerate the weighted edges of the multipartite graph.

    Vertices are numbered globally: each layer's block starts at the running
    sum of the sizes of the layers before it (layers taken in sorted index
    order). For every (source_layer, target_layer) entry of
    ``graph._edge_dict`` the matrix columns give source vertices and the rows
    give target vertices.

    Yields ``((source_vertex, target_vertex), weight)`` pairs.
    """
    layer_sizes = graph._get_shapes()
    ordered_layers = sorted(layer_sizes)
    # Start offset of each layer's block: 0, then the cumulative sizes, with
    # the grand total dropped.
    offsets = [0]
    offsets.extend(np.cumsum([layer_sizes[layer] for layer in ordered_layers]))
    offsets = offsets[:-1]
    for (source_layer, target_layer), mat in graph._edge_dict.items():
        # assumes layer ids start at -1, hence the +1 shift -- TODO confirm
        sources = mat.col + offsets[source_layer + 1]
        targets = mat.row + offsets[target_layer + 1]
        for source_vertex, target_vertex, weight in zip(sources, targets, mat.data):
            yield (source_vertex, target_vertex), weight

In [None]:
timeit -n 1 graph.get_edge_list();

In [None]:
timeit -n 1 list(get_edge_list(graph));

In [None]:
# `el` is not created anywhere in this notebook, so a plain `del el` raises
# NameError on a fresh kernel. Drop it only if it exists.
globals().pop("el", None)

In [None]:
# presumably the ratio of the two timings measured above -- TODO confirm
8.08 / .777

In [None]:
# `imp` is deprecated and was removed in Python 3.12; importlib provides reload().
from importlib import reload

# The module is imported under an alias so it does not share a name with the
# `graph` instance rebuilt below.
from tda import graph as graph_module
from tda.embeddings import persistent_diagrams

# Reload both modules, then rebuild the sample graph from the reloaded module.
reload(persistent_diagrams)
reload(graph_module)
graph = graph_module.Graph.from_architecture_and_data_point(lenet_arch, X_train[0])


In [None]:
# Run the old and the fast edge-preparation routines on the same graph
# (`toto` = old, `titi` = fast).
toto = persistent_diagrams._prepare_edges_for_diagram_old(graph);
titi = persistent_diagrams._prepare_edges_for_diagram_fast(graph);

In [None]:
# Turn the fast variant's result into a list so it can be indexed below.
titi = list(titi)

In [None]:
# Last entry of each variant, side by side.
titi[-1], toto[-1]

In [None]:
# Print every (vertices, weight) pair produced by the fast routine.
for vertices, w in persistent_diagrams._prepare_edges_for_diagram_fast(graph):
    print(vertices, w)