# Adversarial example using ONNX



## Import the necessary packages and load data



In [None]:
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
import onnx
from onnx import helper, TensorProto

import gurobipy as gp

from gurobi_ml import add_predictor_constr

In [None]:
# Fetch the MNIST digits through Keras; the loader returns a (train, test)
# pair, each of which is an (images, labels) pair.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

We flatten each $28 \times 28$ image in `x_train` and `x_test` into a vector of length 784 and scale the pixel values by dividing them by 255.


In [None]:
# Cast to float32, divide by 255.0 and flatten every image to one row of
# 28 * 28 values. Both splits go through exactly the same transformation.
n_pixels = 28 * 28
x_train, x_test = (
    tf.reshape(tf.cast(images, tf.float32) / 255.0, [-1, n_pixels])
    for images in (x_train, x_test)
)

## Construct and train the neural network



In [None]:
# Small fully connected classifier: two hidden ReLU layers of equal width,
# then a 10-unit output layer without activation (it returns logits).
hidden_units = 50
mlp_layers = [tf.keras.layers.InputLayer((28 * 28,))]
mlp_layers += [
    tf.keras.layers.Dense(hidden_units, activation="relu") for _ in range(2)
]
mlp_layers.append(tf.keras.layers.Dense(10))  # logits
nn = tf.keras.models.Sequential(mlp_layers)

In [None]:
# The network outputs logits, so the loss is told to apply the softmax itself
# (from_logits=True). Labels are integer class ids, hence the "sparse" variants.
optimizer = tf.keras.optimizers.Adam(0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

nn.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy])

In [None]:
# Train for a few epochs, evaluating on the test split after each one.
n_epochs = 3
nn.fit(x=x_train, y=y_train, epochs=n_epochs, validation_data=(x_test, y_test))

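We now convert the trained Keras model to an ONNX MLP: each dense layer becomes a `Gemm` node, followed by a `Relu` node when the layer uses a ReLU activation.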


In [None]:
def keras_dense_layers_to_onnx(model):
    """Export the Dense layers of a Keras model as an ONNX multilayer perceptron.

    Every ``tf.keras.layers.Dense`` layer of ``model`` becomes a ``Gemm`` node,
    followed by a ``Relu`` node when the layer's activation is ``"relu"``.
    The graph reads a float input named ``"X"`` of shape ``[None, n_in]`` and
    exposes a float output named ``"Y"`` of shape ``[None, n_out]``.

    Layers that are not ``Dense`` (including ``InputLayer``) are ignored, so the
    result is only equivalent to ``model`` when it is a plain chain of Dense
    layers.

    Parameters
    ----------
    model
        Keras model whose ``layers`` are inspected.

    Returns
    -------
    onnx.ModelProto
        The ONNX model, already validated with ``onnx.checker.check_model``.

    Raises
    ------
    ValueError
        If ``model`` contains no Dense layer, or if a Dense layer uses an
        activation other than ``"relu"`` or ``"linear"``. Exporting such a
        layer as linear would silently produce a different function.
    """
    # Collect (kernel, bias, activation) for each Dense layer, in model order.
    layers = []
    for layer in model.layers:
        if not isinstance(layer, tf.keras.layers.Dense):
            continue
        weights = layer.get_weights()
        W = weights[0].astype(np.float32)
        if len(weights) > 1:
            b = weights[1].astype(np.float32)
        else:
            # Dense layer built without a bias: use an explicit zero bias so
            # every Gemm node has the same three inputs.
            b = np.zeros(W.shape[1], dtype=np.float32)
        act = layer.get_config().get("activation", "linear")
        if act is None:
            act = "linear"
        if act not in ("linear", "relu"):
            raise ValueError(
                f"Unsupported activation {act!r} in layer {layer.name!r}; "
                "only 'relu' and 'linear' can be converted"
            )
        layers.append((W, b, act))

    if not layers:
        raise ValueError("model contains no Dense layers to convert")

    # The input width is read from the first kernel: it is the dimension Gemm
    # actually requires, and it does not depend on InputLayer attributes that
    # may not be available in every Keras version.
    n_in = layers[0][0].shape[0]
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [None, n_in])

    last = "X"
    inits = []
    nodes = []
    for i, (W, b, act) in enumerate(layers):
        W_name = f"W{i + 1}"
        b_name = f"b{i + 1}"
        # Gemm with transB=1 realizes (last @ W + b) when B is W.T
        inits.append(
            helper.make_tensor(W_name, TensorProto.FLOAT, W.T.shape, W.T.flatten())
        )
        inits.append(helper.make_tensor(b_name, TensorProto.FLOAT, b.shape, b))
        out_name = f"H{i + 1}"
        nodes.append(
            helper.make_node(
                "Gemm",
                inputs=[last, W_name, b_name],
                outputs=[out_name],
                name=f"gemm{i + 1}",
                transB=1,
            )
        )
        last = out_name
        if act == "relu":
            act_name = f"A{i + 1}"
            nodes.append(
                helper.make_node(
                    "Relu", inputs=[last], outputs=[act_name], name=f"relu{i + 1}"
                )
            )
            last = act_name

    # Connect final tensor to a named output via Identity
    n_out = layers[-1][1].shape[0]
    nodes.append(
        helper.make_node("Identity", inputs=[last], outputs=["Y"], name="output")
    )
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [None, n_out])
    graph = helper.make_graph(
        nodes=nodes, name="KerasMLP", inputs=[X], outputs=[Y], initializer=inits
    )
    # Use a distinct local name so the Keras `model` argument is not shadowed.
    onnx_mlp = helper.make_model(graph)
    onnx.checker.check_model(onnx_mlp)
    return onnx_mlp


# Export the trained Keras network `nn` to ONNX; this ONNX model (not the Keras
# one) is what gets embedded in the optimization model further down.
onnx_model = keras_dense_layers_to_onnx(nn)

## Build optimization model

Now we turn to building the optimization model.

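We choose a training example and follow the same steps as in the Keras version of this example: we record the label the network predicts for it and the runner-up label, then search for an image within $\ell_1$-distance `delta` of the example for which the runner-up logit exceeds the predicted one.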


In [None]:
# Pick one training image and show it.
example = x_train[18]
plt.imshow(tf.reshape(example, [28, 28]), cmap="gray")

# Network output for that single image (batch of size 1).
ex_prob = nn.predict(tf.reshape(example, (1, -1)))

# Labels sorted by increasing output value: the last one is the predicted
# class, the one before it is the runner-up we will try to promote.
sorted_labels = tf.argsort(ex_prob)[0]
wrong_label, right_label = sorted_labels[-2], sorted_labels[-1]

message = f"Original classified as {int(right_label)}; target misclassify as {int(wrong_label)}"
print(message)

In [None]:
m = gp.Model()
delta = 5  # budget on the norm-1 distance between x and the example

# NumPy view of the reference image, reused for shapes and constraints.
example_np = example.numpy()

# x: perturbed image, y: network output for x, abs_diff: per-pixel |x - example|.
x = m.addMVar(example_np.shape, lb=0.0, ub=1.0, name="x")
y = m.addMVar(ex_prob.shape, lb=-gp.GRB.INFINITY, name="y")
abs_diff = m.addMVar(example_np.shape, lb=0, ub=1, name="abs_diff")

# Maximize how much the runner-up output exceeds the originally predicted one.
margin = y[0, wrong_label] - y[0, right_label]
m.setObjective(margin, gp.GRB.MAXIMIZE)

# Bound on the distance to example in norm-1: abs_diff dominates both signs of
# the difference, and its sum is capped by delta.
m.addConstr(abs_diff >= x - example_np)
m.addConstr(abs_diff >= -x + example_np)
m.addConstr(abs_diff.sum() <= delta)

# Tie y to x through the ONNX network.
pred_constr = add_predictor_constr(m, onnx_model, x, y)

pred_constr.print_stats()

In [None]:
# Both stopping parameters use 0.0, the threshold the result cell compares
# the objective against.
# NOTE(review): per the Gurobi parameter docs these should end the solve once
# the best bound or the incumbent objective reaches the threshold - confirm.
stop_threshold = 0.0
m.Params.BestBdStop = stop_threshold
m.Params.BestObjStop = stop_threshold
m.optimize()

Finally, display the adversarial example if one was found.


In [None]:
# A usable adversarial example needs a stored solution whose objective
# (runner-up output minus predicted output) is strictly positive.
found_adversarial = m.SolCount and m.ObjVal > 0.0

if not found_adversarial:
    print("No counter example exists in neighborhood.")
else:
    adversarial = x.X
    plt.imshow(adversarial.reshape((28, 28)), cmap="gray")
    # Re-classify the perturbed image with the Keras network as a sanity check.
    logits = nn.predict(tf.reshape(adversarial, (1, -1)))
    label = tf.math.argmax(logits, axis=1)
    print(f"Solution is classified as {label.numpy()[0]}")