### Deep Fool Algorithm 

We have achieved some results with the CNN and MLP models, but we would like to explore how these models perform when faced with adversarial examples. To do so, we aply DeepFool.

**DeepFool** is a widely used method for generating adversarial examples, designed to evaluate the robustness of machine learning models, particularly in classification tasks. 

By iteratively finding the minimal perturbation needed to alter a model's prediction, DeepFool provides insights into a **model's vulnerability to adversarial attacks**. This method is crucial for developing more **robust and reliable models**, as it helps identify potential weaknesses and informs strategies for improving their defense mechanisms

![Descrição da imagem](./images/deepfool.png)


1. **Initialization:** Starting with the original image \( $x_0$ \), and setting the iteration counter \($ i $\) to 0.

2. **Perturbation Calculation:** For each iteration:
   - The algorithm calculates the perturbation required to change the model's prediction, iteratively adjusting the image.
   - It computes the gradients and the necessary perturbation \( $r_i $\) for the class boundary.

3. **Repeat Until Misclassification:** The algorithm repeats this process until the image is misclassified by the model.

4. **Return Perturbation:** The final perturbation is the sum of all the adjustments \( $r_i$ \) made during the iterations.

The result is the minimal perturbation $ r $ that causes a misclassification.


In [2]:
# import of relevant libraries

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers, regularizers, optimizers
from tensorflow.python.client import device_lib
import pandas as pd
import numpy as np
import pickle
import os
import librosa
from copy import deepcopy

In [11]:
def get_gradient(model, x, target_class):
    """
    Calcula o gradiente da saída da classe-alvo com respeito à entrada.

    Args:
        model: O modelo neural treinado.
        x: Entrada para o modelo (tensor).
        target_class: Índice da classe-alvo.

    Returns:
        Gradiente calculado.
    """
    with tf.GradientTape() as tape:
        tape.watch(x)
        logits = model(x)
        target_logits = logits[:, target_class]
    return tape.gradient(target_logits, x)


In [12]:
import numpy as np

def deepfool(model, x0, eta=1e-2, max_iter=50, num_classes=10):
    """
    Implementa o algoritmo DeepFool para calcular a menor perturbação.

    Args:
        model: O modelo neural treinado.
        x0: Entrada inicial (tensor).
        eta: Parâmetro de overshoot.
        max_iter: Número máximo de iterações.
        num_classes: Número de classes no modelo.

    Returns:
        r_sum: Perturbação acumulada.
        loop_i: Número de iterações realizadas.
        label_xi: Nova previsão após a perturbação.
    """
    x = tf.convert_to_tensor(x0, dtype=tf.float32)
    r_sum = tf.zeros_like(x)
    label_xi = tf.argmax(model(x), axis=1).numpy()[0]

    for loop_i in range(max_iter):
        gradients = []
        logits = model(x)
        current_label = tf.argmax(logits, axis=1).numpy()[0]

        if current_label != label_xi:
            break

        for k in range(num_classes):
            grad = get_gradient(model, x, k)
            gradients.append(grad)

        gradients = tf.stack(gradients)
        logits = tf.squeeze(logits)

        smallest_perturbation = float('inf')
        for k in range(num_classes):
            if k == label_xi:
                continue
            w_k = gradients[k] - gradients[label_xi]
            f_k = logits[k] - logits[label_xi]
            perturbation = tf.abs(f_k) / tf.norm(w_k, ord=2)
            if perturbation < smallest_perturbation:
                smallest_perturbation = perturbation
                r_i = (perturbation + eta) * w_k / tf.norm(w_k, ord=2)

        r_sum += r_i
        x = x + r_i

    return r_sum, loop_i, current_label


In [13]:
def example_robustness(r, x):
    """
    Calcula a robustez de um exemplo.

    Args:
        r: Perturbação adversarial aplicada.
        x: Entrada original.

    Returns:
        Valor de robustez (ρ).
    """
    norm_r = tf.norm(r)
    norm_x = tf.norm(x)
    return norm_r / norm_x


In [14]:
def model_robustness(model, X_test, y_test):
    """
    Avalia a robustez média do modelo em relação ao conjunto de testes.

    Args:
        model: O modelo neural treinado.
        X_test: Conjunto de dados de teste.
        y_test: Labels do conjunto de teste.

    Returns:
        Média e desvio padrão da robustez.
    """
    rho_values = []
    for i in range(len(X_test)):
        x = tf.expand_dims(X_test[i], axis=0)
        r, _, _ = deepfool(model, x)
        rho = example_robustness(r, x)
        rho_values.append(rho.numpy())
    
    mean_rho = np.mean(rho_values)
    std_rho = np.std(rho_values)
    return mean_rho, std_rho


In [None]:
# Avaliar robustez do modelo
mean_rho, std_rho = model_robustness(model, X_test, y_test)
print(f"Medium robustness: {mean_rho:.4f}")
print(f"Standard deviation of robustness: {std_rho:.4f}")

In [9]:
class MLP(tf.keras.Model):
    def __init__(self, input_dim, output_dim, hidden_units, dropout_rate, activations, regularization_type=None, regularization_value=0.01):
        super(MLP, self).__init__()
        self.hidden_layers = []
        self.regularization_type = regularization_type
        self.regularization_value = regularization_value

        for units, activation in zip(hidden_units, activations):
            self.hidden_layers.append(
                tf.keras.layers.Dense(units, activation=activation)
            )
            self.hidden_layers.append(tf.keras.layers.Dropout(dropout_rate))
        
        self.output_layer = tf.keras.layers.Dense(output_dim, activation='softmax')  

    def call(self, inputs):
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)
    
    def compute_regularization_loss(self):
        regularization_loss = 0.0
        if self.regularization_type:
            for layer in self.hidden_layers:
                if isinstance(layer, tf.keras.layers.Dense):
                    weights = layer.kernel
                    if self.regularization_type == 'l1':
                        regularization_loss += tf.reduce_sum(tf.abs(weights)) * self.regularization_value
                    elif self.regularization_type == 'l2':
                        regularization_loss += tf.reduce_sum(tf.square(weights)) * self.regularization_value
        return regularization_loss

def load_fold_data(fold_index, files):
    # Adjust fold_index to be zero-based
    data = pd.read_csv(files[fold_index-1]).to_numpy()

    if np.isnan(data).any():
        print(f"Warning: Missing values detected in file {files[fold_index - 1]}.")
        data = data[~np.isnan(data).any(axis=1)]  # Remove rows with NaN values
    X = data[:, :-1]  # Features
    y = data[:, -1].astype(int)  # Labels
    if (y < 0).any() or (y >= 10).any():
        raise ValueError(f"Invalid label values detected in file {files[fold_index - 1]}. Labels: {np.unique(y)}")
    return X, y

files = [f'datasets/urbansounds_features_fold{i}.csv' for i in range(1, 11)]

# Define the test fold
fold_test = 1
X_test, y_test = load_fold_data(fold_test, files)

# Define the training folds
X_train, y_train = [], []
for i in range(1, 11):  # Total of 10 folds
    if i != fold_test:
        X_temp, y_temp = load_fold_data(i, files)
        X_train.append(X_temp)
        y_train.append(y_temp)

# Concatenate the training data
X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)

# Hyperparameters
best_config = {
    'hidden_units': [256, 128, 64],
    'activations': ['relu', 'relu', 'relu'],
    'dropout_rate': 0.3,
    'batch_size': 64,
    'epochs': 20,
    'learning_rate': 0.0001,
    'regularization_type': None,
    'regularization_value': 0.01
}

# Initialize and train the model
model = MLP(
    input_dim=X_train.shape[1],
    output_dim=10,  # Classes from 0 to 9
    hidden_units=best_config['hidden_units'],
    dropout_rate=best_config['dropout_rate'],
    activations=best_config['activations']
)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=best_config['learning_rate']),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

# Example validation split
X_val, y_val = X_train[:len(X_train)//10], y_train[:len(y_train)//10]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=best_config['batch_size'],
    epochs=best_config['epochs']
)


Epoch 1/20
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 18ms/step - accuracy: 0.1474 - loss: 2.2515 - val_accuracy: 0.3350 - val_loss: 2.1373
Epoch 2/20
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.3759 - loss: 2.0446 - val_accuracy: 0.4127 - val_loss: 1.9569
Epoch 3/20
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.4481 - loss: 1.8074 - val_accuracy: 0.3822 - val_loss: 1.7841
Epoch 4/20
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.4825 - loss: 1.6219 - val_accuracy: 0.4854 - val_loss: 1.6441
Epoch 5/20
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5295 - loss: 1.4747 - val_accuracy: 0.5108 - val_loss: 1.5327
Epoch 6/20
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5575 - loss: 1.3904 - val_accuracy: 0.5299 - val_loss: 1.4604
Epoch 7/20
[1m123/123

In [33]:
def deepfool_mlp(model, x0, y0, max_iter=50, epsilon=1e-6):
    """
    Implements the DeepFool adversarial attack for a given model and input.
    """
    x_adv = tf.identity(x0)  # Copy input tensor for adversarial manipulation
    logits = model(x_adv)  # Get model predictions
    pred_label = tf.argmax(logits, axis=-1).numpy()[0]
    
    if pred_label != y0:
        return x0.numpy()  # Return original input if already misclassified

    for i in range(max_iter):
        with tf.GradientTape() as tape:
            tape.watch(x_adv)
            logits = model(x_adv)

        gradients = tape.gradient(logits, x_adv).numpy()
        logits = logits.numpy()[0]
        current_label = np.argmax(logits)
        if current_label != y0:
            break  # Misclassification achieved

        w = gradients - gradients[y0]
        f = logits - logits[y0]

        perturbations = []
        for k in range(len(logits)):
            if k != y0:
                norm_w = np.linalg.norm(w[k]) + epsilon
                perturbations.append((abs(f[k]) / norm_w, k))

        perturbations.sort()
        r_min, k_min = perturbations[0]
        x_adv += (1 + epsilon) * tf.convert_to_tensor(r_min * w[k_min], dtype=tf.float32)

    return x_adv.numpy()

def cross_validation_mlp_deepfool(datasets, model_builder, params):
    accuracy_values = []
    loss_values = []
    robustness_values = []

    for i, test_set in enumerate(datasets):
        print(f"=== Fold {i+1} ===")

        # Prepare the train, validation, and test splits
        validation_set = datasets[(i + 1) % len(datasets)]
        train_set = pd.concat([datasets[j] for j in range(len(datasets)) if j != i and j != (i + 1) % len(datasets)])

        X_train, y_train = train_set.iloc[:, :-1].values, train_set.iloc[:, -1].values
        X_val, y_val = validation_set.iloc[:, :-1].values, validation_set.iloc[:, -1].values
        X_test, y_test = test_set.iloc[:, :-1].values, test_set.iloc[:, -1].values

        # Build and train the model
        model = model_builder(
            input_dim=X_train.shape[1],
            output_dim=10,  # Classes from 0 to 9
            hidden_units=best_config['hidden_units'],
            dropout_rate=best_config['dropout_rate'],
            activations=best_config['activations']
        )

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=params['epochs'],
            batch_size=params['batch_size'],
            verbose=1
        )

        # Evaluate the model on test data
        fold_loss, fold_accuracy = model.evaluate(X_test, y_test, verbose=0)
        accuracy_values.append(fold_accuracy)
        loss_values.append(fold_loss)

        # Evaluate robustness using DeepFool
        adversarial_success = 0
        for idx in range(len(X_test)):
            x0 = np.expand_dims(X_test[idx], axis=0)
            y0 = y_test[idx]
            x_adv = deepfool_mlp(model, x0, y0)
            adv_pred = tf.argmax(model(x_adv), axis=-1).numpy()[0]

            if adv_pred != y0:
                adversarial_success += 1

        robustness = 1 - (adversarial_success / len(X_test))
        robustness_values.append(robustness)
        print(f"Robustness for fold {i+1}: {robustness:.4f}")

    return accuracy_values, loss_values, robustness_values


In [29]:
from tensorflow.keras import models, layers, regularizers

def build_mlp_model(input_shape, hidden_units, learning_rate, dropout_rate):
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))

    # Add hidden layers
    for units in hidden_units:
        model.add(layers.Dense(units, activation='relu', 
                               kernel_regularizer=regularizers.l2(l2_lambda)))
        model.add(layers.Dropout(dropout_rate))
    
    # Add output layer
    model.add(layers.Dense(10, activation='softmax'))  # Assuming 10 classes

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=best_config['learning_rate']),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )


    return model


In [30]:
# Hyperparameters
params = {
    'hidden_units': [256, 128, 64],
    'activations': ['relu', 'relu', 'relu'],
    'dropout_rate': 0.3,
    'batch_size': 64,
    'epochs': 20,
    'learning_rate': 0.0001,
    'regularization_type': None,
    'regularization_value': 0.01
}

In [31]:
# Load all folds into a list
files = [f'datasets/urbansounds_features_fold{i}.csv' for i in range(1, 11)]
folds = [pd.read_csv(file) for file in files]

In [34]:
accuracy_values, loss_values, robustness_values = cross_validation_mlp_deepfool(
    datasets=folds,
    model_builder=build_mlp_model,
    params=params
)


=== Fold 1 ===


TypeError: build_mlp_model() missing 2 required positional arguments: 'l2_lambda' and 'optimizer'