In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pickle

2023-01-19 18:20:24.344094: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


Load datasets and standardize data

In [2]:
def load_dataset(dsname):
    metadata = pd.read_csv(f"/datasets/nicolas_facchinetti/processed_data/{dsname}/metadata_final.csv")
    x = pickle.load(open(f"/datasets/nicolas_facchinetti/processed_data/{dsname}/processed_data0.p", "rb" ))
    y = pickle.load(open(f"/datasets/nicolas_facchinetti/processed_data/{dsname}/processed_labels.p", "rb" ))
    return metadata, x, y

In [3]:
from sklearn.preprocessing import StandardScaler

datasets = ["emodb"]
scalers = {d: StandardScaler() for d in datasets}

data = {}
for d in datasets:
    md, x, y = load_dataset(d)
    data[d] = {}
    # standardize data
    n = x.shape
    data[d]["x"] = scalers[d].fit_transform(x.reshape(n[0],-1)).reshape(n)
    data[d]["y"] = y
    data[d]["metadata"] = md

Get train/test data

In [4]:
from sklearn.model_selection import train_test_split

train = {}
test = {}
for d in datasets:
    train[d] = {}
    test[d] = {}
    train_df, test_df = train_test_split(data[d]['metadata'], 
                                       test_size = 0.2, 
                                       random_state = 1938,
                                       stratify = data[d]['metadata']['label'])
    train_index = train_df.index
    test_index = test_df.index
    train[d]['x'] = data[d]['x'][train_index]
    train[d]['y'] = data[d]['y'][train_index]
    test[d]['x']= data[d]['x'][test_index]
    test[d]['y']= data[d]['y'][test_index]
    

In [5]:
data['emodb']['y'].shape, train['emodb']['y'].shape, test['emodb']['y'].shape

((4181, 5), (3344, 5), (837, 5))

Load best parameter for each dataset

In [6]:
import keras_tuner as kt
param = pickle.load(open('/datasets/nicolas_facchinetti/param.p', "rb" ))
param  

{'emodb': {'dropout': 0.3,
  'lstm_dropout': 0.2,
  'learning_rate': 0.001,
  'batch_size': 32,
  'tuner/epochs': 120,
  'tuner/initial_epoch': 8,
  'tuner/bracket': 1,
  'tuner/round': 1,
  'tuner/trial_id': '0050',
  'lstm_units': 2},
 'emovo': {'dropout': 0.3,
  'lstm_dropout': 0.0,
  'learning_rate': 0.0001,
  'batch_size': 8,
  'tuner/epochs': 120,
  'tuner/initial_epoch': 8,
  'tuner/bracket': 1,
  'tuner/round': 1,
  'tuner/trial_id': '0001',
  'lstm_units': 2},
 'ravdess': {'dropout': 0.6,
  'lstm_dropout': 0.2,
  'learning_rate': 0.001,
  'batch_size': 128,
  'tuner/epochs': 120,
  'tuner/initial_epoch': 0,
  'tuner/bracket': 0,
  'tuner/round': 0,
  'lstm_units': 2}}

# Model definition

In [7]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D, LSTM, TimeDistributed, InputLayer, Reshape, BatchNormalization, Bidirectional

In [8]:
def get_m(best_param):
    # Define hyper model architecture
    m = Sequential([
        InputLayer(input_shape=(261,128,1)),
        Reshape((9,29,128,1)),
        TimeDistributed(Conv2D(16, kernel_size=(5,5), activation='relu')),
        TimeDistributed(BatchNormalization()),
        TimeDistributed(MaxPooling2D(pool_size=(4,4), strides=2)),

        TimeDistributed(Conv2D(32, kernel_size=(3,3), activation='relu')),
        TimeDistributed(MaxPooling2D(pool_size=(2,2), strides=2)),

        TimeDistributed(Conv2D(64, kernel_size=(3,3), activation='relu')),
        TimeDistributed(MaxPooling2D(pool_size=(2,2), strides=1)),
        TimeDistributed(Flatten()),
        
        Dropout(best_param['dropout']),
        
        Bidirectional(LSTM(256, dropout=best_param['lstm_dropout'], return_sequences=False)),
        Dense(5, activation='softmax')
    ])
    
    lr = best_param['learning_rate']
    
    # Define optimizer, loss, and metrics
    m.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss='categorical_crossentropy',
              metrics=["accuracy"])
    return m

Models training

In [9]:
models = {}
for d in param:
    m = get_m(param[d])

    earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=6, verbose=1, min_delta=1e-4, mode='min')
    
    print(f'Training on {d} dataset')
    history = m.fit(train[d]['x'], train[d]['y'],
                    epochs=100,
                    batch_size=param[d]["batch_size"],
                    validation_split=0.2,
                    callbacks=[earlyStopping, reduce_lr_loss], verbose=2)
    print("\n\n\n")
    
    models[d] = m

2023-01-19 18:20:27.413951: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-01-19 18:20:27.415332: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-01-19 18:20:27.562959: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:15:00.0 name: NVIDIA TITAN V computeCapability: 7.0
coreClock: 1.455GHz coreCount: 80 deviceMemorySize: 11.77GiB deviceMemoryBandwidth: 607.97GiB/s
2023-01-19 18:20:27.562995: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-01-19 18:20:27.564942: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-01-19 18:20:27.565037: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2023-0

Training on emodb dataset


2023-01-19 18:20:28.822705: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-01-19 18:20:28.837891: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3301490000 Hz


Epoch 1/100


2023-01-19 18:20:30.609941: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-01-19 18:20:30.795494: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


84/84 - 5s - loss: 1.2364 - accuracy: 0.4968 - val_loss: 2.3020 - val_accuracy: 0.2706
Epoch 2/100
84/84 - 1s - loss: 0.7368 - accuracy: 0.7107 - val_loss: 1.2075 - val_accuracy: 0.5755
Epoch 3/100
84/84 - 1s - loss: 0.5353 - accuracy: 0.7989 - val_loss: 0.8006 - val_accuracy: 0.6816
Epoch 4/100
84/84 - 1s - loss: 0.3829 - accuracy: 0.8665 - val_loss: 0.5280 - val_accuracy: 0.7952
Epoch 5/100
84/84 - 1s - loss: 0.2867 - accuracy: 0.9002 - val_loss: 0.4139 - val_accuracy: 0.8371
Epoch 6/100
84/84 - 1s - loss: 0.2096 - accuracy: 0.9271 - val_loss: 0.3728 - val_accuracy: 0.8550
Epoch 7/100
84/84 - 1s - loss: 0.1875 - accuracy: 0.9376 - val_loss: 0.3299 - val_accuracy: 0.8714
Epoch 8/100
84/84 - 1s - loss: 0.1275 - accuracy: 0.9622 - val_loss: 0.3220 - val_accuracy: 0.8685
Epoch 9/100
84/84 - 1s - loss: 0.0850 - accuracy: 0.9802 - val_loss: 0.2602 - val_accuracy: 0.9013
Epoch 10/100
84/84 - 1s - loss: 0.0664 - accuracy: 0.9806 - val_loss: 0.2627 - val_accuracy: 0.9088
Epoch 11/100
84/84 - 

KeyError: 'emovo'

Model testing

In [10]:
for d in models:
    loss, accuracy = models[d].evaluate(test[d]['x'], test[d]['y'], batch_size=param[d]["batch_size"])
    print(f'For {d} test loss: {loss} accuracy: {accuracy}')

For emodb test loss: 0.33246275782585144 accuracy: 0.8924731016159058


# Model attack

Make ART model

In [11]:
from art.estimators.classification import TensorFlowV2Classifier




In [12]:
models['emodb'].summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 9, 29, 128, 1)     0         
_________________________________________________________________
time_distributed (TimeDistri (None, 9, 25, 124, 16)    416       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 9, 25, 124, 16)    64        
_________________________________________________________________
time_distributed_2 (TimeDist (None, 9, 11, 61, 16)     0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, 9, 9, 59, 32)      4640      
_________________________________________________________________
time_distributed_4 (TimeDist (None, 9, 4, 29, 32)      0         
_________________________________________________________________
time_distributed_5 (TimeDist (None, 9, 2, 27, 64)      1

In [14]:
classifiers = {d: TensorFlowV2Classifier(models[d],
                                         nb_classes=5,
                                         input_shape=(261,128,1),
                                         loss_object=tf.keras.losses.SparseCategoricalCrossentropy(),
                                         channels_first=False)
               for d in models}

In [15]:
from art.attacks.evasion import FastGradientMethod
attack_fgsm = FastGradientMethod(estimator=classifiers['emodb'], eps=0.3)

In [16]:
x_test_adv = attack_fgsm.generate(test['emodb']['x'])

In [19]:
for d in models:
    loss, accuracy = models[d].evaluate(x_test_adv, test[d]['y'], batch_size=param[d]["batch_size"])
    perturbation = np.mean(np.abs((x_test_adv - test[d]['x'])))
    print(f'For {d} test loss: {loss} accuracy: {accuracy}')
    print('Average perturbation: {:4.2f}'.format(perturbation))

For emodb test loss: 5.44273567199707 accuracy: 0.15292711555957794
Average perturbation: 0.25


In [24]:
from art.attacks.evasion import CarliniLInfMethod

# tune LR, init and largest cost to reduce time
attack_cw = CarliniLInfMethod(classifier=classifiers['emodb'],
                              max_iter=10,
                              learning_rate=0.01,
                              initial_const=1e0,
                              largest_const=2e0,
                              verbose=True)

x_test_adv = attack_cw.generate(test['emodb']['x'])

C&W L_inf:   0%|          | 0/837 [00:00<?, ?it/s]

In [25]:
for d in models:
    loss, accuracy = models[d].evaluate(x_test_adv, test[d]['y'], batch_size=param[d]["batch_size"])
    perturbation = np.mean(np.abs((x_test_adv - test[d]['x'])))
    print(f'For {d} test loss: {loss} accuracy: {accuracy}')
    print('Average perturbation: {:4.2f}'.format(perturbation))

For emodb test loss: 2.413248062133789 accuracy: 0.07287932932376862
Average perturbation: 0.05
