In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pickle

2023-01-23 16:40:23.489917: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


Load datasets and standardize data

In [2]:
def load_dataset(dsname, base_dir="/datasets/nicolas_facchinetti/processed_data"):
    """Load the metadata table, features and labels of one processed dataset.

    Parameters
    ----------
    dsname : str
        Name of the dataset sub-directory inside ``base_dir`` (e.g. "emodb").
    base_dir : str, optional
        Directory that holds one sub-directory per dataset. Defaults to the
        location originally hard-coded in this notebook.

    Returns
    -------
    tuple
        ``(metadata, x, y)`` where ``metadata`` is the DataFrame read from
        ``metadata_final.csv`` and ``x`` / ``y`` are the objects unpickled
        from ``processed_data0.p`` and ``processed_labels.p``.
    """
    root = f"{base_dir}/{dsname}"
    metadata = pd.read_csv(f"{root}/metadata_final.csv")
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at files you produced yourself.
    # Context managers make sure the file handles are closed after reading.
    with open(f"{root}/processed_data0.p", "rb") as data_file:
        x = pickle.load(data_file)
    with open(f"{root}/processed_labels.p", "rb") as labels_file:
        y = pickle.load(labels_file)
    return metadata, x, y

In [3]:
from sklearn.preprocessing import StandardScaler

datasets = ["emodb", "emovo", "ravdess"]
# One independent scaler per dataset, kept so the fitted statistics stay available.
scalers = dict((name, StandardScaler()) for name in datasets)

# NOTE(review): each scaler is fit on the full dataset here, i.e. before the
# train/test split performed further down, so test samples contribute to the
# scaling statistics - confirm this is acceptable for the experiment.
data = {}
for d in datasets:
    md, x, y = load_dataset(d)
    n = x.shape
    # StandardScaler works on 2-D input: flatten every sample to one row,
    # scale, then restore the original array shape.
    flat = x.reshape(n[0], -1)
    scaled = scalers[d].fit_transform(flat)
    data[d] = {"x": scaled.reshape(n), "y": y, "metadata": md}

In [38]:
# Show the first rows of the emodb metadata table.
data['emodb']['metadata'].head()

Unnamed: 0,path,filename,chunk,label,actor,gender,mod
0,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f,0
1,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f,0
2,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f,1
3,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,1,sad,13,f,1
4,/datasets/nicolas_facchinetti/emodb/data/13a05...,13a05Tc.wav,0,sad,13,f,1


Get train/test data

In [65]:
from sklearn.model_selection import train_test_split

train, test, test_md = {}, {}, {}
for d in datasets:
    md_full = data[d]['metadata']
    # Stratified 80/20 split on the emotion label with a fixed seed.
    # NOTE(review): the split is done per metadata row; the metadata preview
    # shows several rows (chunk / mod variants) sharing one filename, so rows
    # from the same recording may land in both train and test - confirm.
    train_df, test_df = train_test_split(md_full,
                                         test_size=0.2,
                                         random_state=1938,
                                         stratify=md_full['label'])
    # reset_index() gives the test metadata a fresh 0..n-1 index and keeps the
    # original row ids in an 'index' column.
    test_md[d] = test_df.reset_index()
    train_index, test_index = train_df.index, test_df.index
    print(f'For {d}:\ttrain size {len(train_index)}, test size {len(test_index)}')
    # Use the selected metadata row ids to pick the matching features/labels.
    features, labels = data[d]['x'], data[d]['y']
    train[d] = {'x': features[train_index], 'y': labels[train_index]}
    test[d] = {'x': features[test_index], 'y': labels[test_index]}
    

For emodb:	train size 3344, test size 837
For emovo:	train size 3944, test size 987
For ravdess:	train size 7161, test size 1791


In [66]:
# Report how many test samples each gender contributes, per dataset.
for d in datasets:
    gender_counts = test_md[d].gender.value_counts()
    print(f'For {d} the gender balance in test set is\n{gender_counts}\n')

For emodb the gender balance in test set is
f    522
m    315
Name: gender, dtype: int64

For emovo the gender balance in test set is
f    499
m    488
Name: gender, dtype: int64

For ravdess the gender balance in test set is
f    896
m    895
Name: gender, dtype: int64



In [67]:
# Show the first rows of the emodb test metadata (after reset_index()).
test_md['emodb'].head()

Unnamed: 0,index,path,filename,chunk,label,actor,gender,mod
0,2272,/datasets/nicolas_facchinetti/emodb/data/08b03...,08b03Tc.wav,2,sad,8,f,0
1,1115,/datasets/nicolas_facchinetti/emodb/data/14a07...,14a07Tc.wav,0,sad,14,f,2
2,4069,/datasets/nicolas_facchinetti/emodb/data/16b01...,16b01Tb.wav,1,sad,16,f,2
3,3325,/datasets/nicolas_facchinetti/emodb/data/16a07...,16a07Fb.wav,0,happy,16,f,2
4,3585,/datasets/nicolas_facchinetti/emodb/data/14a07...,14a07Fd.wav,0,happy,14,f,1


In [99]:
def get_m_f_data(md):
    """Return the index labels of the male and female rows of a metadata table.

    Parameters
    ----------
    md : pandas.DataFrame
        Table with a ``gender`` column holding 'm' / 'f' values.

    Returns
    -------
    tuple
        ``(male_index, female_index)`` - the index labels of the rows whose
        gender is 'm' and 'f' respectively. Rows with any other value are
        in neither.
    """
    is_male = md.gender == 'm'
    is_female = md.gender == 'f'
    return md.loc[is_male].index, md.loc[is_female].index

In [103]:
# Male / female row positions of every test set, keyed by dataset name.
test_mf = {}
for d in datasets:
    # Use dedicated names: the original rebound the global `data`, which
    # overwrote the dataset dictionary built in the loading cell.
    male_idx, female_idx = get_m_f_data(test_md[d])
    test_mf[d] = {'m': male_idx, 'f': female_idx}
    print(f"For {d}\n\tmale:{male_idx.shape[0]}\n\tfemale:{female_idx.shape[0]}\n")

For emodb
	male:315
	female:522

For emovo
	male:488
	female:499

For ravdess
	male:895
	female:896



Load the best hyperparameters for each dataset

In [6]:
import keras_tuner as kt
# Load the per-dataset hyperparameters found by the tuner.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# The context manager closes the file handle once the dictionary is read.
with open('/datasets/nicolas_facchinetti/param.p', "rb") as param_file:
    param = pickle.load(param_file)
param

{'emodb': {'dropout': 0.3,
  'lstm_dropout': 0.2,
  'learning_rate': 0.001,
  'batch_size': 32,
  'tuner/epochs': 120,
  'tuner/initial_epoch': 8,
  'tuner/bracket': 1,
  'tuner/round': 1,
  'tuner/trial_id': '0050',
  'lstm_units': 2},
 'emovo': {'dropout': 0.3,
  'lstm_dropout': 0.0,
  'learning_rate': 0.0001,
  'batch_size': 8,
  'tuner/epochs': 120,
  'tuner/initial_epoch': 8,
  'tuner/bracket': 1,
  'tuner/round': 1,
  'tuner/trial_id': '0001',
  'lstm_units': 2},
 'ravdess': {'dropout': 0.6,
  'lstm_dropout': 0.2,
  'learning_rate': 0.001,
  'batch_size': 128,
  'tuner/epochs': 120,
  'tuner/initial_epoch': 0,
  'tuner/bracket': 0,
  'tuner/round': 0,
  'lstm_units': 2}}

# Model definition

In [7]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D, LSTM, TimeDistributed, InputLayer, Reshape, BatchNormalization, Bidirectional

In [8]:
def get_m(best_param):
    """Build and compile the CNN + bidirectional LSTM classifier.

    Parameters
    ----------
    best_param : dict
        Hyperparameters; the keys 'dropout', 'lstm_dropout' and
        'learning_rate' are read here.

    Returns
    -------
    Sequential
        Compiled Keras model taking (261, 128, 1) inputs and producing a
        5-way softmax output.
    """
    # NOTE(review): the loaded parameter dictionaries also carry 'lstm_units',
    # but the LSTM width is fixed at 256 below - confirm this is intended.
    layers = [
        InputLayer(input_shape=(261,128,1)),
        # Cut the 261 input rows into 9 consecutive segments of 29 rows so the
        # same CNN is applied to every segment via TimeDistributed.
        Reshape((9,29,128,1)),

        # Convolutional block 1
        TimeDistributed(Conv2D(16, kernel_size=(5,5), activation='relu')),
        TimeDistributed(BatchNormalization()),
        TimeDistributed(MaxPooling2D(pool_size=(4,4), strides=2)),

        # Convolutional block 2
        TimeDistributed(Conv2D(32, kernel_size=(3,3), activation='relu')),
        TimeDistributed(MaxPooling2D(pool_size=(2,2), strides=2)),

        # Convolutional block 3, flattened to one feature vector per segment
        TimeDistributed(Conv2D(64, kernel_size=(3,3), activation='relu')),
        TimeDistributed(MaxPooling2D(pool_size=(2,2), strides=1)),
        TimeDistributed(Flatten()),

        Dropout(best_param['dropout']),

        # Summarise the segment sequence; only the final state is kept.
        Bidirectional(LSTM(256, dropout=best_param['lstm_dropout'], return_sequences=False)),
        Dense(5, activation='softmax'),
    ]
    model = Sequential(layers)

    # Adam with the tuned learning rate; labels are expected one-hot encoded
    # because of the categorical cross-entropy loss.
    optimizer = tf.keras.optimizers.Adam(learning_rate=best_param['learning_rate'])
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=["accuracy"])
    return model

Model training

In [9]:
# Train one model per dataset with that dataset's hyperparameters.
models = {}
# Keep every training history; the original overwrote `history` on each
# iteration, so only the last dataset's curves survived the loop.
histories = {}
for d in param:
    m = get_m(param[d])

    # Fresh callbacks for every model: stop after 10 epochs without val_loss
    # improvement (restoring the best weights) and cut the learning rate by
    # 10x after 6 stagnant epochs.
    earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=6, verbose=1, min_delta=1e-4, mode='min')

    print(f'Training on {d} dataset')
    # 20% of the training data is held out for validation.
    history = m.fit(train[d]['x'], train[d]['y'],
                    epochs=100,
                    batch_size=param[d]["batch_size"],
                    validation_split=0.2,
                    callbacks=[earlyStopping, reduce_lr_loss], verbose=2)
    print("\n\n\n")

    models[d] = m
    histories[d] = history

2023-01-23 16:41:07.736378: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-01-23 16:41:07.737499: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-01-23 16:41:07.856918: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:15:00.0 name: NVIDIA TITAN V computeCapability: 7.0
coreClock: 1.455GHz coreCount: 80 deviceMemorySize: 11.77GiB deviceMemoryBandwidth: 607.97GiB/s
2023-01-23 16:41:07.856963: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-01-23 16:41:07.859403: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-01-23 16:41:07.859510: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2023-0

Training on emodb dataset


2023-01-23 16:41:09.111568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-01-23 16:41:09.127282: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3301490000 Hz


Epoch 1/100


2023-01-23 16:41:10.894909: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-01-23 16:41:11.111297: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


84/84 - 5s - loss: 1.2471 - accuracy: 0.5058 - val_loss: 0.9726 - val_accuracy: 0.6009
Epoch 2/100
84/84 - 1s - loss: 0.7525 - accuracy: 0.6972 - val_loss: 1.0468 - val_accuracy: 0.5934
Epoch 3/100
84/84 - 1s - loss: 0.5407 - accuracy: 0.7862 - val_loss: 0.6827 - val_accuracy: 0.7085
Epoch 4/100
84/84 - 1s - loss: 0.3898 - accuracy: 0.8579 - val_loss: 0.4203 - val_accuracy: 0.8251
Epoch 5/100
84/84 - 1s - loss: 0.2786 - accuracy: 0.9021 - val_loss: 0.3314 - val_accuracy: 0.8804
Epoch 6/100
84/84 - 1s - loss: 0.2202 - accuracy: 0.9264 - val_loss: 0.3090 - val_accuracy: 0.8819
Epoch 7/100
84/84 - 1s - loss: 0.1492 - accuracy: 0.9559 - val_loss: 0.2926 - val_accuracy: 0.8834
Epoch 8/100
84/84 - 1s - loss: 0.0962 - accuracy: 0.9783 - val_loss: 0.2527 - val_accuracy: 0.9013
Epoch 9/100
84/84 - 1s - loss: 0.0726 - accuracy: 0.9839 - val_loss: 0.2586 - val_accuracy: 0.8924
Epoch 10/100
84/84 - 1s - loss: 0.0605 - accuracy: 0.9832 - val_loss: 0.2546 - val_accuracy: 0.9088
Epoch 11/100
84/84 - 

Epoch 20/100
45/45 - 2s - loss: 0.0896 - accuracy: 0.9733 - val_loss: 0.3342 - val_accuracy: 0.8890
Epoch 21/100
45/45 - 2s - loss: 0.0755 - accuracy: 0.9787 - val_loss: 0.2963 - val_accuracy: 0.9065
Epoch 22/100
45/45 - 2s - loss: 0.0693 - accuracy: 0.9820 - val_loss: 0.3451 - val_accuracy: 0.8939
Epoch 23/100
45/45 - 2s - loss: 0.0657 - accuracy: 0.9822 - val_loss: 0.3116 - val_accuracy: 0.9093
Epoch 24/100
45/45 - 2s - loss: 0.0603 - accuracy: 0.9841 - val_loss: 0.3018 - val_accuracy: 0.9093
Epoch 25/100
45/45 - 2s - loss: 0.0512 - accuracy: 0.9873 - val_loss: 0.2911 - val_accuracy: 0.9107
Epoch 26/100
45/45 - 2s - loss: 0.0478 - accuracy: 0.9878 - val_loss: 0.2759 - val_accuracy: 0.9135
Epoch 27/100
45/45 - 2s - loss: 0.0516 - accuracy: 0.9853 - val_loss: 0.3202 - val_accuracy: 0.9051
Epoch 28/100
45/45 - 2s - loss: 0.0515 - accuracy: 0.9843 - val_loss: 0.3186 - val_accuracy: 0.8953
Epoch 29/100
45/45 - 2s - loss: 0.0464 - accuracy: 0.9869 - val_loss: 0.3003 - val_accuracy: 0.9114


Model testing

In [10]:
# Evaluate every trained model on the clean test split of its own dataset.
for d, trained in models.items():
    metrics = trained.evaluate(test[d]['x'], test[d]['y'], batch_size=param[d]["batch_size"])
    loss, accuracy = metrics
    print(f'For {d} test loss: {loss} accuracy: {accuracy}')

For emodb test loss: 0.3230094611644745 accuracy: 0.9008363485336304
For emovo test loss: 0.3751484751701355 accuracy: 0.890577495098114
For ravdess test loss: 0.28232303261756897 accuracy: 0.9212730526924133


# Model attack

Make ART model

In [11]:
from art.estimators.classification import TensorFlowV2Classifier



In [12]:
# Print the layer-by-layer summary of the emodb model.
models['emodb'].summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 9, 29, 128, 1)     0         
_________________________________________________________________
time_distributed (TimeDistri (None, 9, 25, 124, 16)    416       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 9, 25, 124, 16)    64        
_________________________________________________________________
time_distributed_2 (TimeDist (None, 9, 11, 61, 16)     0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, 9, 9, 59, 32)      4640      
_________________________________________________________________
time_distributed_4 (TimeDist (None, 9, 4, 29, 32)      0         
_________________________________________________________________
time_distributed_5 (TimeDist (None, 9, 2, 27, 64)      1

In [13]:
# Wrap every trained Keras model in an ART classifier so the ART attacks can
# query predictions and loss gradients.
classifiers = {}
for d in models:
    classifiers[d] = TensorFlowV2Classifier(
        models[d],
        nb_classes=5,
        input_shape=(261,128,1),
        loss_object=tf.keras.losses.SparseCategoricalCrossentropy(),
        channels_first=False,
    )

In [110]:
# Quick look at the batch size stored for the emodb configuration.
param['emodb']["batch_size"]

32

In [134]:
def _evaluate_adversarial(model, adv, x, y, bs):
    """Evaluate `model` on adversarial samples, print and return the metrics.

    Returns ``(loss, accuracy, perturbation)`` where ``perturbation`` is the
    mean absolute difference between `adv` and the clean inputs `x`, or
    ``None`` when the subset contains no samples.
    """
    if len(adv) == 0:
        # Nothing to evaluate for an empty subset; report it and skip.
        print('\t\tno samples in this subset, skipped\n')
        return None
    loss, accuracy = model.evaluate(adv, y, batch_size=bs, verbose=0)
    perturbation = np.mean(np.abs(adv - x))
    print(f'\t\tloss: {loss:.5f}, accuracy: {accuracy:.5f}, avg perturbation: {perturbation:.5f}\n')
    return loss, accuracy, perturbation


def attack(model, method, d, md, bs):
    """Run an evasion attack and report its effect overall and per gender.

    Parameters
    ----------
    model
        Object with an ``evaluate(x, y, batch_size=..., verbose=...)`` method
        returning ``(loss, accuracy)``.
    method
        Attack object whose ``generate(x, verbose=True)`` returns the
        adversarial version of ``x``.
    d : dict
        Clean samples under ``'x'`` and their labels under ``'y'``.
    md : pandas.DataFrame
        Metadata with a ``gender`` column. Its index labels are used directly
        to index the sample arrays, so it needs a 0..n-1 index aligned with
        ``d['x']``.
    bs : int
        Batch size used for evaluation.

    Returns
    -------
    dict
        Keys ``'all'``, ``'male'`` and ``'female'``; each value is the
        ``(loss, accuracy, perturbation)`` tuple for that subset, or ``None``
        if the subset is empty. The original returned nothing, so the metrics
        were only available on stdout.
    """
    x_adv = method.generate(d['x'], verbose=True)

    mi, fi = get_m_f_data(md)

    results = {}
    print('\tWhole test set')
    results['all'] = _evaluate_adversarial(model, x_adv, d['x'], d['y'], bs)
    print('\tOnly male data')
    results['male'] = _evaluate_adversarial(model, x_adv[mi], d['x'][mi], d['y'][mi], bs)
    print('\tOnly female data')
    results['female'] = _evaluate_adversarial(model, x_adv[fi], d['x'][fi], d['y'][fi], bs)
    return results

In [135]:
from art.attacks.evasion import FastGradientMethod

# Attack every model with the Fast Gradient Method (eps=0.3) and report the
# results for the whole test set and per gender.
for d, keras_model in models.items():
    print(f'Attacking {d} with FGSM')
    fgsm = FastGradientMethod(estimator=classifiers[d], eps=0.3)
    attack(keras_model, fgsm, test[d], test_md[d], param[d]["batch_size"])

Attacking emodb with FGSM
	Whole test set
		loss: 6.18733 accuracy: 0.14456, avg perturbation: 0.25557

	Only male data
		loss: 6.89885 accuracy: 0.12063, avg perturbation: 0.25698

	Only female data
		loss: 5.75796 accuracy: 0.15900, avg perturbation: 0.25473

Attacking emovo with FGSM
	Whole test set
		loss: 10.05832 accuracy: 0.07700, avg perturbation: 0.25641

	Only male data
		loss: 10.67248 accuracy: 0.07992, avg perturbation: 0.25666

	Only female data
		loss: 9.45770 accuracy: 0.07415, avg perturbation: 0.25617

Attacking ravdess with FGSM
	Whole test set
		loss: 7.39520 accuracy: 0.15075, avg perturbation: 0.23744

	Only male data
		loss: 7.45760 accuracy: 0.14972, avg perturbation: 0.22960

	Only female data
		loss: 7.33287 accuracy: 0.15179, avg perturbation: 0.24527



In [140]:
from art.attacks.evasion import CarliniLInfMethod

# Carlini & Wagner L_inf settings shared by all datasets.
cw_settings = dict(max_iter=10,
                   learning_rate=0.01,
                   initial_const=1e0,
                   largest_const=2e0,
                   verbose=True)

# Attack every model and report the results overall and per gender.
for d in models:
    print(f'Attacking {d} with C&W LInf')
    cw = CarliniLInfMethod(classifier=classifiers[d], **cw_settings)
    attack(models[d], cw, test[d], test_md[d], param[d]["batch_size"])

Attacking emodb with C&W LInf


C&W L_inf:   0%|          | 0/837 [00:00<?, ?it/s]

	Whole test set
		loss: 2.51218 accuracy: 0.06930, avg perturbation: 0.05085

	Only male data
		loss: 2.73026 accuracy: 0.08254, avg perturbation: 0.04878

	Only female data
		loss: 2.38058 accuracy: 0.06130, avg perturbation: 0.05210

Attacking emovo with C&W LInf


C&W L_inf:   0%|          | 0/987 [00:00<?, ?it/s]

	Whole test set
		loss: 3.90252 accuracy: 0.07700, avg perturbation: 0.02697

	Only male data
		loss: 4.17128 accuracy: 0.07172, avg perturbation: 0.02658

	Only female data
		loss: 3.63968 accuracy: 0.08216, avg perturbation: 0.02736

Attacking ravdess with C&W LInf


C&W L_inf:   0%|          | 0/1791 [00:00<?, ?it/s]

	Whole test set
		loss: 3.25371 accuracy: 0.05583, avg perturbation: 0.02460

	Only male data
		loss: 3.39439 accuracy: 0.06257, avg perturbation: 0.02272

	Only female data
		loss: 3.11318 accuracy: 0.04911, avg perturbation: 0.02647



In [136]:
from art.attacks.evasion import FastGradientMethod

# FGSM evaluation on the whole test set only (no per-gender breakdown).
# NOTE(review): this repeats the FGSM loop that goes through attack(); consider
# dropping one of the two.
for d in models:
    attack_fgsm = FastGradientMethod(estimator=classifiers[d], eps=0.3)

    clean_x, true_y = test[d]['x'], test[d]['y']
    x_test_adv = attack_fgsm.generate(clean_x, verbose=True)
    loss, accuracy = models[d].evaluate(x_test_adv, true_y, batch_size=param[d]["batch_size"], verbose=0)
    # Mean absolute change applied to the inputs by the attack.
    perturbation = np.abs(x_test_adv - clean_x).mean()
    print(f'For {d} test loss: {loss} accuracy: {accuracy}')
    print('Average perturbation: {:4.2f}\n'.format(perturbation))

SyntaxError: unmatched ')' (2928560334.py, line 9)

In [24]:
from art.attacks.evasion import CarliniLInfMethod

# Learning rate, initial constant and largest constant are tuned to keep the
# attack's running time down.
cw_kwargs = {
    "max_iter": 10,
    "learning_rate": 0.01,
    "initial_const": 1e0,
    "largest_const": 2e0,
    "verbose": True,
}
attack_cw = CarliniLInfMethod(classifier=classifiers['emodb'], **cw_kwargs)

# Adversarial examples are generated for the emodb test set only.
emodb_test_x = test['emodb']['x']
x_test_adv = attack_cw.generate(emodb_test_x)

C&W L_inf:   0%|          | 0/837 [00:00<?, ?it/s]

In [25]:
# x_test_adv was generated from the emodb test set only, so it must be scored
# against the emodb model and labels. The original looped over every model
# and paired these samples with the labels of the other datasets, whose test
# sets have a different number of samples.
d = 'emodb'
loss, accuracy = models[d].evaluate(x_test_adv, test[d]['y'], batch_size=param[d]["batch_size"])
# Mean absolute change applied to the inputs by the attack.
perturbation = np.mean(np.abs((x_test_adv - test[d]['x'])))
print(f'For {d} test loss: {loss} accuracy: {accuracy}')
print('Average perturbation: {:4.2f}'.format(perturbation))

For emodb test loss: 2.413248062133789 accuracy: 0.07287932932376862
Average perturbation: 0.05
