# Import datasets

In [1]:
import numpy as np
import pandas as pd
def load_dataset(name, base_dir='/datasets/nicolas_facchinetti/processed_data'):
    """Load the preprocessed arrays and the metadata table of one dataset.

    Parameters
    ----------
    name : str
        Name of the dataset sub-directory inside ``base_dir`` (e.g. "emodb").
    base_dir : str, optional
        Directory that holds one sub-directory per dataset. Defaults to the
        location that used to be hard-coded here, so existing calls behave
        exactly as before.

    Returns
    -------
    tuple
        ``(x, y, metadata)`` where ``x`` and ``y`` are the arrays stored in
        ``x_trim_repeated_im.npy`` and ``y_trim_repeated_im.npy`` and
        ``metadata`` is the DataFrame read from ``trim_repeated_metadata.csv``.
    """
    dataset_dir = f'{base_dir}/{name}'
    x = np.load(f'{dataset_dir}/x_trim_repeated_im.npy')
    y = np.load(f'{dataset_dir}/y_trim_repeated_im.npy')
    metadata = pd.read_csv(f'{dataset_dir}/trim_repeated_metadata.csv')
    return x, y, metadata

In [2]:
# Load every corpus once and keep its three pieces together under its name.
datasets = ["emodb", "emovo", "ravdess"]
data = {}
for d in datasets:
    x, y, md = load_dataset(d)
    data[d] = {"x": x, "y": y, "metadata": md}

In [3]:
data

{'emodb': {'x': array([[[[0.        , 0.        , 0.00257353],
           [0.        , 0.        , 0.00882353],
           [0.        , 0.        , 0.00882353],
           ...,
           [0.35588235, 0.1       , 0.35588235],
           [0.5382353 , 0.16176471, 0.32352942],
           [0.52003676, 0.15514706, 0.31194854]],
  
          [[0.        , 0.        , 0.00514706],
           [0.        , 0.        , 0.01568628],
           [0.        , 0.        , 0.01568628],
           ...,
           [0.5058824 , 0.14509805, 0.5058824 ],
           [0.7607843 , 0.23137255, 0.45882353],
           [0.73651963, 0.22254902, 0.44338235]],
  
          [[0.        , 0.        , 0.00514706],
           [0.        , 0.        , 0.01568628],
           [0.        , 0.        , 0.01568628],
           ...,
           [0.49117646, 0.1392157 , 0.5088235 ],
           [0.85490197, 0.28431374, 0.41470587],
           [0.82732844, 0.27383578, 0.4009191 ]],
  
          ...,
  
          [[0.        , 0.

# Define architectures

In [9]:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential 
from tensorflow.keras.callbacks import EarlyStopping 
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D, LSTM, TimeDistributed, InputLayer, Reshape, BatchNormalization

## Search for the best architecture

In [61]:
archs = []

In [62]:
# arch0: 16 chunks, two conv/pool stages (16 -> 32 filters), LSTM(128).
arch0 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),
    # Split the first image axis into 16 chunks of 16 rows: (16, 16, 256, 3).
    Reshape((16, 16, 256, 3)),
    BatchNormalization(),
    # TimeDistributed applies the same conv/pool stack to each chunk.
    TimeDistributed(Conv2D(16, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=2)),
    TimeDistributed(Conv2D(32, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    # One flat feature vector per chunk: (16, 3968) in the printed summary.
    TimeDistributed(Flatten()),
    Dropout(0.2),
    # The LSTM reads the chunk sequence and emits only its final output.
    LSTM(128, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])
arch0.summary()

# Store the config (not the model) so untrained copies can be rebuilt later.
archs.append(arch0.get_config())

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_24 (Reshape)         (None, 16, 16, 256, 3)    0         
_________________________________________________________________
batch_normalization_24 (Batc (None, 16, 16, 256, 3)    12        
_________________________________________________________________
time_distributed_144 (TimeDi (None, 16, 14, 254, 16)   448       
_________________________________________________________________
time_distributed_145 (TimeDi (None, 16, 7, 127, 16)    0         
_________________________________________________________________
time_distributed_146 (TimeDi (None, 16, 5, 125, 32)    4640      
_________________________________________________________________
time_distributed_147 (TimeDi (None, 16, 2, 62, 32)     0         
_________________________________________________________________
time_distributed_148 (TimeDi (None, 16, 3968)        

In [63]:
# arch1: 16 chunks, two conv/pool stages (32 -> 64 filters), LSTM(128).
arch1 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),
    # Split the first image axis into 16 chunks of 16 rows: (16, 16, 256, 3).
    Reshape((16, 16, 256, 3)),
    BatchNormalization(),
    # TimeDistributed applies the same conv/pool stack to each chunk.
    TimeDistributed(Conv2D(32, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Conv2D(64, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    # One flat feature vector per chunk: (16, 7936) in the printed summary.
    TimeDistributed(Flatten()),
    Dropout(0.2),
    # The LSTM reads the chunk sequence and emits only its final output.
    LSTM(128, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])
arch1.summary()

# Store the config (not the model) so untrained copies can be rebuilt later.
archs.append(arch1.get_config())

Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_25 (Reshape)         (None, 16, 16, 256, 3)    0         
_________________________________________________________________
batch_normalization_25 (Batc (None, 16, 16, 256, 3)    12        
_________________________________________________________________
time_distributed_149 (TimeDi (None, 16, 14, 254, 32)   896       
_________________________________________________________________
time_distributed_150 (TimeDi (None, 16, 7, 127, 32)    0         
_________________________________________________________________
time_distributed_151 (TimeDi (None, 16, 5, 125, 64)    18496     
_________________________________________________________________
time_distributed_152 (TimeDi (None, 16, 2, 62, 64)     0         
_________________________________________________________________
time_distributed_153 (TimeDi (None, 16, 7936)        

In [64]:
# arch2: same convolutional stack as arch1, but with a wider LSTM(256).
arch2 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),
    # Split the first image axis into 16 chunks of 16 rows: (16, 16, 256, 3).
    Reshape((16, 16, 256, 3)),
    BatchNormalization(),
    # TimeDistributed applies the same conv/pool stack to each chunk.
    TimeDistributed(Conv2D(32, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=2)),
    TimeDistributed(Conv2D(64, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    # One flat feature vector per chunk: (16, 7936) in the printed summary.
    TimeDistributed(Flatten()),
    Dropout(0.2),
    # The LSTM reads the chunk sequence and emits only its final output.
    LSTM(256, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])
arch2.summary()

# Store the config (not the model) so untrained copies can be rebuilt later.
archs.append(arch2.get_config())

Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_26 (Reshape)         (None, 16, 16, 256, 3)    0         
_________________________________________________________________
batch_normalization_26 (Batc (None, 16, 16, 256, 3)    12        
_________________________________________________________________
time_distributed_154 (TimeDi (None, 16, 14, 254, 32)   896       
_________________________________________________________________
time_distributed_155 (TimeDi (None, 16, 7, 127, 32)    0         
_________________________________________________________________
time_distributed_156 (TimeDi (None, 16, 5, 125, 64)    18496     
_________________________________________________________________
time_distributed_157 (TimeDi (None, 16, 2, 62, 64)     0         
_________________________________________________________________
time_distributed_158 (TimeDi (None, 16, 7936)        

In [65]:
# arch3: 8 chunks, three conv/pool stages (16 -> 32 -> 64 filters), LSTM(128).
arch3 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),
    # Split the first image axis into 8 chunks of 32 rows: (8, 32, 256, 3).
    Reshape((8, 32, 256, 3)),
    BatchNormalization(),
    # TimeDistributed applies the same conv/pool stack to each chunk.
    TimeDistributed(Conv2D(16, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Conv2D(32, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    TimeDistributed(Conv2D(64, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    # One flat feature vector per chunk.
    TimeDistributed(Flatten()),
    Dropout(0.2),
    # The LSTM reads the chunk sequence and emits only its final output.
    LSTM(128, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])
arch3.summary()

# Store the config (not the model) so untrained copies can be rebuilt later.
archs.append(arch3.get_config())

Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_27 (Reshape)         (None, 8, 32, 256, 3)     0         
_________________________________________________________________
batch_normalization_27 (Batc (None, 8, 32, 256, 3)     12        
_________________________________________________________________
time_distributed_159 (TimeDi (None, 8, 30, 254, 16)    448       
_________________________________________________________________
time_distributed_160 (TimeDi (None, 8, 15, 127, 16)    0         
_________________________________________________________________
time_distributed_161 (TimeDi (None, 8, 13, 125, 32)    4640      
_________________________________________________________________
time_distributed_162 (TimeDi (None, 8, 6, 62, 32)      0         
_________________________________________________________________
time_distributed_163 (TimeDi (None, 8, 4, 60, 64)    

In [66]:
# arch4: 8 chunks, three conv/pool stages (32 -> 64 -> 128 filters), LSTM(128).
arch4 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),
    # Split the first image axis into 8 chunks of 32 rows: (8, 32, 256, 3).
    Reshape((8, 32, 256, 3)),
    BatchNormalization(),
    # TimeDistributed applies the same conv/pool stack to each chunk.
    TimeDistributed(Conv2D(32, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Conv2D(64, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    TimeDistributed(Conv2D(128, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    # One flat feature vector per chunk.
    TimeDistributed(Flatten()),
    Dropout(0.2),
    # The LSTM reads the chunk sequence and emits only its final output.
    LSTM(128, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])
arch4.summary()

# Store the config (not the model) so untrained copies can be rebuilt later.
archs.append(arch4.get_config())

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_28 (Reshape)         (None, 8, 32, 256, 3)     0         
_________________________________________________________________
batch_normalization_28 (Batc (None, 8, 32, 256, 3)     12        
_________________________________________________________________
time_distributed_166 (TimeDi (None, 8, 30, 254, 32)    896       
_________________________________________________________________
time_distributed_167 (TimeDi (None, 8, 15, 127, 32)    0         
_________________________________________________________________
time_distributed_168 (TimeDi (None, 8, 13, 125, 64)    18496     
_________________________________________________________________
time_distributed_169 (TimeDi (None, 8, 6, 62, 64)      0         
_________________________________________________________________
time_distributed_170 (TimeDi (None, 8, 4, 60, 128)   

In [67]:
# arch5: same convolutional stack as arch4, but with a wider LSTM(256).
arch5 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),
    # Split the first image axis into 8 chunks of 32 rows: (8, 32, 256, 3).
    Reshape((8, 32, 256, 3)),
    BatchNormalization(),
    # TimeDistributed applies the same conv/pool stack to each chunk.
    TimeDistributed(Conv2D(32, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Conv2D(64, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    TimeDistributed(Conv2D(128, kernel_size=(3, 3), strides=1, activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2)),
    # One flat feature vector per chunk.
    TimeDistributed(Flatten()),
    Dropout(0.2),
    # The LSTM reads the chunk sequence and emits only its final output.
    LSTM(256, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])
arch5.summary()

# Store the config (not the model) so untrained copies can be rebuilt later.
archs.append(arch5.get_config())

Model: "sequential_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_29 (Reshape)         (None, 8, 32, 256, 3)     0         
_________________________________________________________________
batch_normalization_29 (Batc (None, 8, 32, 256, 3)     12        
_________________________________________________________________
time_distributed_173 (TimeDi (None, 8, 30, 254, 32)    896       
_________________________________________________________________
time_distributed_174 (TimeDi (None, 8, 15, 127, 32)    0         
_________________________________________________________________
time_distributed_175 (TimeDi (None, 8, 13, 125, 64)    18496     
_________________________________________________________________
time_distributed_176 (TimeDi (None, 8, 6, 62, 64)      0         
_________________________________________________________________
time_distributed_177 (TimeDi (None, 8, 4, 60, 128)   

In [68]:
archs

[{'name': 'sequential_24',
  'layers': [{'class_name': 'InputLayer',
    'config': {'batch_input_shape': (None, 256, 256, 3),
     'dtype': 'float32',
     'sparse': False,
     'ragged': False,
     'name': 'input_25'}},
   {'class_name': 'Reshape',
    'config': {'name': 'reshape_24',
     'trainable': True,
     'dtype': 'float32',
     'target_shape': (16, 16, 256, 3)}},
   {'class_name': 'BatchNormalization',
    'config': {'name': 'batch_normalization_24',
     'trainable': True,
     'dtype': 'float32',
     'axis': ListWrapper([4]),
     'momentum': 0.99,
     'epsilon': 0.001,
     'center': True,
     'scale': True,
     'beta_initializer': {'class_name': 'Zeros', 'config': {}},
     'gamma_initializer': {'class_name': 'Ones', 'config': {}},
     'moving_mean_initializer': {'class_name': 'Zeros', 'config': {}},
     'moving_variance_initializer': {'class_name': 'Ones', 'config': {}},
     'beta_regularizer': None,
     'gamma_regularizer': None,
     'beta_constraint': None,


# Search for the best architecture across all datasets

In [4]:
def leave_one_speaker_out(data, n=5, random_state=None):
    """Build leave-one-speaker-out folds for ``n`` randomly chosen speakers.

    Parameters
    ----------
    data : pandas.DataFrame
        Metadata table with an ``actor`` column identifying the speaker of
        each row.
    n : int, optional
        Number of distinct speakers to hold out, one per fold.
    random_state : int or None, optional
        Seed for a private random generator, making the folds reproducible.
        ``None`` (the default) draws from NumPy's global random state, as
        this function always did.

    Returns
    -------
    list of (numpy.ndarray, numpy.ndarray)
        One ``(train_indices, test_indices)`` pair per held-out speaker, each
        shuffled. The indices are positional row numbers (0 .. len(data)-1),
        which is what scikit-learn expects from a ``cv`` iterable; they no
        longer depend on the DataFrame's index labels.

    Raises
    ------
    ValueError
        Raised by the sampler if ``n`` exceeds the number of distinct speakers.
    """
    # Both the module and a RandomState expose choice()/shuffle(), so the
    # default keeps using the global state in the same call order as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    actors = data['actor'].to_numpy()
    speakers = data['actor'].unique()
    # random select n actors as test
    selected = rng.choice(speakers, size=n, replace=False)

    folds = []
    for speaker in selected:
        is_test = actors == speaker
        # Positions, not labels: for a default RangeIndex these are identical,
        # for a filtered or re-indexed table only positions address x and y.
        train_indices = np.flatnonzero(~is_test)
        test_indices = np.flatnonzero(is_test)
        rng.shuffle(train_indices)
        rng.shuffle(test_indices)
        folds.append((train_indices, test_indices))
    return folds

In [6]:
leave_one_speaker_out(data["emodb"]["metadata"], n=3)

[(array([245, 231, 813, 179, 654, 359, 291, 809, 850, 645, 750, 667, 128,
          51, 222,  32, 120, 256, 763, 188,  92,  72, 848, 592, 354,  18,
         316, 851, 852, 146, 164, 759, 705, 284, 277, 760, 619, 281, 301,
         203, 198, 184, 325, 697, 790, 370,  39, 672, 806,   6, 605,  20,
         391, 386, 732, 133, 854,  49, 747, 258, 132, 397, 169, 235, 371,
         114, 273, 157,   8,   9, 630, 151, 204, 564,  57, 299, 167, 815,
         712, 782, 155, 749,  90, 703, 382, 228,  25,  33, 707, 124, 131,
         618, 118, 816, 621,  31,  12, 178, 669,  78, 347, 383, 388, 578,
         565, 376, 693, 141, 644, 716, 622, 700, 651,  67,  16,  50, 839,
         138, 206, 662, 224, 314, 824, 743, 608, 350, 220, 103, 398, 346,
         139, 629,  79, 623,  15, 845,   1, 561, 600, 684, 597, 838, 650,
          96, 315, 580,  68,  70, 385, 748,  37, 855, 246, 601, 797, 172,
         187, 802, 677, 257, 302, 710, 724, 822, 823, 358, 232,  89, 728,
         292, 596,  74, 582,  66, 690,

In [84]:
from sklearn.model_selection import cross_val_score
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Draw the held-out speakers once per dataset and reuse them for every
# architecture. Re-drawing random folds inside the loop would score each
# architecture on different speakers, so the results would not be comparable.
cv_folds = {d: leave_one_speaker_out(data[d]["metadata"], n=3) for d in data}

results_arch = {}
for count, a in enumerate(archs):
    print(f'Architecture {count} of {len(archs)-1}')

    def create_model():
        """Rebuild an untrained copy of the current architecture and compile it."""
        # `a` is looked up when the model is built; that is safe here because
        # the classifier is only used inside this loop iteration.
        new_model = tensorflow.keras.Sequential.from_config(a)
        new_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return new_model

    model = KerasClassifier(build_fn=create_model, epochs=30, batch_size=32, verbose=0)

    results_arch[f"arch{count}"] = {}

    for d in data:
        print(f"\tTraining on {d}")
        x = data[d]["x"]
        y = data[d]["y"]
        # One score per held-out speaker, using the estimator's default scorer.
        res = cross_val_score(model, x, y, cv=cv_folds[d])
        results_arch[f"arch{count}"][d] = res

Architecture 0 of 5
	Training on emodb


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_sup

[CV] END .................................................... total time=  15.7s


Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklear

[CV] END .................................................... total time=  15.4s


Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklear

[CV] END .................................................... total time=  14.6s
	{'fit_time': array([15.35903835, 14.98572421, 14.26604462]), 'score_time': array([0.35453844, 0.36932445, 0.37788773]), 'test_f1_samples': array([nan, nan, nan]), 'train_f1_samples': array([nan, nan, nan]), 'test_accuracy': array([nan, nan, nan]), 'train_accuracy': array([nan, nan, nan]), 'test_roc_auc': array([nan, nan, nan]), 'train_roc_auc': array([nan, nan, nan])}
	Training on emovo


Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklear

[CV] END .................................................... total time=  14.9s


Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklear

[CV] END .................................................... total time=  15.8s


Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklear

[CV] END .................................................... total time=  15.3s
	{'fit_time': array([14.52608228, 15.44278026, 14.8459568 ]), 'score_time': array([0.37731171, 0.40298867, 0.42172623]), 'test_f1_samples': array([nan, nan, nan]), 'train_f1_samples': array([nan, nan, nan]), 'test_accuracy': array([nan, nan, nan]), 'train_accuracy': array([nan, nan, nan]), 'test_roc_auc': array([nan, nan, nan]), 'train_roc_auc': array([nan, nan, nan])}
	Training on ravdess


Traceback (most recent call last):
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1123, in f1_score
    return fbeta_score(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1261, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/nicolas/.conda/envs/thesis/lib/python3.9/site-packages/sklear

[CV] END .................................................... total time=  28.4s


KeyboardInterrupt: 

In [None]:
results_arch

In [None]:
# Report the mean and standard deviation of the fold scores, as percentages,
# for every architecture/dataset pair.
for arch, per_dataset in results_arch.items():
    print(arch)
    for d, scores in per_dataset.items():
        mean = round(np.mean(scores)*100, 3)
        std = round(np.std(scores)*100, 3)
        print(f'\t{d}: mean {mean}, std dev {std}')

# Hyperparameter optimization

In [142]:
# Architecture carried into the batch-size / epoch search (index 4 of `archs`).
chosen = archs[4]


def create_model_opt():
    """Return a freshly built, compiled copy of the chosen architecture."""
    net = tensorflow.keras.Sequential.from_config(chosen)
    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net


# batch_size and epochs are not fixed here; the grid search supplies them.
model = KerasClassifier(build_fn=create_model_opt, verbose=0)

In [143]:
from sklearn.model_selection import GridSearchCV
# Candidate values explored by the grid search.
batch_size = [16, 32, 64, 128]
epochs = [30, 50, 70]
param_grid = {"batch_size": batch_size, "epochs": epochs}

In [145]:
# Run the batch-size / epoch grid search separately on every dataset.
results_opt = {}
for d, parts in data.items():
    print(f'Dataset {d}')

    x, y, metadata = parts["x"], parts["y"], parts["metadata"]
    # Three randomly chosen held-out speakers define the folds for this dataset.
    cv = leave_one_speaker_out(metadata, n=3)

    grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv, verbose=3)
    # Keep whatever fit() returns so the scores can be inspected afterwards.
    grid_result = grid.fit(x, y)
    results_opt[d] = grid_result
    

Dataset emodb
Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV 1/3] END ..........batch_size=16, epochs=30;, score=0.649 total time=  18.0s
[CV 2/3] END ..........batch_size=16, epochs=30;, score=0.667 total time=  19.4s
[CV 3/3] END ..........batch_size=16, epochs=30;, score=0.631 total time=  18.5s
[CV 1/3] END ..........batch_size=16, epochs=50;, score=0.662 total time=  28.2s
[CV 2/3] END ..........batch_size=16, epochs=50;, score=0.564 total time=  30.4s
[CV 3/3] END ..........batch_size=16, epochs=50;, score=0.600 total time=  29.8s
[CV 1/3] END ..........batch_size=16, epochs=70;, score=0.662 total time=  40.0s
[CV 2/3] END ..........batch_size=16, epochs=70;, score=0.615 total time=  42.0s
[CV 3/3] END ..........batch_size=16, epochs=70;, score=0.585 total time=  40.4s
[CV 1/3] END ..........batch_size=32, epochs=30;, score=0.714 total time=  15.6s
[CV 2/3] END ..........batch_size=32, epochs=30;, score=0.641 total time=  16.6s
[CV 3/3] END ..........batch_size=

[CV 2/3] END ..........batch_size=64, epochs=30;, score=0.615 total time=  18.1s
[CV 3/3] END ..........batch_size=64, epochs=30;, score=0.600 total time=  16.2s
[CV 1/3] END ..........batch_size=64, epochs=50;, score=0.636 total time=  25.2s
[CV 2/3] END ..........batch_size=64, epochs=50;, score=0.641 total time=  26.6s
[CV 3/3] END ..........batch_size=64, epochs=50;, score=0.600 total time=  25.6s
[CV 1/3] END ..........batch_size=64, epochs=70;, score=0.649 total time=  34.8s
[CV 2/3] END ..........batch_size=64, epochs=70;, score=0.615 total time=  36.5s
[CV 3/3] END ..........batch_size=64, epochs=70;, score=0.662 total time=  35.3s
[CV 1/3] END .........batch_size=128, epochs=30;, score=0.623 total time=  16.4s
[CV 2/3] END .........batch_size=128, epochs=30;, score=0.667 total time=  15.4s
[CV 3/3] END .........batch_size=128, epochs=30;, score=0.554 total time=  15.0s


[CV 1/3] END .........batch_size=128, epochs=50;, score=0.662 total time=  23.3s
[CV 2/3] END .........batch_size=128, epochs=50;, score=0.641 total time=  24.7s
[CV 3/3] END .........batch_size=128, epochs=50;, score=0.600 total time=  23.5s
[CV 1/3] END .........batch_size=128, epochs=70;, score=0.649 total time=  31.8s
[CV 2/3] END .........batch_size=128, epochs=70;, score=0.692 total time=  35.1s
[CV 3/3] END .........batch_size=128, epochs=70;, score=0.662 total time=  32.3s
Dataset emovo
Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV 1/3] END ..........batch_size=16, epochs=30;, score=0.227 total time=  21.2s
[CV 2/3] END ..........batch_size=16, epochs=30;, score=0.311 total time=  21.1s
[CV 3/3] END ..........batch_size=16, epochs=30;, score=0.250 total time=  20.7s
[CV 1/3] END ..........batch_size=16, epochs=50;, score=0.286 total time=  33.9s
[CV 2/3] END ..........batch_size=16, epochs=50;, score=0.361 total time=  33.5s
[CV 3/3] END ..........batch_size=

[CV 1/3] END ..........batch_size=64, epochs=50;, score=0.252 total time=  29.1s
[CV 2/3] END ..........batch_size=64, epochs=50;, score=0.352 total time=  29.3s
[CV 3/3] END ..........batch_size=64, epochs=50;, score=0.258 total time=  28.6s
[CV 1/3] END ..........batch_size=64, epochs=70;, score=0.252 total time=  40.7s
[CV 2/3] END ..........batch_size=64, epochs=70;, score=0.369 total time=  40.7s
[CV 3/3] END ..........batch_size=64, epochs=70;, score=0.266 total time=  41.4s
[CV 1/3] END .........batch_size=128, epochs=30;, score=0.328 total time=  17.6s
[CV 2/3] END .........batch_size=128, epochs=30;, score=0.361 total time=  17.6s
[CV 3/3] END .........batch_size=128, epochs=30;, score=0.305 total time=  17.3s
[CV 1/3] END .........batch_size=128, epochs=50;, score=0.252 total time=  27.2s
[CV 2/3] END .........batch_size=128, epochs=50;, score=0.320 total time=  26.9s


[CV 3/3] END .........batch_size=128, epochs=50;, score=0.305 total time=  26.6s
[CV 1/3] END .........batch_size=128, epochs=70;, score=0.269 total time=  37.2s
[CV 2/3] END .........batch_size=128, epochs=70;, score=0.311 total time=  38.2s
[CV 3/3] END .........batch_size=128, epochs=70;, score=0.273 total time=  36.7s
Dataset ravdess
Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV 1/3] END ..........batch_size=16, epochs=30;, score=0.316 total time=  46.9s
[CV 2/3] END ..........batch_size=16, epochs=30;, score=0.433 total time=  52.8s
[CV 3/3] END ..........batch_size=16, epochs=30;, score=0.486 total time=  52.4s
[CV 1/3] END ..........batch_size=16, epochs=50;, score=0.354 total time= 1.3min
[CV 2/3] END ..........batch_size=16, epochs=50;, score=0.500 total time= 1.5min
[CV 3/3] END ..........batch_size=16, epochs=50;, score=0.403 total time= 1.4min
[CV 1/3] END ..........batch_size=16, epochs=70;, score=0.329 total time= 1.8min
[CV 2/3] END ..........batch_siz

[CV 1/3] END ..........batch_size=32, epochs=70;, score=0.329 total time= 1.7min
[CV 2/3] END ..........batch_size=32, epochs=70;, score=0.433 total time= 1.7min
[CV 3/3] END ..........batch_size=32, epochs=70;, score=0.444 total time= 1.7min
[CV 1/3] END ..........batch_size=64, epochs=30;, score=0.392 total time=  45.8s
[CV 2/3] END ..........batch_size=64, epochs=30;, score=0.483 total time=  47.9s
[CV 3/3] END ..........batch_size=64, epochs=30;, score=0.514 total time=  45.8s
[CV 1/3] END ..........batch_size=64, epochs=50;, score=0.291 total time= 1.2min
[CV 2/3] END ..........batch_size=64, epochs=50;, score=0.450 total time= 1.3min
[CV 3/3] END ..........batch_size=64, epochs=50;, score=0.514 total time= 1.2min
[CV 1/3] END ..........batch_size=64, epochs=70;, score=0.342 total time= 1.7min


[CV 2/3] END ..........batch_size=64, epochs=70;, score=0.533 total time= 1.7min
[CV 3/3] END ..........batch_size=64, epochs=70;, score=0.458 total time= 1.8min
[CV 1/3] END .........batch_size=128, epochs=30;, score=0.380 total time=  42.4s
[CV 2/3] END .........batch_size=128, epochs=30;, score=0.533 total time=  42.4s
[CV 3/3] END .........batch_size=128, epochs=30;, score=0.514 total time=  42.3s
[CV 1/3] END .........batch_size=128, epochs=50;, score=0.468 total time= 1.1min
[CV 2/3] END .........batch_size=128, epochs=50;, score=0.450 total time= 1.1min
[CV 3/3] END .........batch_size=128, epochs=50;, score=0.472 total time= 1.1min
[CV 1/3] END .........batch_size=128, epochs=70;, score=0.430 total time= 1.6min
[CV 2/3] END .........batch_size=128, epochs=70;, score=0.450 total time= 1.6min
[CV 3/3] END .........batch_size=128, epochs=70;, score=0.514 total time= 1.6min


In [146]:
# Print, per dataset, the best setting followed by every grid point's
# mean test score and its standard deviation across folds.
for d, grid_result in results_opt.items():
    print(d)
    print("\tBest: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    cv_table = grid_result.cv_results_
    rows = zip(cv_table['mean_test_score'], cv_table['std_test_score'], cv_table['params'])
    for mean, stdev, param in rows:
        print("\t%f (%f) with: %r" % (mean, stdev, param))

emodb
	Best: 0.677411 using {'batch_size': 32, 'epochs': 30}
	0.648929 (0.014658) with: {'batch_size': 16, 'epochs': 30}
	0.608813 (0.040586) with: {'batch_size': 16, 'epochs': 50}
	0.620779 (0.031958) with: {'batch_size': 16, 'epochs': 70}
	0.677411 (0.029910) with: {'batch_size': 32, 'epochs': 30}
	0.666245 (0.028795) with: {'batch_size': 32, 'epochs': 50}
	0.654168 (0.015106) with: {'batch_size': 32, 'epochs': 70}
	0.638894 (0.044571) with: {'batch_size': 64, 'epochs': 30}
	0.625796 (0.018340) with: {'batch_size': 64, 'epochs': 50}
	0.642091 (0.019529) with: {'batch_size': 64, 'epochs': 70}
	0.614630 (0.046472) with: {'batch_size': 128, 'epochs': 30}
	0.634454 (0.025870) with: {'batch_size': 128, 'epochs': 50}
	0.667732 (0.018076) with: {'batch_size': 128, 'epochs': 70}
emovo
	Best: 0.331025 using {'batch_size': 128, 'epochs': 30}
	0.262789 (0.035696) with: {'batch_size': 16, 'epochs': 30}
	0.309207 (0.036426) with: {'batch_size': 16, 'epochs': 50}
	0.257669 (0.008121) with: {'batch