In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, activations
from tensorflow.python.keras.callbacks import ModelCheckpoint
import torch

  from .autonotebook import tqdm as notebook_tqdm


### Choose GPU

In [2]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

Name: /physical_device:GPU:0   Type: GPU
Name: /physical_device:GPU:1   Type: GPU


In [3]:
# Choose GPU
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

### Define variables

In [4]:
SPECTROGRAM = False             # If True, model is trained on spectrograms. If False, model is trained on mel-spectrograms.
classes = ["Anger", "Happy", "Neutral", "Sad"]

In [5]:
if SPECTROGRAM == True:
    path_train = "/Speech_emotion_recognition/New_Big_dataset_Spetember2022/Train_4_emotions/Spectrogram"
    path_test = "/Speech_emotion_recognition/New_Big_dataset_Spetember2022/Test_4_emotions/Spectrogram"
    log_directory = "/Speech_emotion_recognition/Testy_do_mgr/logs/TensorBoard/4_emotions_spec"
    filepath= '/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_spec.h5'

else:
    path_train = "/Speech_emotion_recognition/New_Big_dataset_Spetember2022/Train_4_emotions/Melspectrogram"
    path_test = "/Speech_emotion_recognition/New_Big_dataset_Spetember2022/Test_4_emotions/Melspectrogram"
    log_directory = "/Speech_emotion_recognition/Testy_do_mgr/logs/TensorBoard/4_emotions_mel"
    filepath= '/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_mel.h5'


### Methods

In [6]:
def get_train_data(train_data_path):
    """
    Loads train data from all datasets, for IEMOCAP data from all sessions except session 2

    Returns:
        data_train - training samples
        data_val - validation samples
        target_train - training targets
        target_val - validation targets
        
    """
    classNumber = 0
    targets = []
    img = []

    for current_folder in classes:
        emotion_folder = train_data_path + '/' + current_folder
        for fileName in os.listdir(emotion_folder):
            targets.append(classNumber)
            img.append(np.array(Image.open(emotion_folder + '/' + fileName).convert('RGB'))/255)
        classNumber += 1

    
    targets_array = np.asarray(targets)
    targets = []
    img_array = np.asarray(img)
    img = []

    targets_array = tf.keras.utils.to_categorical(targets_array)
    data_train, data_val, target_train, target_val = train_test_split(img_array, targets_array, test_size=0.25, random_state=0)

    print("shapes")
    print(data_train.shape)
    print(target_train.shape)
    print(data_val.shape)
    print(target_val.shape)

    return data_train, data_val, target_train, target_val

In [7]:
def get_test_data(test_data_path):
    """
    Loads test data from all datasets, for IEMOCAP data from all sessions except session 2

    Returns:
        data_test - test samples
        target_test_to_categorical - test targets
        
    """
    classNumber = 0
    targets = []
    img = []

    for current_folder in classes:
        emotion_folder = test_data_path + '/' + current_folder
        for fileName in os.listdir(emotion_folder):
            targets.append(classNumber)
            img.append(np.array(Image.open(emotion_folder + '/' + fileName).convert('RGB'))/255)
        classNumber += 1

    data_test = np.asarray(img)
    img = []
    target_test = np.asarray(targets)
    targets = []
    target_test_to_categorical = tf.keras.utils.to_categorical(target_test)

    print(data_test.shape)
    print(target_test_to_categorical.shape)

    return data_test, target_test_to_categorical

In [8]:
# MODEL SHEDULER
def scheduler(epoch, lr):
    if epoch < 15:
        eta = lr
    else:
        eta = lr - 0.00001
    if lr < 0.00005:
        eta = 0.00005

    return eta

### Get train data and define model

In [9]:
data_train, data_val, target_train, target_val = get_train_data(path_train)

# MODEL SHEDULER
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# TENSORBOARD
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_directory)

# SAVE MODEL 
checkpoint = ModelCheckpoint(filepath,monitor='val_loss',verbose=1,save_best_only=True,mode='min')


model = models.Sequential()
model.add(layers.Conv2D(75, (5, 5), activation='relu', input_shape=(231, 349, 3), padding='same'))
model.add(layers.MaxPooling2D((3, 3)))
model.add(layers.Conv2D(135, (5, 5), activation='relu', padding='same'))
model.add(layers.MaxPooling2D((3, 3)))
model.add(layers.Dropout(0.15))
model.add(layers.Conv2D(75, (5, 5), activation='relu', padding='same'))
model.add(layers.MaxPooling2D((3, 3)))
model.add(layers.Dropout(0.25))
model.add(layers.Flatten()) 
model.add(layers.Dense(45, activation='relu'))
model.add(layers.Dropout(0.2))
# model.add(layers.Dense(40, activation='relu'))
# model.add(layers.Dropout(0.3))
model.add(layers.Dense(4, activation='softmax'))


model.summary()


shapes
(7569, 231, 349, 3)
(7569, 4)
(2524, 231, 349, 3)
(2524, 4)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 231, 349, 75)      5700      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 77, 116, 75)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 77, 116, 135)      253260    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 25, 38, 135)      0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 25, 38, 135)       0         
                                                       

2022-09-30 08:13:07.671682: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-30 08:13:08.630822: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22839 MB memory:  -> device: 0, name: TITAN RTX, pci bus id: 0000:09:00.0, compute capability: 7.5
2022-09-30 08:13:08.635186: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22767 MB memory:  -> device: 1, name: TITAN RTX, pci bus id: 0000:41:00.0, compute capability: 7.5


### Model compile and model fit to start training

In [10]:
# MODEL COMPILE 
model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

In [11]:
# MODEL FIT 
history = model.fit(data_train, target_train,
            epochs=20,
            shuffle=True,
            validation_data=(data_val, target_val),
            callbacks=[tensorboard_callback, checkpoint])

Epoch 1/20


2022-09-30 08:13:16.664317: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8101


Epoch 00001: val_loss improved from inf to 0.99133, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_mel.h5
Epoch 2/20
Epoch 00002: val_loss improved from 0.99133 to 0.92415, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_mel.h5
Epoch 3/20
Epoch 00003: val_loss improved from 0.92415 to 0.90953, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_mel.h5
Epoch 4/20
Epoch 00004: val_loss improved from 0.90953 to 0.90244, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_mel.h5
Epoch 5/20
Epoch 00005: val_loss improved from 0.90244 to 0.88030, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_mel.h5
Epoch 6/20
Epoch 00006: val_loss improved from 0.88030 to 0.85212, saving model to /home/studenci/165122/Speech_emo

### Get test data, load weights of the best model and evaluate

In [12]:
data_train = []
data_val = []
target_train = []
target_val = []

targets = []
img = []

data_test, target_test_to_categorical = get_test_data(path_test)

(2909, 231, 349, 3)
(2909, 4)


In [13]:
model.load_weights(filepath=filepath)
    
results = model.evaluate(data_test, target_test_to_categorical, batch_size=1)
print("test loss, test acc:", results)

test loss, test acc: [1.1148383617401123, 0.5888621807098389]
