In [16]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, activations
from tensorflow.python.keras.callbacks import ModelCheckpoint

### Define variables

In [17]:
# Feature-type switch read by the path-configuration cell below to pick the
# input image folders and the names of the log/checkpoint outputs.
SPECTROGRAM = False             # If True, model is trained on spectrograms. If False, model is trained on mel-spectrograms.
# Emotion labels. Each name is also the sub-folder name the loaders read from,
# and its position in this list is the integer class id assigned to the samples.
classes = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad"]

In [18]:
# The feature type selects both the input image folder and the suffix of every
# output artifact. Deriving the suffix once keeps spectrogram and
# mel-spectrogram runs from writing to each other's log/checkpoint paths.
if SPECTROGRAM:
    feature_folder, run_suffix = "Spectrogram", "spec"
else:
    feature_folder, run_suffix = "Melspectrogram", "mel"

dataset_root = "/Speech_emotion_recognition/New_Big_dataset_Spetember2022"
logs_root = "/Speech_emotion_recognition/Testy_do_mgr/logs"

# Input images (one sub-folder per emotion class)
path_train = f"{dataset_root}/Train_6_emotions/{feature_folder}"
path_test = f"{dataset_root}/Test_6_emotions/{feature_folder}"

# Outputs of the 6-emotion training run
log_directory = f"{logs_root}/TensorBoard/6_emotions_{run_suffix}"
filepath = f"{logs_root}/SavedModels/6_emotions_{run_suffix}.h5"

# Outputs of the 4-emotion run on top of the 6-emotion model
log_directory_on_4_emotions = f"{logs_root}/TensorBoard/4_emotions_on_model_trained_on_6_emotions_{run_suffix}"
filepath_on_4_emotions = f"{logs_root}/SavedModels/4_emotions_on_model_trained_on_6_emotions_{run_suffix}.h5"



### Methods

In [19]:
def get_train_data(train_data_path):
    """
    Loads the training images of every class and splits them into training
    and validation subsets.

    For each name in the module-level `classes` list, every file found in
    `train_data_path/<name>` is opened as an RGB image, scaled to [0, 1] and
    labelled with the index of that name in `classes`.

    Args:
        train_data_path - directory containing one sub-folder per class

    Returns:
        data_train - training samples (float32)
        data_val - validation samples (float32), 25% of all loaded samples
        target_train - one-hot training targets
        target_val - one-hot validation targets

    """
    targets = []
    img = []

    for class_number, current_folder in enumerate(classes):
        emotion_folder = os.path.join(train_data_path, current_folder)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # fixed-seed split below reproducible across runs and machines.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle once the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory needed compared with the float64
                # that a plain `/255` on uint8 pixels would produce.
                img.append(np.asarray(image.convert('RGB'), dtype=np.float32) / 255)
            targets.append(class_number)

    img_array = np.asarray(img)
    del img  # the per-image arrays were copied into img_array; free them early

    # An explicit width keeps the one-hot targets aligned with `classes` even
    # if the last class folder happens to be empty.
    targets_array = tf.keras.utils.to_categorical(np.asarray(targets), num_classes=len(classes))

    data_train, data_val, target_train, target_val = train_test_split(
        img_array, targets_array, test_size=0.25, random_state=0)

    print("shapes")
    print(data_train.shape)
    print(target_train.shape)
    print(data_val.shape)
    print(target_val.shape)

    return data_train, data_val, target_train, target_val

In [20]:
def get_train_data_4_emotions(train_data_path):
    """
    Loads the training images of every class except "Disgust" and "Fear" and
    splits them into training and validation subsets.

    The remaining names of the module-level `classes` list are numbered
    consecutively from 0 in list order; every file found in
    `train_data_path/<name>` is opened as an RGB image, scaled to [0, 1] and
    labelled with that number.

    Args:
        train_data_path - directory containing one sub-folder per class

    Returns:
        data_train - training samples (float32)
        data_val - validation samples (float32), 25% of all loaded samples
        target_train - one-hot training targets
        target_val - one-hot validation targets

    """
    excluded = ("Disgust", "Fear")
    selected_classes = [name for name in classes if name not in excluded]

    targets = []
    img = []

    for class_number, current_folder in enumerate(selected_classes):
        emotion_folder = os.path.join(train_data_path, current_folder)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # fixed-seed split below reproducible across runs and machines.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle once the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory needed compared with the float64
                # that a plain `/255` on uint8 pixels would produce.
                img.append(np.asarray(image.convert('RGB'), dtype=np.float32) / 255)
            targets.append(class_number)

    img_array = np.asarray(img)
    del img  # the per-image arrays were copied into img_array; free them early

    # An explicit width keeps the one-hot targets at one column per selected
    # class even if the last selected folder happens to be empty.
    targets_array = tf.keras.utils.to_categorical(
        np.asarray(targets), num_classes=len(selected_classes))

    data_train, data_val, target_train, target_val = train_test_split(
        img_array, targets_array, test_size=0.25, random_state=0)

    print("shapes")
    print(data_train.shape)
    print(target_train.shape)
    print(data_val.shape)
    print(target_val.shape)

    return data_train, data_val, target_train, target_val

In [21]:
def get_test_data_4_emotions(test_data_path):
    """
    Loads the test images of every class except "Disgust" and "Fear".

    The remaining names of the module-level `classes` list are numbered
    consecutively from 0 in list order; every file found in
    `test_data_path/<name>` is opened as an RGB image, scaled to [0, 1] and
    labelled with that number.

    Args:
        test_data_path - directory containing one sub-folder per class

    Returns:
        data_test - test samples (float32)
        target_test_to_categorical - one-hot test targets

    """
    excluded = ("Disgust", "Fear")
    selected_classes = [name for name in classes if name not in excluded]

    targets = []
    img = []

    for class_number, current_folder in enumerate(selected_classes):
        emotion_folder = os.path.join(test_data_path, current_folder)
        # Sorted so that sample order does not depend on the arbitrary order
        # in which os.listdir returns directory entries.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle once the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory needed compared with the float64
                # that a plain `/255` on uint8 pixels would produce.
                img.append(np.asarray(image.convert('RGB'), dtype=np.float32) / 255)
            targets.append(class_number)

    data_test = np.asarray(img)
    del img  # the per-image arrays were copied into data_test; free them early

    # An explicit width keeps the one-hot targets at one column per selected
    # class even if the last selected folder happens to be empty.
    target_test_to_categorical = tf.keras.utils.to_categorical(
        np.asarray(targets), num_classes=len(selected_classes))

    print(data_test.shape)
    print(target_test_to_categorical.shape)

    return data_test, target_test_to_categorical

In [22]:
def get_test_data(test_data_path):
    """
    Loads the test images of every class.

    For each name in the module-level `classes` list, every file found in
    `test_data_path/<name>` is opened as an RGB image, scaled to [0, 1] and
    labelled with the index of that name in `classes`.

    Args:
        test_data_path - directory containing one sub-folder per class

    Returns:
        data_test - test samples (float32)
        target_test_to_categorical - one-hot test targets

    """
    targets = []
    img = []

    for class_number, current_folder in enumerate(classes):
        emotion_folder = os.path.join(test_data_path, current_folder)
        # Sorted so that sample order does not depend on the arbitrary order
        # in which os.listdir returns directory entries.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle once the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory needed compared with the float64
                # that a plain `/255` on uint8 pixels would produce.
                img.append(np.asarray(image.convert('RGB'), dtype=np.float32) / 255)
            targets.append(class_number)

    data_test = np.asarray(img)
    del img  # the per-image arrays were copied into data_test; free them early

    # An explicit width keeps the one-hot targets aligned with `classes` even
    # if the last class folder happens to be empty.
    target_test_to_categorical = tf.keras.utils.to_categorical(
        np.asarray(targets), num_classes=len(classes))

    print(data_test.shape)
    print(target_test_to_categorical.shape)

    return data_test, target_test_to_categorical

In [23]:
# MODEL SCHEDULER
def scheduler(epoch, lr, warmup_epochs=15, step=0.00001, min_lr=0.00005):
    """
    Learning-rate schedule: constant at first, then a linear decay with a floor.

    Args:
        epoch - index of the epoch about to start
        lr - learning rate currently in use
        warmup_epochs - number of initial epochs during which lr is left unchanged
        step - amount subtracted from lr on every later epoch
        min_lr - lowest learning rate the schedule will ever return

    Returns:
        The learning rate to use for this epoch, never below min_lr.

    """
    eta = lr if epoch < warmup_epochs else lr - step
    # Clamp the *computed* rate: testing the incoming lr instead would let a
    # decay step starting just above the floor return a value below it.
    return max(eta, min_lr)

### Get train data and define model

In [24]:
# Load every training image under path_train and hold out 25% of the samples
# for validation; the function prints the four resulting array shapes.
data_train, data_val, target_train, target_val = get_train_data(path_train)


shapes
(6360, 231, 349, 3)
(6360, 6)
(2121, 231, 349, 3)
(2121, 6)


In [25]:

# MODEL SCHEDULER
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# TENSORBOARD
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_directory)

# SAVE MODEL
# Taken from the public tf.keras API like the other callbacks and the model;
# the private tensorflow.python.keras package should not be mixed with tf.keras.
# Only the epoch with the lowest validation loss is kept on disk.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')


# CNN: three conv/max-pool stages followed by a small dense classifier.
model = models.Sequential([
    layers.Conv2D(75, (5, 5), activation='relu', input_shape=(231, 349, 3), padding='same'),
    layers.MaxPooling2D((3, 3)),
    layers.Conv2D(135, (5, 5), activation='relu', padding='same'),
    layers.MaxPooling2D((3, 3)),
    layers.Dropout(0.15),
    layers.Conv2D(75, (5, 5), activation='relu', padding='same'),
    layers.MaxPooling2D((3, 3)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(45, activation='relu'),
    layers.Dropout(0.2),
    # One softmax output per emotion label, tied to `classes` instead of a literal 6.
    layers.Dense(len(classes), activation='softmax'),
])


# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 231, 349, 75)      5700      
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 77, 116, 75)      0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 77, 116, 135)      253260    
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 25, 38, 135)      0         
 2D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 25, 38, 135)       0         
                                                                 
 conv2d_5 (Conv2D)           (None, 25, 38, 75)       

### Model compile and model fit to start training

In [26]:
# Sanity check: the shape of a single training sample should equal the
# input_shape given to the first Conv2D layer of the model.
data_train[0].shape

(231, 349, 3)

In [27]:
# MODEL COMPILE
# Adam with its default settings; categorical cross-entropy pairs with the
# one-hot targets produced by the loaders and the softmax output layer.
# Accuracy is tracked as the only additional metric.
model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

In [28]:
# MODEL FIT
# Train for 20 epochs, validating on the held-out split after each epoch.
# TensorBoard logging and best-model checkpointing are attached as callbacks.
# NOTE(review): scheduler_callback is created above but is not in the callbacks
# list, so the learning-rate schedule is not applied — confirm this is intended.
history = model.fit(data_train, target_train,
            batch_size = 32,
            epochs=20,
            shuffle=True,
            validation_data=(data_val, target_val),
            callbacks=[tensorboard_callback, checkpoint])

Epoch 1/20
Epoch 00001: val_loss improved from inf to 1.46519, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 2/20
Epoch 00002: val_loss improved from 1.46519 to 1.31440, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 3/20
Epoch 00003: val_loss improved from 1.31440 to 1.27732, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 4/20
Epoch 00004: val_loss improved from 1.27732 to 1.20107, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 5/20
Epoch 00005: val_loss improved from 1.20107 to 1.18093, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 6/20
Epoch 00006: val_loss improved from 1.18093 to 1.17897, saving model to /home/studenci/165122

### Get test data, load weights of the best model and evaluate

In [29]:


# Load every test image under path_test together with its one-hot target;
# the function prints the two resulting array shapes.
data_test, target_test_to_categorical = get_test_data(path_test)

(2835, 231, 349, 3)
(2835, 6)


In [30]:
# Restore the weights from the file written by the checkpoint callback
# (save_best_only=True, so this is the epoch with the lowest validation loss)
# instead of evaluating the weights left over from the last epoch.
model.load_weights(filepath=filepath)
    
# Evaluates one sample per step (batch_size=1); results holds the loss
# followed by the accuracy metric configured in model.compile.
results = model.evaluate(data_test, target_test_to_categorical, batch_size=1)
print("test loss, test acc:", results)

test loss, test acc: [1.2449952363967896, 0.5742504596710205]
