In [44]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, activations
from tensorflow.python.keras.callbacks import ModelCheckpoint
import tensorflow_model_optimization as tfmot
import tempfile
import zipfile

# Short alias for the tfmot pruning wrapper; it is applied to the 4-emotion model in the pruning cell.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

### Define variables

In [11]:
# Input representation switch; it selects the data, log and checkpoint paths in the next cell.
SPECTROGRAM = False             # If True, model is trained on spectrograms. If False, model is trained on mel-spectrograms.
# Emotion labels. Each name is also the sub-folder the loaders read from, and the
# loaders number the classes in the order of this list, so the order must stay stable.
classes = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad"]

In [12]:
# The two input representations differ only in the image sub-folder and in the
# suffix of the log / checkpoint names, so every path is derived from those two values.
if SPECTROGRAM:
    feature_folder, run_suffix = "Spectrogram", "spec"
else:
    feature_folder, run_suffix = "Melspectrogram", "mel"

dataset_root = "/Speech_emotion_recognition/New_Big_dataset_Spetember2022"
logs_root = "/Speech_emotion_recognition/Testy_do_mgr/logs"

# Image folders (one sub-folder per emotion class inside each).
path_train = f"{dataset_root}/Train_6_emotions/{feature_folder}"
path_test = f"{dataset_root}/Test_6_emotions/{feature_folder}"

# TensorBoard log directory and best-checkpoint file of the 6-emotion model.
log_directory = f"{logs_root}/TensorBoard/6_emotions_{run_suffix}"
filepath = f"{logs_root}/SavedModels/6_emotions_{run_suffix}.h5"

# Same, for the 4-emotion model built on top of the 6-emotion one.
log_directory_on_4_emotions = f"{logs_root}/TensorBoard/4_emotions_on_model_trained_on_6_emotions_{run_suffix}"
filepath_on_4_emotions = f"{logs_root}/SavedModels/4_emotions_on_model_trained_on_6_emotions_{run_suffix}.h5"



### Methods

In [13]:
def get_train_data(train_data_path):
    """
    Loads every image of all emotion classes and splits it into train/validation sets.

    For each name in the module-level `classes` list, all files found in
    `<train_data_path>/<name>` are opened as RGB images, divided by 255 and
    labelled with the position of that name in `classes`. Targets are one-hot
    encoded and the data is split 75 % / 25 % with a fixed random state.

    NOTE(review): the previous docstring said that IEMOCAP session 2 is left out.
    No such filtering happens in this function, so it is presumably done when the
    folders are prepared - confirm.

    Args:
        train_data_path - directory containing one sub-folder per emotion class

    Returns:
        data_train - training samples
        data_val - validation samples
        target_train - training targets
        target_val - validation targets
        
    """
    targets = []
    images = []

    for class_index, class_name in enumerate(classes):
        emotion_folder = os.path.join(train_data_path, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore the seeded split below) reproducible between runs.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle as soon as the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory of the default float64 result.
                pixels = np.asarray(image.convert('RGB'), dtype=np.float32) / 255
            images.append(pixels)
            targets.append(class_index)

    img_array = np.asarray(images)
    del images  # release the per-image list, the stacked array is all that is needed

    # An explicit num_classes keeps the one-hot width fixed even if the last class folder is empty.
    targets_array = tf.keras.utils.to_categorical(np.asarray(targets), num_classes=len(classes))
    data_train, data_val, target_train, target_val = train_test_split(img_array, targets_array, test_size=0.25, random_state=0)

    print("shapes")
    print(data_train.shape)
    print(target_train.shape)
    print(data_val.shape)
    print(target_val.shape)

    return data_train, data_val, target_train, target_val

In [14]:
def get_train_data_4_emotions(train_data_path):
    """
    Loads the training images without "Disgust" and "Fear" and splits them into train/validation sets.

    The remaining names of the module-level `classes` list are numbered
    consecutively from 0 in list order. All files found in
    `<train_data_path>/<name>` are opened as RGB images and divided by 255.
    Targets are one-hot encoded and the data is split 75 % / 25 % with a fixed
    random state.

    NOTE(review): the previous docstring said that IEMOCAP session 2 is left out.
    No such filtering happens in this function, so it is presumably done when the
    folders are prepared - confirm.

    Args:
        train_data_path - directory containing one sub-folder per emotion class

    Returns:
        data_train - training samples
        data_val - validation samples
        target_train - training targets
        target_val - validation targets
        
    """
    kept_classes = [name for name in classes if name not in ("Disgust", "Fear")]
    targets = []
    images = []

    for class_index, class_name in enumerate(kept_classes):
        emotion_folder = os.path.join(train_data_path, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore the seeded split below) reproducible between runs.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle as soon as the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory of the default float64 result.
                pixels = np.asarray(image.convert('RGB'), dtype=np.float32) / 255
            images.append(pixels)
            targets.append(class_index)

    img_array = np.asarray(images)
    del images  # release the per-image list, the stacked array is all that is needed

    # An explicit num_classes keeps the one-hot width fixed even if the last class folder is empty.
    targets_array = tf.keras.utils.to_categorical(np.asarray(targets), num_classes=len(kept_classes))
    data_train, data_val, target_train, target_val = train_test_split(img_array, targets_array, test_size=0.25, random_state=0)

    print("shapes")
    print(data_train.shape)
    print(target_train.shape)
    print(data_val.shape)
    print(target_val.shape)

    return data_train, data_val, target_train, target_val

In [15]:
def get_test_data_4_emotions(test_data_path):
    """
    Loads the test images without "Disgust" and "Fear".

    The remaining names of the module-level `classes` list are numbered
    consecutively from 0 in list order. All files found in
    `<test_data_path>/<name>` are opened as RGB images and divided by 255.

    NOTE(review): the previous docstring said that IEMOCAP session 2 is left out.
    No such filtering happens in this function, so it is presumably done when the
    folders are prepared - confirm.

    Args:
        test_data_path - directory containing one sub-folder per emotion class

    Returns:
        data_test - test samples
        target_test_to_categorical - test targets
        
    """
    kept_classes = [name for name in classes if name not in ("Disgust", "Fear")]
    targets = []
    images = []

    for class_index, class_name in enumerate(kept_classes):
        emotion_folder = os.path.join(test_data_path, class_name)
        # Sorted so the sample order does not depend on the file system.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle as soon as the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory of the default float64 result.
                pixels = np.asarray(image.convert('RGB'), dtype=np.float32) / 255
            images.append(pixels)
            targets.append(class_index)

    data_test = np.asarray(images)
    del images  # release the per-image list, the stacked array is all that is needed

    # An explicit num_classes keeps the one-hot width fixed even if the last class folder is empty.
    target_test_to_categorical = tf.keras.utils.to_categorical(np.asarray(targets), num_classes=len(kept_classes))

    print(data_test.shape)
    print(target_test_to_categorical.shape)

    return data_test, target_test_to_categorical

In [16]:
def get_test_data(test_data_path):
    """
    Loads every test image of all emotion classes.

    For each name in the module-level `classes` list, all files found in
    `<test_data_path>/<name>` are opened as RGB images, divided by 255 and
    labelled with the position of that name in `classes`.

    NOTE(review): the previous docstring said that IEMOCAP session 2 is left out.
    No such filtering happens in this function, so it is presumably done when the
    folders are prepared - confirm.

    Args:
        test_data_path - directory containing one sub-folder per emotion class

    Returns:
        data_test - test samples
        target_test_to_categorical - test targets
        
    """
    targets = []
    images = []

    for class_index, class_name in enumerate(classes):
        emotion_folder = os.path.join(test_data_path, class_name)
        # Sorted so the sample order does not depend on the file system.
        for file_name in sorted(os.listdir(emotion_folder)):
            # The context manager closes the file handle as soon as the pixels are copied.
            with Image.open(os.path.join(emotion_folder, file_name)) as image:
                # float32 halves the memory of the default float64 result.
                pixels = np.asarray(image.convert('RGB'), dtype=np.float32) / 255
            images.append(pixels)
            targets.append(class_index)

    data_test = np.asarray(images)
    del images  # release the per-image list, the stacked array is all that is needed

    # An explicit num_classes keeps the one-hot width fixed even if the last class folder is empty.
    target_test_to_categorical = tf.keras.utils.to_categorical(np.asarray(targets), num_classes=len(classes))

    print(data_test.shape)
    print(target_test_to_categorical.shape)

    return data_test, target_test_to_categorical

In [17]:
# MODEL SCHEDULER
def scheduler(epoch, lr):
    """
    Learning-rate schedule with the signature expected by LearningRateScheduler.

    The rate is kept unchanged for the first 15 epochs and lowered by 1e-5 per
    epoch afterwards. The result is never lower than 5e-5.

    Args:
        epoch - index of the epoch that is about to start (0-based)
        lr - learning rate currently used by the optimizer

    Returns:
        learning rate to use for this epoch
    """
    min_lr = 0.00005
    decay_per_epoch = 0.00001

    eta = lr if epoch < 15 else lr - decay_per_epoch

    # Clamp the *new* value. Testing the incoming `lr` instead lets the returned
    # rate drop below the floor for one epoch before it is pulled back up.
    return max(eta, min_lr)

### Get train data and define model

In [18]:
# Load the 6-emotion training images and hold out 25 % of them for validation.
data_train, data_val, target_train, target_val = get_train_data(path_train)


shapes
(6047, 231, 349, 3)
(6047, 6)
(2016, 231, 349, 3)
(2016, 6)


In [19]:

# MODEL SCHEDULER
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# TENSORBOARD
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_directory)

# SAVE MODEL
# The public tf.keras callback is used on purpose: `tensorflow.python.keras` is a
# private module and its callbacks are not guaranteed to work with tf.keras models.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')


# Three conv / max-pool stages followed by a small dense classifier.
# input_shape matches the image shape reported by the data loader.
model = models.Sequential()
model.add(layers.Conv2D(75, (5, 5), activation='relu', input_shape=(231, 349, 3), padding='same'))
model.add(layers.MaxPooling2D((3, 3)))
model.add(layers.Conv2D(135, (5, 5), activation='relu', padding='same'))
model.add(layers.MaxPooling2D((3, 3)))
model.add(layers.Dropout(0.15))
model.add(layers.Conv2D(75, (5, 5), activation='relu', padding='same'))
model.add(layers.MaxPooling2D((3, 3)))
model.add(layers.Dropout(0.25))
model.add(layers.Flatten())
model.add(layers.Dense(45, activation='relu'))
model.add(layers.Dropout(0.2))
# One output unit per entry of `classes`.
model.add(layers.Dense(6, activation='softmax'))


# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 231, 349, 75)      5700      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 77, 116, 75)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 77, 116, 135)      253260    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 25, 38, 135)      0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 25, 38, 135)       0         
                                                                 
 conv2d_2 (Conv2D)           (None, 25, 38, 75)        2

2022-09-08 16:47:50.361622: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-08 16:47:51.077200: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22839 MB memory:  -> device: 0, name: TITAN RTX, pci bus id: 0000:09:00.0, compute capability: 7.5
2022-09-08 16:47:51.078650: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22576 MB memory:  -> device: 1, name: TITAN RTX, pci bus id: 0000:41:00.0, compute capability: 7.5


### Model compile and model fit to start training

In [20]:
# Sanity check: the shape of one training sample should equal the model's input_shape.
data_train[0].shape

(231, 349, 3)

In [21]:
# MODEL COMPILE 
model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

In [22]:
# MODEL FIT 
history = model.fit(data_train, target_train,
            batch_size = 32,
            epochs=20,
            shuffle=True,
            validation_data=(data_val, target_val),
            callbacks=[tensorboard_callback, checkpoint])

Epoch 1/20


2022-09-08 16:47:58.819362: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8101


Epoch 00001: val_loss improved from inf to 1.51600, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 2/20
Epoch 00002: val_loss improved from 1.51600 to 1.39308, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 3/20
Epoch 00003: val_loss improved from 1.39308 to 1.34305, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 4/20
Epoch 00004: val_loss improved from 1.34305 to 1.30312, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 5/20
Epoch 00005: val_loss improved from 1.30312 to 1.28046, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/6_emotions_mel.h5
Epoch 6/20
Epoch 00006: val_loss did not improve from 1.28046
Epoch 7/20
Epoch 00007: val_loss improved from 1.2804

### Get test data, load weights of the best model and evaluate

In [23]:


# Load the 6-emotion test images together with their one-hot targets.
data_test, target_test_to_categorical = get_test_data(path_test)

(2725, 231, 349, 3)
(2725, 6)


In [24]:
# Restore the best checkpoint (lowest val_loss) written during training.
model.load_weights(filepath=filepath)

# The reported loss/accuracy are averages over all samples, so the batch size does
# not change them; 32 (as in training) avoids one forward pass per test image.
results = model.evaluate(data_test, target_test_to_categorical, batch_size=32)
print("test loss, test acc:", results)

test loss, test acc: [1.36733877658844, 0.4858715534210205]


### Remove last layer of model and add new one suited for 4-emotions classification

In [25]:


# Reuse every layer of the trained 6-emotion model except its output layer and
# put a fresh 4-unit softmax on top. The reused layers are the same objects as in
# `model`, so training `model_4_emotions` also updates the weights seen by `model`.
model_4_emotions = models.Sequential(
    [*model.layers[:-1], layers.Dense(4, activation='softmax')]
)
model_4_emotions.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 231, 349, 75)      5700      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 77, 116, 75)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 77, 116, 135)      253260    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 25, 38, 135)      0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 25, 38, 135)       0         
                                                                 
 conv2d_2 (Conv2D)           (None, 25, 38, 75)       

### Drop 2 emotions for optimization purposes

In [26]:
# Reload the training data without "Disgust" and "Fear".
# NOTE: this overwrites the 6-emotion arrays loaded earlier under the same names.
data_train, data_val, target_train, target_val = get_train_data_4_emotions(path_train)

shapes
(3995, 231, 349, 3)
(3995, 4)
(1332, 231, 349, 3)
(1332, 4)


In [27]:
# MODEL COMPILE 
model_4_emotions.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])


In [28]:
# TENSORBOARD
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_directory_on_4_emotions)

# SAVE MODEL
# The public tf.keras callback is used on purpose: `tensorflow.python.keras` is a
# private module and its callbacks are not guaranteed to work with tf.keras models.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath_on_4_emotions, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# MODEL FIT
# `checkpoint` writes the weights with the lowest val_loss to `filepath_on_4_emotions`.
history = model_4_emotions.fit(data_train, target_train,
            batch_size = 32,
            epochs=10,
            shuffle=True,
            validation_data=(data_val, target_val),
            callbacks=[tensorboard_callback, checkpoint])

Epoch 1/10
Epoch 00001: val_loss improved from inf to 1.32393, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_on_model_trained_on_6_emotions_mel.h5
Epoch 2/10
Epoch 00002: val_loss improved from 1.32393 to 0.83835, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_on_model_trained_on_6_emotions_mel.h5
Epoch 3/10
Epoch 00003: val_loss improved from 0.83835 to 0.72996, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_on_model_trained_on_6_emotions_mel.h5
Epoch 4/10
Epoch 00004: val_loss improved from 0.72996 to 0.63784, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModels/4_emotions_on_model_trained_on_6_emotions_mel.h5
Epoch 5/10
Epoch 00005: val_loss improved from 0.63784 to 0.61329, saving model to /home/studenci/165122/Speech_emotion_recognition/Testy_do_mgr/logs/SavedModel

In [29]:
# Reload the test data without "Disgust" and "Fear" (overwrites the 6-emotion test arrays).
data_test, target_test_to_categorical = get_test_data_4_emotions(path_test)


(1808, 231, 349, 3)
(1808, 4)


In [30]:
# Restore the best checkpoint (lowest val_loss) written during training.
model_4_emotions.load_weights(filepath=filepath_on_4_emotions)

# The reported loss/accuracy are averages over all samples, so the batch size does
# not change them; 32 (as in training) avoids one forward pass per test image.
results = model_4_emotions.evaluate(data_test, target_test_to_categorical, batch_size=32)
print("test loss, test acc:", results)

test loss, test acc: [0.6457031965255737, 0.7516592741012573]


In [31]:
# Constant 50 % target sparsity from the first training step, with the pruning
# schedule evaluated every 100 steps.
pruning_params = dict(
    pruning_schedule=tfmot.sparsity.keras.ConstantSparsity(
        target_sparsity=0.5, begin_step=0, frequency=100
    ),
)

# UpdatePruningStep has to be passed to fit() when training a pruning-wrapped model.
callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]

pruned_model = prune_low_magnitude(model_4_emotions, **pruning_params)

# Suitably small `lr` value for the fine-tuning stage
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)

pruned_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [32]:
# Dotrenowanie modelu
pruned_model.fit(
  data_train,
  target_train,
  epochs=10,
  validation_data=(data_val, target_val),
  callbacks=callbacks)

stripped_pruned_model = tfmot.sparsity.keras.strip_pruning(pruned_model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [33]:
# Layer overview of the model after the pruning wrappers were stripped.
stripped_pruned_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d   (None, 231, 349, 75)     11327     
 (PruneLowMagnitude)                                             
                                                                 
 prune_low_magnitude_max_poo  (None, 77, 116, 75)      1         
 ling2d (PruneLowMagnitude)                                      
                                                                 
 prune_low_magnitude_conv2d_  (None, 77, 116, 135)     506387    
 1 (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_max_poo  (None, 25, 38, 135)      1         
 ling2d_1 (PruneLowMagnitude                                     
 )                                                               
                                                      

In [34]:
def print_model_weights_sparsity(model):
    """
    Prints, for every kernel weight of `model`, the share of entries equal to zero.

    For wrapper layers only the trainable weights are inspected, for all other
    layers every weight is. Weights whose name lacks "kernel" or contains
    "centroid" are skipped.

    Args:
        model - Keras model whose layers are inspected
    """
    for layer in model.layers:
        is_wrapper = isinstance(layer, tf.keras.layers.Wrapper)
        candidates = layer.trainable_weights if is_wrapper else layer.weights

        for variable in candidates:
            name = variable.name
            if "kernel" in name and "centroid" not in name:
                values = variable.numpy()
                total = values.size
                zeros = np.count_nonzero(values == 0)
                print(
                    f"{name}: {zeros/total:.2%} sparsity ",
                    f"({zeros}/{total})",
                )

def print_model_weight_clusters(model):
    """
    Prints, for every kernel weight of `model`, how many distinct values it holds.

    For wrapper layers only the trainable weights are inspected, for all other
    layers every weight is. Weights whose name contains "quantize_layer" are
    skipped, as are weights without "kernel" in their name.

    Args:
        model - Keras model whose layers are inspected
    """
    for layer in model.layers:
        is_wrapper = isinstance(layer, tf.keras.layers.Wrapper)
        candidates = layer.trainable_weights if is_wrapper else layer.weights

        for variable in candidates:
            name = variable.name
            # auxiliary quantization weights are not of interest here
            if "quantize_layer" in name or "kernel" not in name:
                continue
            distinct_values = np.unique(variable)
            print(
                f"{layer.name}/{name}: {len(distinct_values)} clusters "
            )

In [35]:
# Report the zero share and the number of distinct values per kernel after pruning.
print("Model sparsity:\n")
print_model_weights_sparsity(stripped_pruned_model)

print("\nModel clusters:\n")
print_model_weight_clusters(stripped_pruned_model)

Model sparsity:

conv2d/kernel:0: 50.01% sparsity  (2813/5625)
conv2d_1/kernel:0: 50.00% sparsity  (126563/253125)
conv2d_2/kernel:0: 50.00% sparsity  (126563/253125)
dense/kernel:0: 50.00% sparsity  (162000/324000)
dense_2/kernel:0: 50.00% sparsity  (90/180)

Model clusters:

conv2d/conv2d/kernel:0: 2813 clusters 
conv2d_1/conv2d_1/kernel:0: 126194 clusters 
conv2d_2/conv2d_2/kernel:0: 126321 clusters 
dense/dense/kernel:0: 161536 clusters 
dense_2/dense_2/kernel:0: 91 clusters 


In [36]:
# The experimental `cluster` module is imported because its cluster_weights is
# called with `preserve_sparsity` below.
from tensorflow_model_optimization.python.core.clustering.keras.experimental import (
    cluster,
)

CentroidInitialization = tfmot.clustering.keras.CentroidInitialization

# Only the experimental variant is bound to `cluster_weights`; the earlier
# assignment of tfmot.clustering.keras.cluster_weights was overwritten right away.
cluster_weights = cluster.cluster_weights

clustering_params = {
  'number_of_clusters': 8,
  'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,
  'preserve_sparsity': True
}

sparsity_clustered_model = cluster_weights(stripped_pruned_model, **clustering_params)

# A fresh optimizer with the same small learning rate as in the pruning stage.
# Re-using the instance that already trained `pruned_model` would carry its
# internal state over to a model with different variables.
sparsity_clustered_model.compile(
            loss='categorical_crossentropy',
            optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
            metrics=['accuracy'])



In [37]:
# Fine-tune the clustered model. The printed Polish message reads:
# "Training the model with weight sharing and preserved sparsity:".
print('Uczenie modelu z współdzieleniem wag i zachowaniem przerzedzenia:')
sparsity_clustered_model.fit(
  data_train,
  target_train,
  epochs=10,
  validation_data=(data_val, target_val))

Uczenie modelu z współdzieleniem wag i zachowaniem przerzedzenia:
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f3fcbf59400>

In [38]:
# Layer overview of the model while the clustering wrappers are still attached.
sparsity_clustered_model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cluster_conv2d (ClusterWeig  (None, 231, 349, 75)     11333     
 hts)                                                            
                                                                 
 cluster_max_pooling2d (Clus  (None, 77, 116, 75)      0         
 terWeights)                                                     
                                                                 
 cluster_conv2d_1 (ClusterWe  (None, 77, 116, 135)     506393    
 ights)                                                          
                                                                 
 cluster_max_pooling2d_1 (Cl  (None, 25, 38, 135)      0         
 usterWeights)                                                   
                                                                 
 cluster_dropout (ClusterWei  (None, 25, 38, 135)     

In [39]:
# Remove the clustering wrappers. The name is rebound to the stripped model, so
# from here on `sparsity_clustered_model` no longer refers to the wrapped model.
sparsity_clustered_model = tfmot.clustering.keras.strip_clustering(sparsity_clustered_model)

# Report the zero share and the number of distinct values per kernel after clustering.
print("Model sparsity:\n")
print_model_weights_sparsity(sparsity_clustered_model)

print("\nModel clusters:\n")
print_model_weight_clusters(sparsity_clustered_model)

Model sparsity:

kernel:0: 50.03% sparsity  (2814/5625)
kernel:0: 50.05% sparsity  (126694/253125)
kernel:0: 50.14% sparsity  (126921/253125)
kernel:0: 51.40% sparsity  (166547/324000)
kernel:0: 50.00% sparsity  (90/180)

Model clusters:

conv2d/kernel:0: 8 clusters 
conv2d_1/kernel:0: 8 clusters 
conv2d_2/kernel:0: 8 clusters 
dense/kernel:0: 8 clusters 
dense_2/kernel:0: 8 clusters 


In [42]:
# strip_clustering returned a new model object above, so it is compiled again
# before evaluate() is called on it.
sparsity_clustered_model.compile(loss='categorical_crossentropy',
            optimizer=opt,
            metrics=['accuracy'])

In [43]:
# Test accuracy of the final model, i.e. after pruning AND clustering, despite the
# variable name. The printed Polish message reads: "Accuracy of the pruned model:".
_, pruned_model_accuracy = sparsity_clustered_model.evaluate(data_test, target_test_to_categorical, verbose=1)
print('Dokładność modelu przerzedzonego:', pruned_model_accuracy)

Dokładność modelu przerzedzonego: 0.7549778819084167
