In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.utils import shuffle, class_weight
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import librosa


In [2]:
# Name fragments shared by the artefact paths below. Deriving the paths from
# them keeps the directory and file names in step when the experiment variant
# is changed.
_EXPERIMENT_TAG = "EmoDB_Separate_Speakers_v2"
_SPEAKER_TAG = "male"
_MODEL_TAG = "VGG16"
_ARTEFACT_STEM = f"{_EXPERIMENT_TAG}_{_SPEAKER_TAG}_log_{_MODEL_TAG}"

# Input audio corpus. create_dataframe_emodb() only accepts this value or
# "Datasets/Converted Datasets/EmoDB".
DATASET_DIR = "Datasets/EmoDB"

# Output locations (not referenced in the cells shown here; presumably used by
# later spectrogram / TFRecord / training cells -- TODO confirm).
SPECTROGRAM_DIR = f"Spectrograms/{_EXPERIMENT_TAG}/{_SPEAKER_TAG}/Log"
TF_RECORDS_DIR = f"TFRecords/{_EXPERIMENT_TAG}_{_SPEAKER_TAG}_{_MODEL_TAG}"
TF_RECORDS_NAME = f"{_ARTEFACT_STEM}.tfrecords"
MODEL_DIR = "Models"
MODEL_NAME = f"{_ARTEFACT_STEM}.h5"

# Equals the number of emotion labels in the EmoDB filename code table.
NUM_CLASSES = 7
# Rate (Hz) every recording is loaded at; also the length, in samples, of one
# audio block produced by preprocess_dataset().
SAMPLE_RATE = 16000

# Presumably training hyper-parameters and the seed for shuffling/initialisation;
# none of them is used in the cells shown here -- TODO confirm.
BATCH_SIZE = 32
EPOCHS = 50
RANDOM_SEED = 42

In [3]:
def create_dataframe_emodb():
    """Build a per-recording metadata table for the EmoDB corpus.

    Every ``.wav`` file in ``DATASET_DIR`` is described by decoding its name:
    characters 0-1 are the speaker number and character 5 is the one-letter
    emotion code (e.g. ``03a01Fa.wav`` -> speaker 3, ``F`` -> happiness).

    Directory entries that are not ``.wav`` files (``.DS_Store``,
    ``Thumbs.db``, notes, ...) are skipped instead of crashing the filename
    parser, and files are visited in sorted name order so the row order does
    not depend on the platform's ``os.listdir`` ordering.

    Returns:
        pandas.DataFrame with one row per recording and the columns
        ``person`` (int speaker number), ``gender`` (``'male'``/``'female'``),
        ``emotion`` (label string) and ``path`` (``DATASET_DIR`` joined with
        the file name).

    Raises:
        Exception: if ``DATASET_DIR`` is not one of the two supported
            EmoDB locations.
        ValueError: if a ``.wav`` name does not start with a two-digit
            speaker number.
        KeyError: if character 5 of a ``.wav`` name is not a known emotion code.
    """
    EMOTION_DICT_EMODB = {'W': 'anger', 'L': 'boredom', 'E': 'disgust', 'A': 'fear', 'F': 'happiness', 'T': 'sadness',
                          'N': 'neutral'}
    # Speaker numbers treated as male; every other speaker is labelled female.
    MALE_SPEAKERS = {3, 10, 11, 12, 15}
    COLUMNS = ['person', 'gender', 'emotion', 'path']

    if DATASET_DIR not in ("Datasets/EmoDB", "Datasets/Converted Datasets/EmoDB"):
        raise Exception(
            "DATASET_DIR must be set to 'Datasets/EmoDB' or 'Datasets/Converted Datasets/EmoDB' for EmoDB dataset")

    records = []
    # sorted(): os.listdir returns entries in arbitrary order.
    for file_name in sorted(os.listdir(DATASET_DIR)):
        if not file_name.lower().endswith(".wav"):
            continue  # not a recording; its name would not parse
        person = int(file_name[0:2])
        records.append({
            'person': person,
            'gender': 'male' if person in MALE_SPEAKERS else 'female',
            'emotion': EMOTION_DICT_EMODB[file_name[5]],
            'path': os.path.join(DATASET_DIR, file_name),
        })

    # Passing columns explicitly keeps the schema stable for an empty directory.
    emodb_df = pd.DataFrame(records, columns=COLUMNS)
    return emodb_df

In [4]:
def preprocess_dataset(ser_df):
    """Cut every recording into overlapping fixed-length blocks with one-hot labels.

    Each file is loaded at ``SAMPLE_RATE`` Hz, zero-padded at the end when it
    holds fewer than ``SAMPLE_RATE`` samples, and sliced into windows of
    ``SAMPLE_RATE`` samples (one second) whose start positions are
    ``SAMPLE_RATE // 100`` samples (10 ms) apart. Every window inherits the
    emotion label of the file it came from.

    Args:
        ser_df: DataFrame with at least a ``path`` column (audio file to load)
            and an ``emotion`` column (label of that file).

    Returns:
        Tuple ``(audio_block_list, emotion_list)``:
        ``audio_block_list`` is a 2-D array with one row of ``SAMPLE_RATE``
        samples per block; ``emotion_list`` is the dense one-hot encoding of
        the block labels, one row per block. The encoder derives its categories
        from the labels present in ``ser_df``, so the number of columns is the
        number of distinct emotions in the input, not necessarily
        ``NUM_CLASSES``.

    Raises:
        ValueError: if ``ser_df`` has no rows.
    """
    hop_length = SAMPLE_RATE // 100
    blocks_per_file = []
    block_labels = []
    for row in tqdm(ser_df.itertuples(), desc="Preprocessing audio files dataset", total=len(ser_df)):
        data, _ = librosa.load(row.path, sr=SAMPLE_RATE)
        if data.shape[0] < SAMPLE_RATE:
            # Guarantee at least one full-length window for very short clips.
            data = np.pad(data, (0, SAMPLE_RATE - data.shape[0]), 'constant')
        # librosa returns (frame_length, n_frames); transpose to one block per row.
        frames = librosa.util.frame(data, frame_length=SAMPLE_RATE, hop_length=hop_length).T
        blocks_per_file.append(frames)
        block_labels.extend([row.emotion] * frames.shape[0])

    if not blocks_per_file:
        raise ValueError("ser_df contains no audio files to preprocess")

    # One concatenate instead of appending tens of thousands of frames one by one.
    audio_block_list = np.concatenate(blocks_per_file, axis=0)
    emotion_list = np.array(block_labels)

    # scikit-learn renamed `sparse` to `sparse_output` and later removed the old
    # keyword; try the new name first and fall back for older installations.
    try:
        ohe = OneHotEncoder(categories='auto', sparse_output=False)
    except TypeError:
        ohe = OneHotEncoder(categories='auto', sparse=False)
    emotion_list = ohe.fit_transform(emotion_list[:, np.newaxis])
    return audio_block_list, emotion_list

In [5]:
# Build the per-recording metadata table (person, gender, emotion, path) from
# the files found in DATASET_DIR.
ser_df = create_dataframe_emodb()

In [6]:
# Slice every recording into overlapping SAMPLE_RATE-sample blocks and one-hot
# encode the matching emotion labels. All blocks are held in memory at once.
audio_block_list, emotion_list = preprocess_dataset(ser_df)

Preprocessing audio files dataset: 100%|██████████| 535/535 [00:07<00:00, 67.00it/s] 


In [7]:
# Total number of audio blocks produced by preprocess_dataset(). These are raw
# waveform windows; the label says "spectrograms" presumably because each block
# is later rendered as one spectrogram -- TODO confirm.
print(f"Number of spectrograms: {len(audio_block_list)}")

Number of spectrograms: 95489


In [8]:
# Show the metadata table (the notebook display truncates the middle rows).
ser_df

Unnamed: 0,person,gender,emotion,path
0,3,male,happiness,Datasets/EmoDB\03a01Fa.wav
1,3,male,neutral,Datasets/EmoDB\03a01Nc.wav
2,3,male,anger,Datasets/EmoDB\03a01Wa.wav
3,3,male,happiness,Datasets/EmoDB\03a02Fc.wav
4,3,male,neutral,Datasets/EmoDB\03a02Nc.wav
...,...,...,...,...
530,16,female,boredom,Datasets/EmoDB\16b10Lb.wav
531,16,female,sadness,Datasets/EmoDB\16b10Tb.wav
532,16,female,sadness,Datasets/EmoDB\16b10Td.wav
533,16,female,anger,Datasets/EmoDB\16b10Wa.wav


In [9]:
# Number of recordings per speaker gender, most frequent first.
ser_df["gender"].value_counts()

female    302
male      233
Name: gender, dtype: int64

In [10]:
# Number of recordings per speaker, ordered by speaker number.
ser_df['person'].value_counts().sort_index()

3     49
8     58
9     43
10    38
11    55
12    35
13    61
14    69
15    56
16    71
Name: person, dtype: int64

In [11]:
# Number of recordings per emotion label, in alphabetical label order.
ser_df['emotion'].value_counts().sort_index()

anger        127
boredom       81
disgust       46
fear          69
happiness     71
neutral       79
sadness       62
Name: emotion, dtype: int64

In [12]:
# Number of recordings for every (emotion, gender) pair; the result is a Series
# with a two-level index.
count_by_emotion_gender = ser_df.groupby(['emotion', 'gender']).size()
# Order the pairs by emotion, then gender, for display.
sorted_counts = count_by_emotion_gender.sort_index()
sorted_counts

emotion    gender
anger      female    67
           male      60
boredom    female    46
           male      35
disgust    female    35
           male      11
fear       female    33
           male      36
happiness  female    44
           male      27
neutral    female    40
           male      39
sadness    female    37
           male      25
dtype: int64

In [13]:
# Emotion distribution of each speaker. sort=False keeps the speakers in the
# order they first appear in ser_df.
for person, person_rows in ser_df.groupby('person', sort=False):
    print(f"\nPerson {person}")
    emotion_counts = person_rows['emotion'].value_counts()
    print(emotion_counts.sort_index())


Person 3
anger        14
boredom       5
disgust       1
fear          4
happiness     7
neutral      11
sadness       7
Name: emotion, dtype: int64

Person 8
anger        12
boredom      10
fear          6
happiness    11
neutral      10
sadness       9
Name: emotion, dtype: int64

Person 9
anger        13
boredom       4
disgust       8
fear          1
happiness     4
neutral       9
sadness       4
Name: emotion, dtype: int64

Person 10
anger        10
boredom       8
disgust       1
fear          8
happiness     4
neutral       4
sadness       3
Name: emotion, dtype: int64

Person 11
anger        11
boredom       8
disgust       2
fear         10
happiness     8
neutral       9
sadness       7
Name: emotion, dtype: int64

Person 12
anger        12
boredom       5
disgust       2
fear          6
happiness     2
neutral       4
sadness       4
Name: emotion, dtype: int64

Person 13
anger        12
boredom      10
disgust       8
fear          7
happiness    10
neutral       9
sadnes