In [1]:
import os # for interacting with the operating system
import numpy as np # for linear algebra operations
import pandas as pd # for data processing and reading/writing CSV files
import matplotlib.pyplot as plt # for plotting data
import seaborn as sns # for creating beautiful plots
import librosa # a library for analyzing audio and music
import librosa.display # for displaying audio data
from IPython.display import Audio # for playing audio files

# Importing necessary modules from scikit-learn library
# StandardScaler is used for feature scaling
# OneHotEncoder is used for one-hot encoding categorical variables
# confusion_matrix and classification_report are used for evaluating the performance of a classifier
# train_test_split is used for splitting the data into training and testing sets

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, AveragePooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model

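# NOTE(review): stray fragment neutralised -> "class": algorithms.Blowfish,  (not valid Python here; `algorithms` is never imported)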


In [2]:
def RAVDESS(dataset_path="D:\\final_voice_model_3\\RAVDESS\\"):
    """Index the RAVDESS audio files into female and male label/path tables.

    The directory at ``dataset_path`` is expected to contain one sub-folder
    per actor. Each audio file name is split on ``-`` (after dropping the
    extension): the third field selects the emotion label and the seventh
    field is read as an integer whose parity selects the table
    (even -> female, odd -> male).

    Parameters
    ----------
    dataset_path : str, optional
        Root folder of the RAVDESS corpus. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(females, males)``; both frames have the columns
        ``['labels', 'path']`` even when no matching file was found.
    """
    # Third dash-separated field of the file name -> emotion label.
    emotion_by_code = {
        '01': 'neutral',
        '02': 'calm',
        '03': 'happy',
        '04': 'sad',
        '05': 'angry',
        '06': 'fear',
        '07': 'disgust',
        '08': 'surprise',
    }
    columns = ['labels', 'path']
    females_audios = []
    males_audios = []

    # sorted() makes the row order independent of the file system's listing order.
    for actor in sorted(os.listdir(dataset_path)):
        actor_dir = os.path.join(dataset_path, actor)
        # Stray files next to the actor folders would make os.listdir() fail.
        if not os.path.isdir(actor_dir):
            continue

        for file in sorted(os.listdir(actor_dir)):
            idx = file.split('.')[0].split('-')
            # Ignore anything that does not carry the seven numeric fields
            # (hidden files, notes, ...) instead of crashing on idx[2]/idx[6].
            if len(idx) < 7 or not idx[6].isdecimal():
                continue

            emotion = emotion_by_code.get(idx[2], 'unknown')
            path = os.path.join(actor_dir, file)

            if int(idx[6]) % 2 == 0:
                females_audios.append([emotion, path])
            else:
                males_audios.append([emotion, path])

    # Passing `columns=` to the constructor (rather than assigning
    # `.columns` afterwards) also works when a list is empty.
    ravdess_females_data_frame = pd.DataFrame(females_audios, columns=columns)
    ravdess_males_data_frame = pd.DataFrame(males_audios, columns=columns)

    return ravdess_females_data_frame, ravdess_males_data_frame

In [3]:
def SAVEE(dataset_path="D:\\final_voice_model_3\\SAVEE\\"):
    """Index the SAVEE audio files into a single label/path table.

    Every entry directly inside ``dataset_path`` becomes one row. The
    emotion is taken from the two characters found eight to six positions
    from the end of the file name (``file[-8:-6]``); names whose tag is not
    recognised are kept with the label ``'unknown'``.

    Parameters
    ----------
    dataset_path : str, optional
        Folder holding the SAVEE audio files. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    pandas.DataFrame
        Columns ``['labels', 'path']``, one row per directory entry.
    """
    # Two-character tag taken from file[-8:-6] -> emotion label.
    emotion_by_tag = {
        '_a': 'angry',
        '_d': 'disgust',
        '_f': 'fear',
        '_h': 'happy',
        '_n': 'neutral',
        'sa': 'sad',
        'su': 'surprise',
    }

    records = []
    # sorted() makes the row order independent of the file system's listing order.
    for file in sorted(os.listdir(dataset_path)):
        emotion = emotion_by_tag.get(file[-8:-6], 'unknown')
        # os.path.join also copes with a dataset_path lacking a trailing separator.
        records.append((emotion, os.path.join(dataset_path, file)))

    # Build the frame in one step instead of concatenating two single-column frames.
    SAVEE_data_frame = pd.DataFrame(records, columns=['labels', 'path'])

    return SAVEE_data_frame

In [4]:
def TESS(dataset_path="D:\\final_voice_model_3\\TESS\\"):
    """Index the TESS audio files into a single label/path table.

    ``dataset_path`` is expected to contain one sub-folder per
    speaker/emotion pair, named ``OAF_<emotion>`` or ``YAF_<emotion>``.
    The folder name alone decides the label of every file inside it.
    Matching is case-insensitive, so ``OAF_Fear`` and ``OAF_fear`` are both
    recognised; folders that do not match are kept with the label
    ``'unknown'``.

    Parameters
    ----------
    dataset_path : str, optional
        Root folder of the TESS corpus. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    pandas.DataFrame
        Columns ``['labels', 'path']``, one row per file found in a
        sub-folder.
    """
    speaker_prefixes = {'OAF', 'YAF'}
    # Lower-cased folder suffix (text after the first underscore) -> label.
    emotion_by_suffix = {
        'angry': 'angry',
        'disgust': 'disgust',
        'fear': 'fear',
        'happy': 'happy',
        'neutral': 'neutral',
        'pleasant_surprise': 'surprise',
        'pleasant_surprised': 'surprise',
        'sad': 'sad',
    }

    records = []
    # sorted() makes the row order independent of the file system's listing order.
    for folder in sorted(os.listdir(dataset_path)):
        folder_path = os.path.join(dataset_path, folder)
        # Stray files next to the emotion folders would make os.listdir() fail.
        if not os.path.isdir(folder_path):
            continue

        # The label depends only on the folder, so resolve it once per folder.
        speaker, _, suffix = folder.partition('_')
        if speaker.upper() in speaker_prefixes:
            emotion = emotion_by_suffix.get(suffix.lower(), 'unknown')
        else:
            emotion = 'unknown'

        for file in sorted(os.listdir(folder_path)):
            records.append((emotion, os.path.join(folder_path, file)))

    TESS_data_frame = pd.DataFrame(records, columns=['labels', 'path'])

    return TESS_data_frame

In [5]:
def CREMA(dataset_path="D:\\final_voice_model_3\\CREMA\\"):
    """Index the CREMA audio files into female and male label/path tables.

    Each file name directly inside ``dataset_path`` is split on ``_``: the
    first field is read as an integer speaker ID and the third field is the
    emotion code. Speakers whose ID appears in the built-in female ID set
    go to the female table; every other speaker goes to the male table.
    Unrecognised emotion codes are kept with the label ``'unknown'``.

    Parameters
    ----------
    dataset_path : str, optional
        Folder holding the CREMA audio files. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(females, males)``; both frames have the columns
        ``['labels', 'path']`` even when no matching file was found.
    """
    # Speaker IDs treated as female; a set gives constant-time membership tests.
    female_number = frozenset([
        1002, 1003, 1004, 1006, 1007, 1008, 1009, 1010, 1012, 1013, 1018,
        1020, 1021, 1024, 1025, 1028, 1029, 1030, 1037, 1043, 1046, 1047,
        1049, 1052, 1053, 1054, 1055, 1056, 1058, 1060, 1061, 1063, 1072,
        1073, 1074, 1075, 1076, 1078, 1079, 1082, 1084, 1089, 1091,
    ])
    # Third underscore-separated field of the file name -> emotion label.
    emotion_by_code = {
        'SAD': 'sad',
        'ANG': 'angry',
        'DIS': 'disgust',
        'FEA': 'fear',
        'HAP': 'happy',
        'NEU': 'neutral',
    }
    columns = ['labels', 'path']
    females_audios = []
    males_audios = []

    # sorted() makes the row order independent of the file system's listing order.
    for file in sorted(os.listdir(dataset_path)):
        idx = file.split('_')
        # Ignore entries without a numeric speaker ID and an emotion field
        # instead of crashing on idx[2] / int(idx[0]).
        if len(idx) < 3 or not idx[0].isdecimal():
            continue

        emotion = emotion_by_code.get(idx[2], 'unknown')
        path = os.path.join(dataset_path, file)

        if int(idx[0]) in female_number:
            females_audios.append([emotion, path])
        else:
            males_audios.append([emotion, path])

    # Passing `columns=` to the constructor (rather than assigning
    # `.columns` afterwards) also works when a list is empty.
    crema_females_data_frame = pd.DataFrame(females_audios, columns=columns)
    crema_males_data_frame = pd.DataFrame(males_audios, columns=columns)

    return crema_females_data_frame, crema_males_data_frame

In [6]:
def integrate_datasets(output_dir="D:\\final_voice_model_3\\"):
    """Load all four corpora, preview them and write the per-gender tables.

    Calls ``SAVEE()``, ``TESS()``, ``RAVDESS()`` and ``CREMA()``, prints the
    first ten rows of every resulting frame, then stacks
    SAVEE + RAVDESS males + CREMA males into ``Males`` and
    TESS + RAVDESS females + CREMA females into ``Females``. Each combined
    frame is written (without its index) to a CSV file in ``output_dir``.

    Parameters
    ----------
    output_dir : str, optional
        Folder that receives ``males_emotions_data_frame.csv`` and
        ``females_emotions_data_frame.csv``. Defaults to the location the
        notebook was originally written against.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(Males, Females)``, each with a fresh 0..n-1 index.
    """
    SAVEE_data_frame = SAVEE()
    TESS_data_frame = TESS()
    ravdess_females_data_frame, ravdess_males_data_frame = RAVDESS()
    crema_females_data_frame, crema_males_data_frame = CREMA()

    # Heading / frame pairs, in the order they are shown to the reader.
    previews = [
        ("SAVEE Dataset\n", SAVEE_data_frame),
        ("TESS Dataset\n", TESS_data_frame),
        ("RAVDESS Dataset - females dataset\n", ravdess_females_data_frame),
        ("RAVDESS Dataset - males dataset\n", ravdess_males_data_frame),
        ("CREMA Dataset - females dataset\n", crema_females_data_frame),
        ("CREMA Dataset - males dataset\n", crema_males_data_frame),
    ]
    for heading, frame in previews:
        print(heading)
        print(frame.head(10), '\n')

    # ignore_index=True: without it the stacked frame keeps each source's own
    # 0..k index, so index labels repeat and label-based lookups return
    # several rows. The CSV output is unaffected because index=False.
    Males = pd.concat(
        [SAVEE_data_frame, ravdess_males_data_frame, crema_males_data_frame],
        axis=0,
        ignore_index=True,
    )
    Males.to_csv(os.path.join(output_dir, "males_emotions_data_frame.csv"), index=False)

    Females = pd.concat(
        [TESS_data_frame, ravdess_females_data_frame, crema_females_data_frame],
        axis=0,
        ignore_index=True,
    )
    Females.to_csv(os.path.join(output_dir, "females_emotions_data_frame.csv"), index=False)

    return Males, Females