In [2]:
import os
import pathlib

import tensorflow as tf

In [3]:
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Call in a loop to draw a single-line text progress bar.

    The line is redrawn in place by starting it with a carriage return.
    A final newline is printed once ``iteration == total``.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int); 0 is rendered as a finished bar
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\\r", "\\r\\n") (Str)
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to process: show a complete bar instead of dividing by zero.
        fraction = 1.0
        filledLength = length
    # Keep the bar exactly `length` characters wide even for out-of-range input.
    filledLength = max(0, min(length, filledLength))

    percent = f"{100 * fraction:.{decimals}f}"
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total:
        print()

In [4]:
classname = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fear', 'disgust', 'surprise']
# Create one sub-folder per emotion class under data/.
# makedirs(..., exist_ok=True) also creates data/ itself when it is missing
# and does not fail when the cell is run a second time.
for emotion in classname:
    os.makedirs(os.path.join('data', emotion), exist_ok=True)

In [5]:
# CREMA-D
# Path: Dataset\CREMA-D\AudioWAV
# Move every clip into data/<emotion>/ (os.rename relocates the file, it does not copy it).

# Emotion code (third '_'-separated field of the file name) -> class folder.
crema_labels = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
    'CAL': 'calm',
    'SUR': 'surprise',
}

data_folder = os.path.join(os.getcwd(), 'CREMA-D', 'AudioWAV')

# List the folder once and pair each file with its destination label up front,
# so the progress total counts only files that will actually be moved.
# Entries without a recognised emotion code (e.g. '.DS_Store') are left in place.
crema_moves = []
for filename in os.listdir(data_folder):
    fields = filename.split('_')
    if len(fields) > 2 and fields[2] in crema_labels:
        crema_moves.append((filename, crema_labels[fields[2]]))

numOfSongsPath = len(crema_moves)
num = 0
# Skip the initial draw when there is nothing to move (e.g. the cell was already run).
if numOfSongsPath:
    printProgressBar(0, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)
for filename, label in crema_moves:
    num += 1
    src = os.path.join(data_folder, filename)
    dst = os.path.join('data', label, filename)
    os.rename(src, dst)
    printProgressBar(num, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)



Progress (7442/7442): |██████████████████████████████████████████████████| 100.0% Complete


In [6]:
# RAVDESS Emotional speech audio

# Path: Dataset\RAVDESS\Audio_Speech_Actors_01-24
# Move every clip into data/<emotion>/ (os.rename relocates the file).

# Third '-'-separated field of the file name -> class folder.
ravdess_codes = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fear',
    '07': 'disgust',
    '08': 'surprise',
}

data_folder = os.path.join(os.getcwd(), 'RAVDESS Emotional speech audio', 'Audio_Speech_Actors_01-24')
all_filenames = []
all_paths = []
all_labels = []

# First pass: walk the sub-folders and record path, name and label per file.
for folder in os.listdir(data_folder):
    if folder != '.DS_Store':
        for filename in os.listdir(os.path.join(data_folder, folder)):
            if filename != '.DS_Store':
                all_paths.append(os.path.join(data_folder, folder, filename))
                all_filenames.append(filename)
                code = filename.split('-')[2]
                # An unknown code is kept as-is, exactly like a fall-through if/elif chain.
                all_labels.append(ravdess_codes.get(code, code))

# Second pass: move the recorded files while updating the progress bar.
numOfSongsPath = len(all_paths)
num = 0
printProgressBar(0, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)
for num, (src, label, filename) in enumerate(zip(all_paths, all_labels, all_filenames), start=1):
    dst = os.path.join('data', label, filename)
    os.rename(src, dst)
    printProgressBar(num, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)

Progress (1440/1440): |██████████████████████████████████████████████████| 100.0% Complete


In [7]:
# Surrey Audio-Visual Expressed Emotion (SAVEE)

# Path: Dataset\SAVEE\ALL
# Move every clip into data/<emotion>/ (os.rename relocates the file, it does not copy it).

# Prefix of the second '_'-separated field of the file name -> class folder.
# Checked in this order; the first matching prefix wins.
savee_prefixes = (
    ('a', 'angry'),
    ('d', 'disgust'),
    ('f', 'fear'),
    ('h', 'happy'),
    ('n', 'neutral'),
    ('sa', 'sad'),
    ('su', 'surprise'),
)

data_folder = os.path.join(os.getcwd(), 'Surrey Audio-Visual Expressed Emotion (SAVEE)', 'ALL')

# List the folder once and resolve each file's label up front, so the progress
# total counts only files that will actually be moved. Entries with no '_' field
# or no matching prefix (e.g. '.DS_Store') are left in place.
savee_moves = []
for filename in os.listdir(data_folder):
    fields = filename.split('_')
    if len(fields) < 2:
        continue
    label = next((emotion for prefix, emotion in savee_prefixes if fields[1].startswith(prefix)), None)
    if label is not None:
        savee_moves.append((filename, label))

numOfSongsPath = len(savee_moves)
num = 0
# Skip the initial draw when there is nothing to move (e.g. the cell was already run).
if numOfSongsPath:
    printProgressBar(0, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)
for filename, label in savee_moves:
    num += 1
    src = os.path.join(data_folder, filename)
    dst = os.path.join('data', label, filename)
    os.rename(src, dst)
    printProgressBar(num, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)

Progress (480/480): |██████████████████████████████████████████████████| 100.0% Complete


In [8]:
# Toronto emotional speech set (TESS)

# Path: Dataset\TESS\TESS Toronto emotional speech set data
# Move every clip into data/<emotion>/ (os.rename relocates the file).
data_folder = os.path.join(os.getcwd(), 'Toronto emotional speech set (TESS)', 'TESS Toronto emotional speech set data', 'TESS Toronto emotional speech set data')
all_filenames = []
all_paths = []
all_labels = []

# First pass: walk the sub-folders and record path, name and label per file.
for folder in os.listdir(data_folder):
    if folder != '.DS_Store':
        for filename in os.listdir(os.path.join(data_folder, folder)):
            if filename != '.DS_Store':
                all_paths.append(os.path.join(data_folder, folder, filename))
                all_filenames.append(filename)
                # Label is the third '_'-separated field with everything from the first '.' dropped.
                stem = filename.split('_')[2].split('.')[0]
                # 'ps' is stored under the 'surprise' class; any other value is used unchanged.
                all_labels.append('surprise' if stem == 'ps' else stem)

# Second pass: move the recorded files while updating the progress bar.
numOfSongsPath = len(all_paths)
num = 0
printProgressBar(0, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)
for num, (src, label, filename) in enumerate(zip(all_paths, all_labels, all_filenames), start=1):
    dst = os.path.join('data', label, filename)
    os.rename(src, dst)
    printProgressBar(num, numOfSongsPath, prefix = f'Progress ({num}/{numOfSongsPath}):', suffix = 'Complete', length = 50)



Progress (2800/2800): |██████████████████████████████████████████████████| 100.0% Complete


In [18]:
# Gather every file matching data/<label>/<file> and shuffle the resulting list.
# No seed argument is passed to the shuffle here.
data_dir = pathlib.Path('data/')
pattern = str(data_dir) + '/*/*'
filenames = tf.random.shuffle(tf.io.gfile.glob(pattern))
num_samples = len(filenames)

print('Number of total examples:', num_samples)
# NOTE: despite its wording, this line reports the size of the 'angry' folder only.
angry_files = tf.io.gfile.listdir(str(data_dir/"angry"))
print('Number of examples per label:', len(angry_files))
print('Example file tensor:', filenames[0])

Number of total examples: 12162
Number of examples per label: 1923
Example file tensor: tf.Tensor(b'data\\angry\\YAF_shack_angry.wav', shape=(), dtype=string)
