In [2]:
import csv
import os
import shutil

from sklearn.preprocessing import LabelEncoder

In [None]:
# This function sorts the Crema audio files of one source folder by emotion.
# Iterate through the `./Dataset/<folder>` directory, and for each `.wav` file, check whether the third underscore-separated part of its name is one of the codes `ANG`, `FEA`, `HAP`, `NEU`, or `SAD` (`DIS` is currently disabled). If a match is found, copy the file to the corresponding emotion folder in the destination directory.

# Names of the per-emotion source folders under ./Dataset that are passed to
# CremaDivider. 'disgust' is currently disabled.
folder_emotion_map = ['anger', 'fear', 'happy', 'neutral', 'sad']

def CremaDivider(folder):
    """Copy Crema ``.wav`` files from ``./Dataset/<folder>`` into per-emotion folders.

    The emotion code is the third underscore-separated field of the file name.
    It is looked up in ``emotion_map`` and the file is copied to
    ``./FixedDataset/<Emotion>``, creating that folder on demand. Files that
    are not ``.wav``, have fewer than three fields, or carry a code that is not
    in the map (including the disabled ``DIS``) are left untouched.

    Args:
        folder: Name of the sub-directory of ``./Dataset`` to scan.

    Raises:
        FileNotFoundError: If ``./Dataset/<folder>`` does not exist.
    """
    print(f"Dividing Crema dataset {folder} in ...")
    src_dir = os.path.join('./Dataset', folder)
    dest_dir = './FixedDataset'
    # Emotion code embedded in the file name -> destination folder name.
    emotion_map = {
        'ANG': 'Anger',
        # 'DIS': 'Disgusted',
        'FEA': 'Fear',
        'HAP': 'Happy',
        'NEU': 'Neutral',
        'SAD': 'Sad'
    }
    for file in os.listdir(src_dir):
        if not file.endswith('.wav'):
            continue
        parts = file.split('_')
        if len(parts) < 3:
            continue
        target_emotion = emotion_map.get(parts[2])
        if not target_emotion:
            continue
        dest_folder = os.path.join(dest_dir, target_emotion)
        os.makedirs(dest_folder, exist_ok=True)
        # Stream the copy instead of loading the whole file into memory.
        shutil.copyfile(os.path.join(src_dir, file), os.path.join(dest_folder, file))



# Run the divider once for every source folder listed in folder_emotion_map.
for folder in folder_emotion_map:
    CremaDivider(folder)

Dividing Crema dataset...


In [15]:
# This function divides the emotion file from the Ravdess dataset.
# Iterate through the `./Dataset/Ravdess/audio_speech_actors_01-24` directory, and for each file, look up the third hyphen-separated number of its name in the emotion map. If a match is found, copy the file to the corresponding folder in the destination directory.

def RavdessDivider():
    """Copy RAVDESS ``.wav`` files into ``./FixedDataset/<Emotion>`` folders.

    Every sub-directory of ``./Dataset/Ravdess/audio_speech_actors_01-24`` is
    scanned. The third hyphen-separated field of each file name is looked up
    in ``emotion_map``; the destination folder is created on demand. Files
    that are not ``.wav``, have fewer than three fields, or carry an unknown
    code are skipped.

    Raises:
        FileNotFoundError: If the source directory does not exist.
    """
    print("Dividing RAVDESS dataset...")
    src_dir = './Dataset/Ravdess/audio_speech_actors_01-24'
    dest_dir = './FixedDataset'

    # Third field of the file name -> destination folder name.
    emotion_map = {
        '01': 'Neutral',
        '02': 'Neutral',  # The original is 'Calm', but we will use 'Neutral' for consistency
        '03': 'Happy',
        '04': 'Sad',
        '05': 'Anger',
        '06': 'Fear',
        '07': 'Disgusted',
        '08': 'Surprised'
    }

    for actor_folder in os.listdir(src_dir):
        actor_path = os.path.join(src_dir, actor_folder)
        if not os.path.isdir(actor_path):
            continue
        for file in os.listdir(actor_path):
            if not file.endswith('.wav'):
                continue
            parts = file.split('-')
            if len(parts) < 3:
                continue
            emotion_name = emotion_map.get(parts[2])
            if not emotion_name:
                continue
            dest_folder = os.path.join(dest_dir, emotion_name)
            os.makedirs(dest_folder, exist_ok=True)
            # Stream the copy instead of loading the whole file into memory.
            shutil.copyfile(os.path.join(actor_path, file), os.path.join(dest_folder, file))


In [16]:
# This function divides the emotion file from the Savee dataset.
# Iterate through the `./Dataset/Savee` directory, and for each file, check if its lower-cased name contains an underscore followed by one of the keywords: `a`, `d`, `f`, `h`, `n`, `sa`, or `su` (two-letter keywords are tried first). If a match is found, copy the file to the corresponding folder in the destination directory.

def SaveeDivider():
    """Copy SAVEE ``.wav`` files into ``./FixedDataset/<Emotion>`` folders.

    A file in ``./Dataset/Savee`` is assigned the first emotion whose code,
    prefixed with ``_``, occurs in the lower-cased file name. Two-letter codes
    are tried before one-letter codes. Files that match no code are skipped.
    The destination folder is created on demand.

    Raises:
        FileNotFoundError: If ``./Dataset/Savee`` does not exist.
    """
    print("Dividing SAVEE dataset...")
    src_dir = './Dataset/Savee'
    dest_dir = './FixedDataset'

    emotion_map = {
        'a': 'Anger',
        'd': 'Disgusted',
        'f': 'Fear',
        'h': 'Happy',
        'n': 'Neutral',
        'sa': 'Sad',
        'su': 'Surprised'
    }

    # Longest codes first ('sa'/'su' before the one-letter codes); computed
    # once because it does not depend on the file being examined.
    prefixes = sorted(emotion_map.keys(), key=lambda code: -len(code))

    for file in os.listdir(src_dir):
        if not file.endswith('.wav'):
            continue
        name = file.lower()
        emotion = next(
            (emotion_map[prefix] for prefix in prefixes if f'_{prefix}' in name),
            None,
        )
        if not emotion:
            continue
        dest_path = os.path.join(dest_dir, emotion)
        os.makedirs(dest_path, exist_ok=True)
        # Stream the copy instead of loading the whole file into memory.
        shutil.copyfile(os.path.join(src_dir, file), os.path.join(dest_path, file))


In [23]:
# This function divides the emotion file from the Tess dataset.
# Iterate through the `./Dataset/Tess` directory, and for each file, check if its folder contains one of the keywords as per the emotion map. If a match is found, copy the file to the corresponding folder in the destination directory.
def TessDivider():
    """Copy TESS ``.wav`` files into already existing ``./FixedDataset/<Emotion>`` folders.

    Each sub-directory of ``./Dataset/Tess`` is matched against the keys of
    ``emotion_map``: the first key (in map order) contained in the lower-cased
    folder name selects the target folder. Target folders are never created
    here; a matched source folder whose target is missing is reported as
    skipped. Source folders that match no key are reported separately.

    Raises:
        FileNotFoundError: If ``./Dataset/Tess`` does not exist.
    """
    print("Dividing TESS dataset to existing folders...")

    src_dir = './Dataset/Tess'
    dest_dir = './FixedDataset'

    emotion_map = {
        'angry': 'Anger',
        'disgust': 'Disgusted',
        'fear': 'Fear',
        'happy': 'Happy',
        'neutral': 'Neutral',
        'sad': 'Sad',
        'surprise': 'Surprised'
    }

    for folder in os.listdir(src_dir):
        folder_path = os.path.join(src_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        folder_lower = folder.lower()
        target_folder = next(
            (target for key, target in emotion_map.items() if key in folder_lower),
            None,
        )
        if target_folder is None:
            print(f"No emotion match found for folder: {folder}")
            continue

        target_path = os.path.join(dest_dir, target_folder)
        if not os.path.exists(target_path):
            # The folder did match an emotion, so only the skip is reported;
            # it must not additionally be reported as "no emotion match".
            print(f"Skipping '{folder}' — no target folder: {target_path}")
            continue

        for file in os.listdir(folder_path):
            if file.endswith('.wav'):
                # Stream the copy instead of loading the whole file into memory.
                shutil.copyfile(os.path.join(folder_path, file), os.path.join(target_path, file))

In [None]:
# Leftover folders are also sorted out here.

def DividingDataset(dataset_root='./Dataset', fixed_root='./FixedDataset'):
    """Copy ``.wav`` files from lower-case emotion folders into the target emotion folders.

    For every key of ``emotion_map`` the ``.wav`` files in
    ``<dataset_root>/<key>`` are copied to ``<fixed_root>/<mapped name>``.
    An emotion is skipped, with a message, when either folder is missing or
    when the source folder holds no ``.wav`` file. Target folders are not
    created here.

    Args:
        dataset_root: Directory holding the lower-case source folders.
        fixed_root: Directory holding the existing target folders. Defaults to
            ``./FixedDataset``, the destination used by the other dividers.
    """
    emotion_map = {
        'anger': 'Anger',
        # 'disgust': 'Disgusted',
        'fear': 'Fear',
        'happy': 'Happy',
        'neutral': 'Neutral',
        'sad': 'Sad'
    }

    for emotion, target_name in emotion_map.items():
        source_folder = os.path.join(dataset_root, emotion)
        dest_folder = os.path.join(fixed_root, target_name)

        if not os.path.exists(source_folder) or not os.path.exists(dest_folder):
            print(f"Skipping {emotion}: source or destination folder missing.")
            continue

        files = [f for f in os.listdir(source_folder) if f.endswith('.wav')]
        if not files:
            print(f"No .wav files found in {source_folder}")
            continue

        for file in files:
            # dest_folder is known to exist at this point, so no makedirs is needed.
            shutil.copyfile(os.path.join(source_folder, file), os.path.join(dest_folder, file))


In [9]:
# CremaDivider requires the source folder name, so run it once for every
# folder listed in folder_emotion_map.
for folder in folder_emotion_map:
    CremaDivider(folder)


Dividing Crema dataset...


In [20]:
# Copy the RAVDESS recordings into the per-emotion folders under ./FixedDataset.
RavdessDivider()


Dividing RAVDESS dataset...


In [21]:
# Copy the SAVEE recordings into the per-emotion folders under ./FixedDataset.
SaveeDivider()


Dividing SAVEE dataset...


In [24]:
# Copy the TESS recordings into the emotion folders that already exist under ./FixedDataset.
TessDivider()


Dividing TESS dataset to existing folders...


In [6]:
# The leftover-folder pass is defined in this notebook as DividingDataset.
DividingDataset()

In [None]:
dataset_dir = "Dataset"

file_paths = []
labels = []

# Each sub-directory of dataset_dir is one class; each .wav inside it is one sample.
for label in os.listdir(dataset_dir):
    label_dir = os.path.join(dataset_dir, label)
    if not os.path.isdir(label_dir):
        continue
    wav_names = [name for name in os.listdir(label_dir) if name.endswith(".wav")]
    file_paths.extend(os.path.join(label_dir, name) for name in wav_names)
    labels.extend([label] * len(wav_names))

# Turn the folder names into integer class ids.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

print("Unique Label Mappings:")
for encoded, original in enumerate(label_encoder.classes_):
    print(f"{original} -> {encoded}")

# One CSV row per audio file: its path and its encoded label.
with open("labeled_data.csv", mode="w", newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["file_path", "label"])
    writer.writerows(zip(file_paths, encoded_labels))

print("Labeled data saved to labeled_data.csv")


Unique Label Mappings:
Anger -> 0
Fear -> 1
Happy -> 2
Neutral -> 3
Sad -> 4
Labeled data saved to labeled_data.csv
