In [None]:
import pandas as pd
import numpy as np
import os
import sys
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
# to play the audio files
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [21]:
# Use relative path from Notebooks folder to Data folder
Ravdess_speech = "../Data/Audio_Speech_Actors_01-24/"
Ravdess_song = "../Data/Audio_Song_Actors_01-24/"

In [25]:
import os
import pandas as pd

def create_ravdess_dataframe(directory_path, data_type='speech'):
    """
    Create a DataFrame for Ravdess audio files with emotion labels
    Args:
        directory_path (str): Path to the audio files
        data_type (str): Type of audio data ('speech' or 'song')
    """
    file_emotion = []
    file_path = []

    for dir in os.listdir(directory_path):
        if dir.startswith('.'):
            continue
            
        actor_files = os.listdir(os.path.join(directory_path, dir))
        
        for file in actor_files:
            if file.startswith('.'):
                continue
                
            current_file_path = os.path.join(directory_path, dir, file)
            part = file.split('.')[0].split('-')
            
            file_emotion.append(int(part[2]))
            file_path.append(current_file_path)

    # Create DataFrames
    df = pd.DataFrame({
        'Emotions': file_emotion,
        'Path': file_path,
        'Type': data_type
    })

    # Map emotions to labels
    emotion_dict = {
        1: 'neutral',
        2: 'calm',
        3: 'happy',
        4: 'sad',
        5: 'angry',
        6: 'fear',
        7: 'disgust',
        8: 'surprise'
    }
    
    df['Emotions'] = df['Emotions'].map(emotion_dict)
    return df

# Create DataFrames for both speech and song data
Ravdess_speech = "../Data/Audio_Speech_Actors_01-24/"
Ravdess_song = "../Data/Audio_Song_Actors_01-24/"

ravdess_speech_df = create_ravdess_dataframe(Ravdess_speech, 'speech')
ravdess_song_df = create_ravdess_dataframe(Ravdess_song, 'song')

# Display results
print("\nSpeech Dataset Preview:")
print(ravdess_speech_df.head())
print("\nSpeech Emotion Distribution:")
print(ravdess_speech_df['Emotions'].value_counts())

print("\nSong Dataset Preview:")
print(ravdess_song_df.head())
print("\nSong Emotion Distribution:")
print(ravdess_song_df['Emotions'].value_counts())

# Optionally, combine both datasets
ravdess_complete_df = pd.concat([ravdess_speech_df, ravdess_song_df], ignore_index=True)


Speech Dataset Preview:
  Emotions                                               Path    Type
0    angry  ../Data/Audio_Speech_Actors_01-24/Actor_16/03-...  speech
1     fear  ../Data/Audio_Speech_Actors_01-24/Actor_16/03-...  speech
2     fear  ../Data/Audio_Speech_Actors_01-24/Actor_16/03-...  speech
3    angry  ../Data/Audio_Speech_Actors_01-24/Actor_16/03-...  speech
4  disgust  ../Data/Audio_Speech_Actors_01-24/Actor_16/03-...  speech

Speech Emotion Distribution:
Emotions
angry       192
fear        192
disgust     192
sad         192
surprise    192
happy       192
calm        192
neutral      96
Name: count, dtype: int64

Song Dataset Preview:
  Emotions                                               Path  Type
0    happy  ../Data/Audio_Song_Actors_01-24/Actor_16/03-02...  song
1    happy  ../Data/Audio_Song_Actors_01-24/Actor_16/03-02...  song
2     calm  ../Data/Audio_Song_Actors_01-24/Actor_16/03-02...  song
3     calm  ../Data/Audio_Song_Actors_01-24/Actor_16/03-02...  song

In [27]:
ravdess_complete_df

Unnamed: 0,Emotions,Path,Type
0,angry,../Data/Audio_Speech_Actors_01-24/Actor_16/03-...,speech
1,fear,../Data/Audio_Speech_Actors_01-24/Actor_16/03-...,speech
2,fear,../Data/Audio_Speech_Actors_01-24/Actor_16/03-...,speech
3,angry,../Data/Audio_Speech_Actors_01-24/Actor_16/03-...,speech
4,disgust,../Data/Audio_Speech_Actors_01-24/Actor_16/03-...,speech
...,...,...,...
2447,fear,../Data/Audio_Song_Actors_01-24/Actor_08/03-02...,song
2448,fear,../Data/Audio_Song_Actors_01-24/Actor_08/03-02...,song
2449,angry,../Data/Audio_Song_Actors_01-24/Actor_08/03-02...,song
2450,sad,../Data/Audio_Song_Actors_01-24/Actor_08/03-02...,song


In [28]:
ravdess_complete_df['Emotions'].value_counts()

Emotions
angry       376
fear        376
sad         376
happy       376
calm        376
disgust     192
surprise    192
neutral     188
Name: count, dtype: int64

In [29]:
ravdess_complete_df['Type'].value_counts()

Type
speech    1440
song      1012
Name: count, dtype: int64

In [34]:
# make me hear some samples of audio files
def play_audio_sample(file_path):
    """
    Play an audio sample from the given file path.
    Args:
        file_path (str): Path to the audio file
    """
    try:
        return Audio(file_path)
    except Exception as e:
        print(f"Error playing audio: {e}")
# Example usage: Play a sample audio file
sample_file_path = ravdess_complete_df['Path'].iloc[0]
play_audio_sample(sample_file_path)

In [35]:
sample_file_path = ravdess_complete_df['Path'].iloc[1089]
play_audio_sample(sample_file_path)

In [33]:
# save the complete ravdess dataframe to a csv file
output_csv_path = "../Data/ravdess_complete_df.csv"
ravdess_complete_df.to_csv(output_csv_path, index=False)