In [7]:
import pandas as pd
import numpy as np
import matplotlib as plt
# !pip install seaborn
import seaborn as sns
from glob import glob
import librosa
import librosa.display
import IPython.display as ipd
from itertools import cycle
import soundfile
import os, pickle
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
sns.set_theme(style="white", palette=None)
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])

FREQUENCY

INTENSITY

SAMPLE RATE

In [36]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.utils import to_categorical

In [17]:
dataset_path = 'D:\\RAVDESS Emotional speech audio\\archive (5)'

In [35]:
def extract_features(file_path):
    try:
        audio, sample_rate = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
        mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
        contrast = librosa.feature.spectral_contrast(y=audio, sr=sample_rate)
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audio), sr=sample_rate)
        
        features = np.hstack((
            np.mean(mfccs.T, axis=0),
            np.mean(chroma.T, axis=0),
            np.mean(mel.T, axis=0),
            np.mean(contrast.T, axis=0),
            np.mean(tonnetz.T, axis=0)
        ))
        return features
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [37]:
emotion_map = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}

In [38]:
# Prepare a list to store the extracted features and corresponding labels
features = []
labels = []

# Iterate through the dataset
for root, dirs, files in os.walk(dataset_path):
    for file in files:
        if file.endswith('.wav'):
            file_path = os.path.join(root, file)
            extracted_features = extract_features(file_path)
            
            if extracted_features is not None:
                features.append(extracted_features)
                # Extract label from the file name
                parts = file.split('-')
                emotion = emotion_map[parts[2]]
                labels.append(emotion)

In [39]:
df = pd.DataFrame(features)
df['label'] = labels

# Print the DataFrame
print(df.head())

# Optionally, save the DataFrame to a CSV file
df.to_csv('ravdess_features.csv', index=False)

            0          1         2          3         4          5         6  \
0 -726.217224  68.541420  3.293398  12.205300  5.510278  13.667408 -2.983828   
1 -719.128296  70.201569  1.168397  13.122541  7.836950  14.411290 -4.111360   
2 -714.995728  69.689346  3.924564  11.924190  6.421723  11.011614 -2.878103   
3 -710.975281  67.564880  5.782241  13.230726  6.190845  12.628252 -1.675169   
4 -759.921753  75.783524  6.023605  14.557394  6.454187  14.631508 -3.004551   

          7         8         9  ...        157        158        159  \
0  3.098029 -3.310813 -1.564384  ...  14.896630  15.938653  17.161146   
1  4.468973 -3.539367 -3.658607  ...  14.797068  16.028111  17.303416   
2  4.509558 -4.476109 -2.671549  ...  15.356175  16.092042  17.107516   
3  5.657494 -4.950634 -3.477545  ...  15.625618  15.486327  17.372365   
4  4.620970 -5.200016 -0.707430  ...  15.277864  15.372324  16.627257   

        160       161       162       163       164       165    label  
0 -0.04

In [40]:
import soundfile as sf
import sounddevice as sd
def play_audio(file_path):
    try:
        # Load the audio file
        data, sample_rate = sf.read(file_path)
        # Play the audio file
        sd.play(data, sample_rate)
        # Wait until the file is done playing
        sd.wait()
    except Exception as e:
        print(f"Error playing {file_path}: {e}")

# Example: Play a specific audio file
example_file_path = os.path.join(dataset_path, 'Actor_01', '03-01-01-01-01-01-01.wav')
play_audio(example_file_path)

In [41]:
# Load the data
data = pd.read_csv('ravdess_features.csv')
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,157,158,159,160,161,162,163,164,165,label
0,-726.217224,68.541420,3.293398,12.205300,5.510278,13.667408,-2.983828,3.098029,-3.310813,-1.564384,...,14.896630,15.938653,17.161146,-0.041720,0.033167,-0.046392,-0.050864,0.012085,0.012860,neutral
1,-719.128296,70.201569,1.168397,13.122541,7.836950,14.411290,-4.111360,4.468973,-3.539367,-3.658607,...,14.797068,16.028111,17.303416,-0.072259,0.040765,-0.058097,-0.065337,0.026146,0.005308,neutral
2,-714.995728,69.689346,3.924564,11.924190,6.421723,11.011614,-2.878103,4.509558,-4.476109,-2.671549,...,15.356175,16.092042,17.107516,-0.029248,0.009355,0.008024,0.003081,0.009819,0.024897,neutral
3,-710.975281,67.564880,5.782241,13.230726,6.190845,12.628252,-1.675169,5.657494,-4.950634,-3.477545,...,15.625618,15.486327,17.372365,-0.050754,0.018956,0.011561,-0.040490,0.010340,0.009326,neutral
4,-759.921753,75.783524,6.023605,14.557394,6.454187,14.631508,-3.004551,4.620970,-5.200016,-0.707430,...,15.277864,15.372324,16.627257,-0.077921,0.020432,0.007521,-0.108841,0.025209,0.010315,calm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,-616.322449,45.899368,-20.175966,3.753582,-11.131477,-3.964839,-14.135321,-4.771224,-15.621428,-8.477671,...,17.711438,16.802803,20.631427,0.011145,0.010914,-0.012989,0.029897,0.004431,0.016376,surprised
2876,-553.992004,56.614849,-14.907667,2.665701,-13.776782,-4.072508,-17.319796,-5.079650,-10.142247,-8.829944,...,16.785011,17.009239,21.708454,-0.019153,0.007158,-0.000938,-0.008314,0.013664,-0.000264,surprised
2877,-575.845764,53.281605,-21.947104,5.056113,-12.553356,-2.494527,-20.614611,-6.246452,-10.795346,-8.800463,...,16.639615,17.078960,21.849838,-0.018989,0.024669,-0.043076,0.009058,0.011479,0.010839,surprised
2878,-522.466980,43.700798,-14.606612,12.619474,-11.496836,1.743054,-13.268276,0.747497,-13.840753,-2.510027,...,17.729482,16.652768,23.641165,-0.023364,0.022528,-0.052340,-0.029746,0.012789,-0.000936,surprised


In [43]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Separate features and labels
X = data.iloc[:, :-1].values
y = data['label'].values

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Normalize the features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Reshape the data for CNN
X_reshaped = X_normalized.reshape(X_normalized.shape[0], X_normalized.shape[1], 1, 1)

X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_categorical, test_size=0.2, random_state=42)


In [51]:
# Define the CNN model
model = Sequential()
model.add(Conv2D(32, (2, 1), activation='relu', input_shape=(X_train.shape[1], 1, 1),padding='same'))
model.add(MaxPooling2D(pool_size=(2, 1),padding='same'))
model.add(Dropout(0.25))
model.add(Conv2D(64, (2, 1), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 1),padding='same'))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(np.unique(y_encoded)), activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [52]:
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [53]:
#train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.2035 - loss: 1.9978 - val_accuracy: 0.3073 - val_loss: 1.7468
Epoch 2/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3186 - loss: 1.7293 - val_accuracy: 0.3924 - val_loss: 1.6191
Epoch 3/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3853 - loss: 1.6248 - val_accuracy: 0.4462 - val_loss: 1.5156
Epoch 4/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.4006 - loss: 1.5675 - val_accuracy: 0.4740 - val_loss: 1.4480
Epoch 5/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.4331 - loss: 1.4836 - val_accuracy: 0.4861 - val_loss: 1.3861
Epoch 6/50
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.4504 - loss: 1.4500 - val_accuracy: 0.5243 - val_loss: 1.3224
Epoch 7/50
[1m72/72[0m [32m━━━━━━━

In [54]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8928 - loss: 0.4683
Test Accuracy: 88.02%


In [59]:
model.save('ravdess_emotion_model_cnn.h5')
print("Model saved.")



Model saved.


In [71]:
# Assuming model.predict(X_test) gives predictions in the shape (num_samples, num_classes)
pred_test = model.predict(X_test)

# Decode predictions and true labels using inverse_transform
y_pred = label_encoder.inverse_transform(np.argmax(pred_test, axis=1))
y_true = label_encoder.inverse_transform(np.argmax(y_test, axis=1))

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [73]:
# Create a DataFrame to compare predicted and actual labels
df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
df['Predicted Labels'] = y_pred
df['Actual Labels'] = y_true

# Check the first 10 rows
print(df.head(10))

  Predicted Labels Actual Labels
0          disgust       disgust
1            happy         happy
2          disgust       fearful
3          fearful       fearful
4            happy         happy
5        surprised     surprised
6          disgust       disgust
7            angry         angry
8          disgust       disgust
9          neutral       neutral
