In [1]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [20]:
# Define emotions in the dataset
emotion_labels = {
    1: 'neutral',
    2: 'calm',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fearful',
    7: 'disgust',
    8: 'surprised'
}

In [21]:
# Function to extract features from audio files
def extract_features(file_path):
    y, sr = librosa.load(file_path, duration=3, offset=0.5)  # Load audio file
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)  # Extract MFCCs
    mfccs = np.mean(mfccs.T, axis=0)  # Take mean for each coefficient
    return mfccs


In [22]:
# Load dataset
data_path = "D:/emotions"  # Update this path
features, labels = [], []


In [23]:
# Iterate through audio files and extract features
for folder in os.listdir(data_path):
    folder_path = os.path.join(data_path, folder)
    for file in os.listdir(folder_path):
        if file.endswith(".wav"):
            emotion_code = file.split("-")[2]  # Extract emotion code from filename
            if emotion_code in emotion_labels:
                feature = extract_features(os.path.join(folder_path, file))
                features.append(feature)
                labels.append(int(emotion_code) - 1)  # Convert label to numerical index

In [28]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define model
model = Sequential([
    Dense(256, activation='relu', input_shape=(40,)),  # Input layer
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(8, activation='softmax')  # Output layer (8 emotions)
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

In [29]:
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=32)


Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - accuracy: 0.1291 - loss: 31.7657 - val_accuracy: 0.1181 - val_loss: 2.2413
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1373 - loss: 6.1957 - val_accuracy: 0.1389 - val_loss: 2.0789
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1561 - loss: 3.3633 - val_accuracy: 0.1389 - val_loss: 2.0781
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1328 - loss: 2.6601 - val_accuracy: 0.1389 - val_loss: 2.0771
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1307 - loss: 2.3128 - val_accuracy: 0.1389 - val_loss: 2.0763
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1139 - loss: 2.3581 - val_accuracy: 0.1389 - val_loss: 2.0756
Epoch 7/50
[1m36/36[0m [32m━━━━

In [32]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1188 - loss: 2.0644
Test Accuracy: 13.89%


In [33]:
def predict_emotion(file_path):
    feature = extract_features(file_path)  # Extract features from the audio
    feature = np.expand_dims(feature, axis=0)  # Reshape for model input
    prediction = model.predict(feature)
    emotion = emotion_labels[str(np.argmax(prediction) + 1)]  # Get predicted emotion
    return emotion

# Test on a sample audio file
test_audio = "D:/emotions/Actor_01/03-01-01-01-01-02-01.wav"  # Update with a test audio file
predicted_emotion = predict_emotion(test_audio)
print(f"Predicted Emotion: {predicted_emotion}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step


KeyError: '7'