In [42]:
import zipfile

# Upload the zip file manually in Colab or use this if it's already uploaded
zip_path = "/content/Audio_Speech_Actors_01-24.zip"
extract_path = "/content/ravdess_data"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Dataset extracted to:", extract_path)
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
emotion_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, duration=3, offset=0.5)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return mfccs_scaled

def load_data(data_path):
    X, Y = [], []

    for actor_folder in os.listdir(data_path):
        actor_path = os.path.join(data_path, actor_folder)
        if not os.path.isdir(actor_path):
            continue

        for file in os.listdir(actor_path):
            if file.endswith(".wav"):
                emotion_code = file.split("-")[2]
                emotion = emotion_map.get(emotion_code)
                if emotion is None:
                    continue
                file_path = os.path.join(actor_path, file)
                features = extract_features(file_path)
                X.append(features)
                Y.append(emotion)

    return np.array(X), np.array(Y)
# Update this path if your data is elsewhere
data_path = "/content/ravdess_data"

X, Y = load_data(data_path)

# Encode labels
le = LabelEncoder()
Y_encoded = to_categorical(le.fit_transform(Y))

# Split dataset
X_train, X_test, Y_train, Y_test = train_test_split(X, Y_encoded, test_size=0.2, random_state=42)
model = Sequential()
model.add(Dense(256, input_shape=(40,), activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(8, activation='softmax'))  # 8 emotions

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train
model.fit(X_train, Y_train, epochs=50, batch_size=32, validation_data=(X_test, Y_test))
loss, accuracy = model.evaluate(X_test, Y_test)
print("Test Accuracy:", accuracy)


Dataset extracted to: /content/ravdess_data


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.1219 - loss: 33.6301 - val_accuracy: 0.0938 - val_loss: 2.3180
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1398 - loss: 8.3398 - val_accuracy: 0.1076 - val_loss: 2.1739
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1309 - loss: 4.7602 - val_accuracy: 0.1354 - val_loss: 2.0778
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1610 - loss: 3.4259 - val_accuracy: 0.1319 - val_loss: 2.0765
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1235 - loss: 2.7212 - val_accuracy: 0.1250 - val_loss: 2.0756
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1233 - loss: 2.5085 - val_accuracy: 0.1250 - val_loss: 2.0750
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━