In [1]:
!pip install librosa tensorflow numpy matplotlib



In [2]:
import librosa
import numpy as np
import os

def extract_features(file_path):
    audio, sr = librosa.load(file_path, duration=3, offset=0.5)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    return np.mean(mfcc.T, axis=0)

def load_data(dataset_path):
    X, y = [], []
    emotions = {
        "01": "neutral", "02": "calm", "03": "happy",
        "04": "sad", "05": "angry", "06": "fearful",
        "07": "disgust", "08": "surprised"
    }

    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith(".wav"):
                emotion = emotions[file.split("-")[2]]
                features = extract_features(os.path.join(root, file))
                X.append(features)
                y.append(emotion)

    return np.array(X), np.array(y)

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten

def build_model(input_shape, num_classes):
    model = Sequential([
        Conv1D(64, 3, activation='relu', input_shape=input_shape),
        MaxPooling1D(2),
        Dropout(0.3),

        Conv1D(128, 3, activation='relu'),
        MaxPooling1D(2),
        Dropout(0.3),

        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model


In [4]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

X, y = load_data("/content/")

le = LabelEncoder()
y = le.fit_transform(y)
y = to_categorical(y)

X = X.reshape(X.shape[0], X.shape[1], 1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = build_model((X.shape[1], 1), y.shape[1])
model.summary()

model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 671ms/step - accuracy: 0.0903 - loss: 19.2289 - val_accuracy: 0.0833 - val_loss: 7.6933
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 172ms/step - accuracy: 0.0868 - loss: 11.5196 - val_accuracy: 0.0833 - val_loss: 5.4487
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step - accuracy: 0.2083 - loss: 8.2516 - val_accuracy: 0.1667 - val_loss: 6.0842
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step - accuracy: 0.1389 - loss: 9.8272 - val_accuracy: 0.0833 - val_loss: 6.8109
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.1424 - loss: 7.8864 - val_accuracy: 0.0833 - val_loss: 5.3594
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.1910 - loss: 6.9125 - val_accuracy: 0.0833 - val_loss: 3.4927
Epoch 7/30
[1m2/2[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7c1e0905daf0>

In [5]:
model.save("emotion_model.h5")




In [7]:
from tensorflow.keras.models import load_model
import librosa
import numpy as np

model = load_model("emotion_model.h5")
emotions = ['neutral','calm','happy','sad','angry','fearful','disgust','surprised']

def predict_emotion(file):
    audio, sr = librosa.load(file, duration=3, offset=0.5)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    mfcc = np.mean(mfcc.T, axis=0)
    mfcc = mfcc.reshape(1, -1, 1)
    prediction = model.predict(mfcc)
    return emotions[np.argmax(prediction)]

print(predict_emotion("/content/03-01-01-01-01-01-01.wav"))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
disgust
