In [None]:
!pip install kaggle
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d uwrfkaggler/ravdess-emotional-speech-audio
!unzip ravdess-emotional-speech-audio.zip -d ./ravdess_data




In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
# Emotion label for each two-character code; get_emotion_from_filename looks
# the code up from the third hyphen-separated field of a file name.
EMOTIONS = dict((
    ('01', 'neutral'),
    ('02', 'calm'),
    ('03', 'happy'),
    ('04', 'sad'),
    ('05', 'angry'),
    ('06', 'fearful'),
    ('07', 'disgust'),
    ('08', 'surprised'),
))

In [None]:
def extract_mfcc(audio_path, max_len=40, n_mfcc=13):
    """Load an audio file and return an MFCC matrix with a fixed number of frames.

    Args:
        audio_path: Path of the audio file, handed to ``librosa.load``.
        max_len: Number of time frames (columns) in the returned matrix.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 13, the
            value that used to be hard-coded here.

    Returns:
        The MFCC array with its second axis forced to ``max_len``: shorter
        clips are zero-padded on the right, longer clips are truncated.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    signal, sample_rate = librosa.load(audio_path, sr=None)
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    n_frames = mfcc.shape[1]
    if n_frames < max_len:
        # Pad only the frame axis, and only at the end.
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]

    return mfcc

def get_emotion_from_filename(filename):
    """Return the emotion label encoded in a hyphen-separated audio file name.

    The third hyphen-separated field of the name is looked up in ``EMOTIONS``.

    Args:
        filename: A bare file name or a full path. Only the final path
            component is parsed, so hyphens in directory names cannot shift
            the field positions.

    Returns:
        The emotion label stored in ``EMOTIONS`` for the parsed code.

    Raises:
        IndexError: If the name has fewer than three hyphen-separated fields.
        KeyError: If the parsed code is not a key of ``EMOTIONS``.
    """
    emotion_code = os.path.basename(filename).split('-')[2]
    return EMOTIONS[emotion_code]

def load_data(dataset_path, max_len=40, emotions=('happy', 'sad', 'neutral')):
    """Walk a directory tree and build MFCC features and labels for selected emotions.

    Args:
        dataset_path: Root directory searched recursively for ``.wav`` files.
        max_len: Number of MFCC frames per sample, forwarded to ``extract_mfcc``.
        emotions: Emotion labels to keep. Defaults to the three classes that
            used to be hard-coded here.

    Returns:
        ``(X, y)``: an array of stacked MFCC matrices and an array of the
        matching emotion labels, in the same order.
    """
    wanted = set(emotions)
    seen_names = set()
    X = []
    y = []

    for root, dirs, files in os.walk(dataset_path):
        # Sorting in place fixes the traversal order, so a later seeded split
        # does not depend on the order the filesystem lists entries in.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.wav'):
                continue
            # A file name seen before is treated as the same recording and
            # skipped. NOTE(review): the Kaggle RAVDESS archive is reported to
            # ship a mirrored copy of the actor folders; loading both copies
            # would put identical clips on both sides of a train/test split.
            # Assumes file names identify recordings uniquely -- confirm
            # before reusing with other datasets.
            if file in seen_names:
                continue
            seen_names.add(file)

            # Check the label first so MFCCs are computed only for kept clips.
            emotion = get_emotion_from_filename(file)
            if emotion not in wanted:
                continue

            file_path = os.path.join(root, file)
            X.append(extract_mfcc(file_path, max_len=max_len))
            y.append(emotion)

    return np.array(X), np.array(y)

dataset_path = './ravdess_data'

X, y = load_data(dataset_path)
# Fail here with a clear message instead of deep inside the encoder or the split.
assert len(X) > 0, f'No matching .wav files found under {dataset_path}'
# Append a trailing channel axis so each sample is a single-channel 2-D input for Conv2D.
X = X[..., np.newaxis]
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_encoded = to_categorical(y_encoded, num_classes=3)
# stratify=y keeps the class proportions the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y
)
print(f'Data loaded: {X_train.shape[0]} training samples, {X_test.shape[0]} test samples')

In [None]:
def build_model(input_shape, num_classes=3):
    """Build and compile a small 2-D CNN classifier.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer. Defaults to
            3, the value that used to be hard-coded here.

    Returns:
        A compiled ``tf.keras.Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = tf.keras.Sequential([
        # Declare the input explicitly; passing input_shape= to the first
        # layer is deprecated in recent Keras releases.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    # categorical_crossentropy expects one-hot encoded targets.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Take the per-sample shape from the training data instead of repeating the
# MFCC dimensions here; with the default feature settings this is (13, 40, 1).
input_shape = X_train.shape[1:]
model = build_model(input_shape)
model.summary()


In [None]:
# Hold out the last 20% of the training data for validation, so the test set
# is used only for the final evaluation and never to judge training progress.
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.2)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f'\nTest Accuracy: {test_accuracy:.4f}')

In [None]:
def predict_tone(audio_path):
    """Predict the emotion label of a single audio file.

    Uses the notebook-level ``model`` for inference and the fitted label
    encoder ``le`` to turn the winning class index back into its label.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        The label that ``le`` maps the highest-scoring class index to.
    """
    features = extract_mfcc(audio_path)
    # Add a leading batch axis and a trailing channel axis around the MFCC matrix.
    batch = features[np.newaxis, ..., np.newaxis]
    scores = model.predict(batch)
    best_index = np.argmax(scores)
    labels = le.inverse_transform([best_index])
    return labels[0]
audio_path = '/content/03-01-04-01-01-01-01.wav'
if not os.path.exists(audio_path):
    # Nothing in this notebook creates that file, so on a fresh runtime fall
    # back to a clip of the same name inside the extracted dataset. If none
    # is found the original path is kept and the load error surfaces as before.
    sample_name = os.path.basename(audio_path)
    audio_path = next(
        (
            os.path.join(root, sample_name)
            for root, _, names in os.walk(dataset_path)
            if sample_name in names
        ),
        audio_path,
    )
predicted_tone = predict_tone(audio_path)
print(f'The predicted tone of the audio is: {predicted_tone}')