In [None]:
import librosa
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Create the directory if it doesn't exist
if not os.path.exists('./ravdess-emotional-speech-audio'):
    os.makedirs('./ravdess-emotional-speech-audio')

# Download the RAVDESS dataset (this link provides a combined zip for all actors)
!wget -O ravdess-emotional-speech-audio.zip "https://zenodo.org/record/1188976/files/Audio_Speech_Actors_01-24.zip?download=1"

# Unzip the downloaded file directly into the target directory
!unzip -q ravdess-emotional-speech-audio.zip -d ./ravdess-emotional-speech-audio

print("RAVDESS dataset downloaded and extracted.")
print("Listing contents of the extracted directory:")
!ls ./ravdess-emotional-speech-audio

In [None]:
# Create a list of all audio file paths.
# os.walk yields entries in filesystem order, which can differ between
# machines; sorting makes the sample order (and therefore the seeded
# train/test split built from it) reproducible.
audio_files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk('./ravdess-emotional-speech-audio')
    for name in names
    if name.lower().endswith('.wav')
)

print("Found files:", len(audio_files))
print(audio_files[:5])

In [None]:
#Extract Labels
# Lookup table from the two-digit code in the filename to the emotion name.
emotion_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# The emotion code is the third '-'-separated field of each file's basename.
labels = [
    emotion_map[code]
    for code in (os.path.basename(path).split('-')[2] for path in audio_files)
]

In [None]:
# Install resampy
!pip install resampy
#Feature Extraction (MFCCs)
def extract_features(file, n_mfcc=40):
    """Return a fixed-length MFCC feature vector for one audio file.

    The file is loaded with ``librosa.load(..., res_type='kaiser_fast')``,
    ``n_mfcc`` MFCCs are computed, and every coefficient is averaged over
    all frames so clips of different length yield vectors of equal size.

    Args:
        file: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 40.

    Returns:
        1-D array of length ``n_mfcc`` holding the per-coefficient mean.
    """
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average the frames away.
    return np.mean(mfccs.T, axis=0)

# Build the feature matrix, one row per audio file; fall back to an empty
# array (with a notice) when no files were found.
if not audio_files:
    features = np.array([])
    print("No audio files found to extract features from.")
else:
    features = np.array(list(map(extract_features, audio_files)))

In [None]:
#Real-Time Prediction
from scipy.io.wavfile import write

def record_audio(filename="test.wav", duration=3, fs=44100):
    """Record mono audio from the default input device and save it as a WAV file.

    In Colab, direct microphone access via sounddevice for real-time recording
    from the user's local machine is not feasible. This function is retained
    for local use; in Colab, upload an audio file or use a browser-based
    recording solution instead.

    Args:
        filename: Path of the WAV file to write.
        duration: Recording length in seconds.
        fs: Sampling rate in Hz.
    """
    # Imported lazily so that merely defining this helper does not fail on a
    # machine where sounddevice (or the PortAudio library it wraps) is not
    # installed yet; the error only surfaces if a recording is attempted.
    import sounddevice as sd

    print("Recording...")
    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()  # block until the recording buffer has been filled
    write(filename, fs, audio)
    print("Saved:", filename)

print("Direct real-time recording from user's microphone is not supported in Colab backend.")
print("Instead, predicting emotion for an existing audio file from the dataset:")
# predict_emotion() is defined in the 'Predict New Audio' cell. Guard the
# call so that a top-to-bottom run of the notebook does not stop here with a
# NameError before that cell has been executed.
if "predict_emotion" in globals():
    print("Predicted Emotion:", predict_emotion('./ravdess-emotional-speech-audio/Actor_01/03-01-03-01-01-01-01.wav'))
else:
    print("predict_emotion() is not defined yet; run the 'Predict New Audio' cell after training, then re-run this cell.")

In [None]:
#Data Augmentation (Audio Improve)
def add_noise(data):
    """Return a copy of ``data`` with standard-normal noise scaled by 0.005 added.

    One noise sample is drawn per element of ``data`` from NumPy's global
    random state; ``data`` itself is not modified.
    """
    return data + 0.005 * np.random.randn(len(data))

def pitch_shift(file, n_steps=2):
    """Load an audio file and return its samples shifted in pitch.

    Args:
        file: Path of the audio file to load with ``librosa.load``.
        n_steps: Number of steps to shift by, passed through to
            ``librosa.effects.pitch_shift``. Defaults to 2.

    Returns:
        The pitch-shifted audio samples.
    """
    y, sr = librosa.load(file)
    # sr and n_steps are passed by keyword: newer librosa releases declare
    # them keyword-only, so a positional sr raises TypeError there.
    return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)


In [None]:
#Encode Labels
le = LabelEncoder()
if not labels:
    # Nothing to encode: keep an empty placeholder and report it.
    y = np.array([])
    print("No labels found to encode.")
else:
    # Map label strings to integer ids, then one-hot encode them for
    # multi-class classification.
    y = to_categorical(le.fit_transform(labels))

In [None]:
# Sanity check: dimensions of the extracted feature array.
print("Shape of features:", np.shape(features))

In [None]:
# Sanity check: dimensions of the encoded label array.
print("Shape of encoded labels:", np.shape(y))

In [None]:
#Train-Test Split
# Hold out 20% of the samples; the fixed random_state seeds the shuffle.
split_arrays = train_test_split(
    features,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_arrays

In [None]:
# Dimensions of the training feature array.
print(np.shape(X_train))

In [None]:
# Dimensions of the held-out feature array.
print(np.shape(X_test))

In [None]:
!apt-get update
!apt-get install -y portaudio19-dev
!pip install sounddevice scipy

In [None]:
#Build the Model
# Fully connected classifier over the 40-value MFCC vector: two ReLU layers
# with dropout, then a softmax with one unit per one-hot label column.
model = Sequential([
    Dense(256, input_shape=(40,), activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(y.shape[1], activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
#Train the Model
# The held-out split is passed as validation data, so the returned history
# contains both training and validation curves.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
)

In [None]:
#Try Different Models
# Conv2D expects 4-D input, so append two singleton axes to each 40-value
# feature vector: (samples, 40) -> (samples, 40, 1, 1).
cnn_input_shape = (-1, 40, 1, 1)
X_cnn = np.reshape(X_train, cnn_input_shape)
X_test_cnn = np.reshape(X_test, cnn_input_shape)

In [None]:
# Small convolutional classifier over the reshaped MFCC vectors: one
# convolution along the coefficient axis, pooling, then dense layers.
cnn = Sequential()
cnn.add(Conv2D(32, (3,1), activation='relu', input_shape=(40,1,1)))
cnn.add(MaxPooling2D((2,1)))
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dense(y.shape[1], activation='softmax'))

cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
cnn.fit(
    X_cnn,
    y_train,
    validation_data=(X_test_cnn, y_test),
    epochs=50,
    batch_size=32,
)

In [None]:
#Plot accuracy
# Drawn on its own figure: with the inline notebook backend a figure is
# rendered and closed when its cell finishes, so a two-panel layout cannot be
# completed from a different cell.
fig_acc, ax_acc = plt.subplots(figsize=(6, 4))
ax_acc.plot(history.history['accuracy'], label='Train')
ax_acc.plot(history.history['val_accuracy'], label='Test')
ax_acc.set_title('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()
plt.show()

In [None]:
#Plot loss
# Drawn on its own figure: with the inline notebook backend a figure opened
# in another cell has already been rendered and closed, so adding a second
# subplot here would land on a new, half-empty figure.
fig_loss, ax_loss = plt.subplots(figsize=(6, 4))
ax_loss.plot(history.history['loss'], label='Train')
ax_loss.plot(history.history['val_loss'], label='Test')
ax_loss.set_title('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()
plt.show()

In [None]:
#Predict New Audio
def predict_emotion(file):
    """Return the emotion label predicted for a single audio file.

    Extracts the file's feature vector with ``extract_features``, runs it
    through the global ``model`` as a batch of one, and converts the
    highest-scoring class index back to its label with the global ``le``.
    """
    mfcc_vector = extract_features(file)
    batch = mfcc_vector.reshape(1, -1)  # the model expects a 2-D batch
    scores = model.predict(batch)
    best_index = np.argmax(scores)
    return le.inverse_transform([best_index])[0]

# Try the classifier on one clip from the dataset; as the cell's last
# expression, the returned label is displayed as the cell output.
sample_clip = './ravdess-emotional-speech-audio/Actor_01/03-01-03-01-01-01-01.wav'
predict_emotion(sample_clip)

In [None]:
#Confusion matrix
from sklearn.metrics import confusion_matrix

# Compute the class indices in this cell instead of relying on variables
# created elsewhere, so it also works on a fresh top-to-bottom run.
y_pred_classes = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Passing labels= keeps the matrix square and aligned with le.classes_ even if
# some class never occurs in the test split or in the predictions.
cm = confusion_matrix(y_true, y_pred_classes, labels=np.arange(len(le.classes_)))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
#Classification Report
from sklearn.metrics import classification_report
import seaborn as sns

# Convert the one-hot targets and the predicted probabilities to class indices.
y_true = np.argmax(y_test, axis=1)
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Per-class precision / recall / F1, labelled with the original emotion names.
report = classification_report(y_true, y_pred_classes, target_names=le.classes_)
print(report)

In [None]:
#Model Save
# Persist the trained dense model to an HDF5 file in the working directory.
model.save(filepath="emotion_model.h5")

In [None]:
#Model Load
from tensorflow.keras.models import load_model

# Restore the saved model from disk, rebinding the global `model`.
model = load_model(filepath="emotion_model.h5")