
[Dataset](https://drive.google.com/file/d/1wWsrN2Ep7x6lWqOXfr4rpKGYrJhWc8z7/view)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
Root = "/content/drive/MyDrive/Resume/speech-emotion-recognition-ravdess-data"
os.chdir(Root)

In [3]:
!pip install --upgrade librosa



In [None]:
!ls

Actor_01  Actor_05  Actor_09  Actor_13	Actor_17  Actor_21  modelForPrediction1.sav
Actor_02  Actor_06  Actor_10  Actor_14	Actor_18  Actor_22
Actor_03  Actor_07  Actor_11  Actor_15	Actor_19  Actor_23
Actor_04  Actor_08  Actor_12  Actor_16	Actor_20  Actor_24


In [4]:
import librosa
import soundfile
import glob
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd


In [5]:
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel_spectrogram = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel_spectrogram))
    return result

In [8]:
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

observed_emotions = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']

In [10]:
def load_data(test_size=0.2):
    x, y = [], []
    for file in glob.glob(os.path.join(Root, "Actor_*/*.wav")):
        file_name = os.path.basename(file)
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
x_train, x_test, y_train, y_test = load_data(test_size=0.25)

IndexError: list index out of range

In [None]:
print(f"Training samples: {x_train.shape[0]}, Testing samples: {x_test.shape[0]}")


Training samples: 576, Testing samples: 193


In [None]:
model = MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
model.fit(x_train, y_train)


In [None]:
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

f1 = f1_score(y_test, y_pred, average=None)
print(f"F1 Score: {f1}")

Accuracy: 63.21%
F1 Score: [0.65625    0.6440678  0.6        0.62903226]


In [None]:
df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(df.head(20))


     Actual Predicted
0   disgust   disgust
1      calm   disgust
2   disgust   disgust
3   disgust   disgust
4     happy   disgust
5     happy     happy
6   fearful     happy
7      calm     happy
8     happy     happy
9   disgust   disgust
10  fearful   disgust
11  disgust   disgust
12  fearful     happy
13    happy     happy
14  disgust   disgust
15  disgust     happy
16  disgust   disgust
17  disgust   disgust
18     calm     happy
19    happy     happy


In [None]:
model_filename = 'modelForPrediction1.sav'
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

loaded_model = pickle.load(open(model_filename, 'rb'))

In [None]:
from IPython.core.display import display, HTML
emotion_to_emoji = {
    'neutral': '😐',
    'calm': '😌',
    'happy': '😊',
    'sad': '😢',
    'angry': '😠',
    'fearful': '😨',
    'disgust': '🤢',
    'surprised': '😲'
}

sample_file = os.path.join(Root, "Actor_09/03-01-02-01-02-01-09.wav")
sample_feature = extract_feature(sample_file, mfcc=True, chroma=True, mel=True).reshape(1, -1)
sample_prediction = loaded_model.predict(sample_feature)[0]

emoji = emotion_to_emoji.get(sample_prediction, '')

html_content = f"""
<button onclick="openPopup()">Show Prediction</button>
<script>
function openPopup() {{
  var popup = window.open("", "popup", "width=400,height=500");
  popup.document.write(`
    <style>
      .exit-button {{
        font-size: 18px;
        padding: 10px 20px;
        background-color: #0000ff;
        color: #fff;
        border: none;
        border-radius: 5px;
        cursor: pointer;
      }}
      .exit-button:hover {{
        background-color: #007BFF;
      }}
    </style>
    <div style="display: flex; flex-direction: column; justify-content: center;
    align-items: center;
    height: 100%;
    background-color: #ff6666;">
      <div style="padding: 20px;
      border: 2px solid #ff0000; border-radius: 10px; background-color: #ff6666; text-align: center; margin-bottom: 20px;">
        <h3 style="color: #ff0000;">Predicted Emotion</h3>
        <p style="font-size: 24px; color: #ffdd00;">{sample_prediction}</p>
      </div>
      <div style="font-size: 100px; color: #ffdd00;">
        {emoji}
      </div>
      <div style="margin-top: 20px;">
        <button class="exit-button" onclick="window.close()">
          Exit ❌
        </button>
      </div>
    </div>
  `);
  popup.document.close();
}}
</script>
"""
display(HTML(html_content))


