In [1]:
!kaggle datasets download -d uwrfkaggler/ravdess-emotional-speech-audio -p D:\ravdess_data
!unzip D:\ravdess_data\ravdess-emotional-speech-audio.zip -d D:\ravdess_data 
#trzeba ręcznie unzipowac

Dataset URL: https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio
License(s): CC-BY-NC-SA-4.0
Downloading ravdess-emotional-speech-audio.zip to D:\ravdess_data




  0%|          | 0.00/429M [00:00<?, ?B/s]
  0%|          | 1.00M/429M [00:01<09:02, 827kB/s]
  0%|          | 2.00M/429M [00:01<05:46, 1.29MB/s]
  1%|          | 3.00M/429M [00:02<04:13, 1.76MB/s]
  1%|          | 4.00M/429M [00:02<03:27, 2.15MB/s]
  1%|          | 5.00M/429M [00:02<02:54, 2.54MB/s]
  1%|▏         | 6.00M/429M [00:03<02:39, 2.78MB/s]
  2%|▏         | 7.00M/429M [00:03<02:45, 2.67MB/s]
  2%|▏         | 8.00M/429M [00:03<02:53, 2.55MB/s]
  2%|▏         | 9.00M/429M [00:04<02:44, 2.67MB/s]
  2%|▏         | 10.0M/429M [00:04<02:41, 2.72MB/s]
  3%|▎         | 11.0M/429M [00:04<02:36, 2.80MB/s]
  3%|▎         | 12.0M/429M [00:05<02:31, 2.89MB/s]
  3%|▎         | 13.0M/429M [00:05<02:40, 2.73MB/s]
  3%|▎         | 14.0M/429M [00:06<02:30, 2.89MB/s]
  3%|▎         | 15.0M/429M [00:06<02:47, 2.60MB/s]
  4%|▎         | 16.0M/429M [00:06<02:53, 2.50MB/s]
  4%|▍         | 17.0M/429M [00:07<02:49, 2.55MB/s]
  4%|▍         | 18.0M/429M [00:07<02:46, 2.59MB/s]
  4%|▍         | 19.0

In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import wave

In [8]:
def load_data(data_dir):
    labels = []
    features = []
    for dirname, _, filenames in os.walk(data_dir):
        for filename in filenames:
            if filename.endswith('.wav'):
                file_path = os.path.join(dirname, filename)
                y, sr = librosa.load(file_path, sr=48000)
                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
                mfccs = np.mean(mfccs.T, axis=0)
                features.append(mfccs)
                label = int(filename.split('-')[2])
                labels.append(label)
    return np.array(features), np.array(labels)

X, y = load_data("D:\\ravdess_data\\")
print("Liczba próbek:", len(X))
print("Liczba etykiet:", len(y))
print("Przykładowe cechy (pierwsza próbka):", X[0])
print("Przykładowa etykieta (pierwsza próbka):", y[0])

Liczba próbek: 2880
Liczba etykiet: 2880
Przykładowe cechy (pierwsza próbka): [-726.2172      68.54142      3.2933977   12.2053       5.5102777
   13.667408    -2.9838283    3.0980291   -3.3108134   -1.5643843
   -7.8616524   -2.1242816    2.849204 ]
Przykładowa etykieta (pierwsza próbka): 1


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

predictions = model.predict(X_test)
print("Dokładność: ", accuracy_score(y_test, predictions))

Dokładność:  0.9097222222222222


In [12]:
import joblib

joblib.dump(model, "D:\\noc_naukowców\\emotion_recognition_model.joblib")
print("Model zapisany pomyślnie.")

Model zapisany pomyślnie.


In [25]:
import numpy as np
import librosa
import joblib
import os

def test_model_on_file(file_path, model):
    """
    Testuj model na nowym pliku audio.

    Args:
    - file_path (str): Ścieżka do pliku audio.
    - model: Wytrenowany model klasyfikacji.

    Returns:
    - emotion (str): Przewidywana emocja.
    """
    try:
        y, sr = librosa.load(file_path, sr=48000)
        
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs = np.mean(mfccs.T, axis=0)
        
        mfccs = mfccs.reshape(1, -1)
        
        prediction = model.predict(mfccs)
        
        label_map = {
            1: 'Neutral',
            2: 'Calm',
            3: 'Happiness',
            4: 'Sadness',
            5: 'Anger',
            6: 'Fear',
            7: 'Disgust',
            8: 'Surprise'
        }
        
        emotion = label_map.get(prediction[0], 'Unknown')
        return emotion
    
    except Exception as e:
        print(f"Error during prediction: {e}")
        return 'Error'

file_path = r"D:\ravdess_data\ravdess-emotional-speech-audio\Actor_21\03-01-03-02-01-01-21.wav"
emotion = test_model_on_file(file_path, model)
print("Przewidywana emocja:", emotion)

file_path = r"D:\ravdess_data\ravdess-emotional-speech-audio\Actor_21\03-01-06-02-02-01-21.wav"
emotion = test_model_on_file(file_path, model)
print("Przewidywana emocja:", emotion)



Przewidywana emocja: Happiness
Przewidywana emocja: Fear
