In [6]:
import numpy as np
import joblib
import soundfile
import numpy as np
import librosa
import os
from sklearn.model_selection import train_test_split
import glob


In [7]:
# all emotions in the new dataset
int2emotion_new = {
    "anger": "angry",
    "sad": "sad",
    "happy": "happy",
    "neutral": "calm"
}

# we allow only these four emotions
AVAILABLE_EMOTIONS_NEW = set(int2emotion_new.values())


In [8]:
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
            
        result = np.array([])
        
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
            
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
            
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
            
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
            
    return result

In [None]:
model_filename = "./result/random_forest_model.joblib" # achieved around 0.75 accuracy with high precision and recall for this model
loaded_classifier = joblib.load(model_filename)
print(f"Loaded model from {model_filename}")

Loaded model from ./result/random_forest_model.joblib


In [9]:
file_name = "./../sample_audio/5.3.sad-06.wav"
X = []
features = extract_feature(file_name, mfcc=True, chroma=True, mel=True)
X.append(features)
print("Extracted features from audio")

Extracted features from audio


In [17]:
loaded_prediction = loaded_classifier.predict(X)
print("Predictions using loaded model:", loaded_prediction[0])

Predictions using loaded model: sad
