In [1]:
from keras.models import load_model
import librosa
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Emotion class names the encoder must be able to decode. Fitting the encoder
# here lets encoder.inverse_transform map model output back to a label later.
# NOTE(review): assumes this label set matches the one used at training time - confirm.
arr = ['angry','calm','disgust','fear','happy','neutral','sad','surprise']
label_column = np.asarray(arr).reshape(-1, 1)  # one label per row (2-D column)
encoder = OneHotEncoder()
encoder.fit(label_column)
res = encoder.transform(label_column).toarray()  # dense one-hot matrix of the labels

In [2]:
def get_features(path, duration=2.5, offset=0.6):
    """Load one audio clip and return its feature vector.

    Parameters
    ----------
    path : str or path-like
        Audio file handed to ``librosa.load``.
    duration : float, optional
        Number of seconds of audio to read. Defaults to 2.5.
    offset : float, optional
        Number of seconds to skip at the start of the file. Defaults to 0.6.

    Returns
    -------
    numpy.ndarray
        The result of ``extract_features`` for the loaded clip, as an array.

    Notes
    -----
    The offset/duration window is meant to trim the silence at the start and
    end of each recording. NOTE(review): the defaults presumably mirror the
    preprocessing used when the model was trained - keep them unless the
    model is retrained.
    """
    data, sample_rate = librosa.load(path, duration=duration, offset=offset)

    # without augmentation: features come from the raw (trimmed) clip only
    return np.array(extract_features(data, sample_rate))


def extract_features(data, sample_rate):
    """Summarise an audio signal as one flat feature vector.

    Every librosa feature matrix is reduced to one mean per row, and the
    resulting vectors are concatenated in a fixed order: zero-crossing rate,
    chroma STFT, MFCC, RMS, mel spectrogram.

    Parameters
    ----------
    data : numpy.ndarray
        Audio samples, as returned by ``librosa.load``.
    sample_rate : int
        Sampling rate of ``data``.

    Returns
    -------
    numpy.ndarray
        1-D array holding the concatenated per-feature means.
    """
    def row_means(feature_matrix):
        # Transposing and averaging over axis 0 yields one mean per row of
        # the matrix librosa returned.
        return np.mean(feature_matrix.T, axis=0)

    # NOTE(review): librosa documents ``S`` of chroma_stft as a power
    # spectrogram; the magnitude is passed here. Changing it would alter the
    # features, so it is left as is.
    magnitude = np.abs(librosa.stft(data))

    pieces = (
        np.array([]),  # empty float64 seed, keeps the stacked result float64
        row_means(librosa.feature.zero_crossing_rate(y=data)),                 # ZCR
        row_means(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)),   # Chroma_stft
        row_means(librosa.feature.mfcc(y=data, sr=sample_rate)),               # MFCC
        row_means(librosa.feature.rms(y=data)),                                # Root Mean Square Value
        row_means(librosa.feature.melspectrogram(y=data, sr=sample_rate)),     # MelSpectrogram
    )
    return np.hstack(pieces)

In [3]:
# Extract the feature vector for a single RAVDESS clip, then prepend an axis
# of length 1 so the array is 2-D (one row for this clip).
clip_features = get_features('DataSets/RAVDESS/Actor_01/03-01-03-02-02-02-01.wav')
feature = clip_features[np.newaxis, ...]
feature.shape

(1, 162)

In [6]:
# Restore the saved Keras model from disk. Call loadedModel.summary() in a
# separate cell to inspect its layers.
MODEL_PATH = 'finalModel_Acc_73_61.h5'
loadedModel = load_model(MODEL_PATH)
print("Model Loaded Successfully")

Model Loaded Successfully


In [7]:
# Run the model on the single feature row, then let the fitted encoder turn
# the model output back into an emotion label.
# NOTE(review): inverse_transform is given raw model scores rather than a
# strict one-hot row - presumably it picks the highest-scoring class; confirm.
result = loadedModel.predict(x=[feature], batch_size=1)
prediction = encoder.inverse_transform(result)
print(prediction)

[['neutral']]
