# **Testing The Speech Emotion Recognition Model**    

In [1]:
import librosa
import librosa.display
import numpy as np

In [2]:
def zcr(data, frame_length=2048, hop_length=512):
    """Compute the zero-crossing rate of an audio signal.

    Args:
        data: Audio samples, forwarded to librosa as ``y``.
        frame_length: Analysis frame length forwarded to librosa. Defaults to
            2048, matching the defaults of the sibling ``rmse``/``mfcc`` helpers.
        hop_length: Hop between frames forwarded to librosa. Defaults to 512,
            matching the sibling helpers.

    Returns:
        The librosa zero-crossing-rate array with singleton dimensions removed
        via ``np.squeeze``.
    """
    # Pass the signal by keyword, consistent with rmse() and mfcc() in this file.
    rates = librosa.feature.zero_crossing_rate(
        y=data, frame_length=frame_length, hop_length=hop_length
    )
    return np.squeeze(rates)

def rmse(data, frame_length=2048, hop_length=512):
    """Compute the per-frame RMS energy of an audio signal.

    Args:
        data: Audio samples, handed to librosa through the ``y`` keyword.
        frame_length: Analysis frame length forwarded to librosa.
        hop_length: Hop between frames forwarded to librosa.

    Returns:
        The array returned by ``librosa.feature.rms`` with singleton
        dimensions removed via ``np.squeeze``.
    """
    rms_kwargs = {
        "y": data,  # the audio data is supplied by keyword
        "frame_length": frame_length,
        "hop_length": hop_length,
    }
    energy = librosa.feature.rms(**rms_kwargs)
    return np.squeeze(energy)

def mfcc(data, sr, frame_length=2048, hop_length=512, flatten=True):
    """Compute MFCCs for an audio signal.

    Args:
        data: Audio samples, passed to librosa as ``y``.
        sr: Sample rate passed to librosa.
        frame_length: Passed to librosa as ``n_fft``.
        hop_length: Hop between frames passed to librosa.
        flatten: When true (default) the transposed result is flattened to
            1-D with ``np.ravel``; otherwise it is returned after
            ``np.squeeze``.

    Returns:
        The transposed librosa MFCC array, either flattened or squeezed
        depending on ``flatten``.
    """
    coeffs = librosa.feature.mfcc(
        y=data, sr=sr, n_fft=frame_length, hop_length=hop_length
    )
    transposed = coeffs.T
    if flatten:
        return np.ravel(transposed)
    return np.squeeze(transposed)

def extract_features(data, sr=22050, frame_length=2048, hop_length=512):
    """Build one flat feature vector for an audio signal.

    The vector is the horizontal concatenation, in this order, of the
    outputs of ``zcr``, ``rmse`` and ``mfcc`` for the same signal.

    Args:
        data: Audio samples.
        sr: Sample rate, forwarded to ``mfcc`` only.
        frame_length: Frame length forwarded to all three helpers.
        hop_length: Hop length forwarded to all three helpers.

    Returns:
        A 1-D numpy array produced by ``np.hstack``.
    """
    pieces = [
        # Empty seed array retained so concatenation/promotion behaviour
        # matches the original implementation exactly.
        np.array([]),
        zcr(data, frame_length, hop_length),
        rmse(data, frame_length, hop_length),
        mfcc(data, sr, frame_length, hop_length),
    ]
    return np.hstack(pieces)

def get_features(path, duration=2.5, offset=0.6):
    """Extract features for an audio file and three augmented variants.

    The file is loaded with ``librosa.load`` and features are extracted with
    ``extract_features`` for, in row order: the loaded signal, the signal
    after ``noise``, the signal after ``pitch``, and the signal after
    ``pitch`` followed by ``noise``.

    NOTE(review): ``noise()`` and ``pitch()`` are not defined in the visible
    part of this notebook; they must be defined elsewhere before this
    function is called, otherwise a NameError is raised.

    Args:
        path: Path of the audio file to load.
        duration: Forwarded to ``librosa.load``.
        offset: Forwarded to ``librosa.load``.

    Returns:
        A 2-D numpy array with one feature row per variant (four rows).
    """
    data, sr = librosa.load(path, duration=duration, offset=offset)

    # Augmentations applied to the loaded signal, in the original call order.
    augmentations = (
        lambda signal: noise(signal),
        lambda signal: pitch(signal, sr),
        lambda signal: noise(pitch(signal, sr)),
    )

    audio = np.array(extract_features(data, sr))
    for augment in augmentations:
        variant_features = extract_features(augment(data), sr)
        audio = np.vstack((audio, variant_features))
    return audio

In [3]:
from tensorflow.keras.models import model_from_json

# Load the model architecture from the JSON file.
# A context manager guarantees the file handle is closed even if read() raises.
with open('CNN_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load the model weights into the reconstructed architecture
loaded_model.load_weights("best_model1_weights.h5")

# Compile the loaded model
# NOTE(review): optimizer/loss are presumably the ones used in training — confirm.
loaded_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

print("Model loaded and compiled successfully")


Model loaded and compiled successfully


In [4]:
import pickle

# Load the saved scaler (used later via scaler2.transform on the feature row).
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load pickle files that come from a trusted source.
with open('scaler2.pickle', 'rb') as f:
    scaler2 = pickle.load(f)

# Load the saved encoder (used later via encoder2.inverse_transform on the
# model output). The same pickle trust caveat applies to this file.
with open('encoder2.pickle', 'rb') as f:
    encoder2 = pickle.load(f)

print("Scaler and encoder loaded successfully")


Scaler and encoder loaded successfully




In [5]:
def get_predict_feat(path, duration=2.5, offset=0.6, n_features=2376):
    """Load an audio file and turn it into a single scaled model input.

    Args:
        path: Path of the audio file to load.
        duration: Forwarded to ``librosa.load`` (default 2.5, the original
            hard-coded value).
        offset: Forwarded to ``librosa.load`` (default 0.6, the original
            hard-coded value).
        n_features: Expected length of the flat feature vector. It must match
            the model input width; the default 2376 is the original
            hard-coded value.

    Returns:
        A numpy array of shape ``(1, n_features, 1)``: the feature row after
        ``scaler2.transform`` with a trailing axis added.

    Raises:
        ValueError: If the number of extracted features differs from
            ``n_features``.
    """
    signal, s_rate = librosa.load(path, duration=duration, offset=offset)

    # Forward the sample rate returned by the loader instead of silently
    # relying on extract_features' default of 22050.
    features = np.array(extract_features(signal, s_rate))

    # Fail with an explicit message rather than an opaque reshape error.
    if features.size != n_features:
        raise ValueError(
            "Expected {} features but extracted {} from {!r}; the clip may be "
            "shorter than the requested duration after the offset.".format(
                n_features, features.size, path
            )
        )

    # Shape is passed positionally: the `newshape=` keyword of np.reshape is
    # deprecated in recent NumPy releases.
    row = np.reshape(features, (1, n_features))
    scaled = scaler2.transform(row)  # Use the loaded scaler
    return np.expand_dims(scaled, axis=2)

def prediction(path1):
    """Predict and print the emotion label for one audio file.

    The file is converted to a model input with ``get_predict_feat``, run
    through ``loaded_model.predict``, and the output is decoded with
    ``encoder2.inverse_transform``. The first decoded entry is printed.

    Args:
        path1: Path of the audio file to classify.

    Returns:
        None. The label is written to standard output only.
    """
    model_input = get_predict_feat(path1)
    scores = loaded_model.predict(model_input)
    decoded = encoder2.inverse_transform(scores)
    label = decoded[0][0]
    print("Predicted Emotion: ", label)


In [None]:
# prediction("RAVDESS Emotional speech audio/Actor_06/03-01-08-02-01-01-06.wav") # It should be "Surprised"

In [None]:
# prediction("RAVDESS Emotional speech audio/Actor_18/03-01-04-01-01-02-18.wav") # It should be "Sad"

In [None]:
# prediction("Surrey Audio-Visual Expressed Emotion (SAVEE)/ALL/DC_f12.wav") # It should be "Fear"