In [1]:
import librosa    # For feature extraction
import os    # For file navigation
import soundfile    # To open sound files
import numpy as np    # better number processing
from IPython.display import clear_output    # Clear cell outputs

# Machine Learning and metrics
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# import model saving 
import joblib

In [2]:
def get_features(file):
    """Extract one flat feature vector from an audio file.

    The audio is read as float32 via ``soundfile`` and three librosa
    features are computed. Each feature matrix is averaged over its time
    frames, and the three averaged vectors are concatenated in this order:
    MFCCs (``n_mfcc=40``), chroma (from the STFT magnitude), and mel
    spectrogram (librosa default settings).

    Parameters
    ----------
    file : str or file-like
        Anything accepted by ``soundfile.SoundFile``.

    Returns
    -------
    numpy.ndarray
        1-D float64 array holding the concatenated, time-averaged features.

    Notes
    -----
    The audio is passed to librosa exactly as soundfile returns it; the
    code presumably expects mono recordings -- TODO confirm for the dataset.
    """
    # Hold the file handle only for as long as it takes to read the samples.
    with soundfile.SoundFile(file) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Magnitude spectrogram, reused as the input for the chroma feature.
    stft = np.abs(librosa.stft(X))

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as ``y=``: recent librosa releases make the
    # arguments of melspectrogram keyword-only, so a positional call fails.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)

    # Cast to float64 to match the dtype produced by the previous
    # "stack onto an empty array" construction.
    return np.hstack((mfccs, chroma, mel)).astype(np.float64)

In [3]:
# Lookup table: two-character emotion code -> emotion name.
emotions = dict([
    ("01", "neutral"),
    ("02", "calm"),
    ("03", "happy"),
    ("04", "sad"),
    ("05", "angry"),
    ("06", "fearful"),
    ("07", "disgust"),
    ("08", "surprised"),
])

# Subset of emotions to work with; edit the tuple of codes to change it.
used_emotions = {code: emotions[code] for code in ("03", "05", "04")}

In [4]:
directory = "data_16khz"
X,y = [],[]
total = 0

# Walk the dataset directory and build the feature matrix X and label list y.
# `folder` is used instead of `dir` so the builtin dir() is not shadowed.
for folder, _sub_dirs, files in os.walk(directory):
    for file in files:
        # The emotion code is the third dash-separated field of the file name.
        # Files without that field (e.g. stray non-dataset files) are skipped
        # instead of aborting the whole run with an IndexError.
        name_parts = file.split("-")
        if len(name_parts) < 3:
            continue
        emotion = name_parts[2]

        # Check if emotion is in used emotions
        if emotion not in used_emotions:
            continue

        # add to counter
        total += 1
        clear_output(wait=True)
        print("total files added :",total)

        # Build the path portably (a hard-coded "\\" only works on Windows).
        url = os.path.join(folder, file)

        # Extract features first so X and y can never get out of step if
        # feature extraction raises for this file.
        features = get_features(url)
        X.append(features)
        y.append(emotions[emotion])

total files added : 576


In [5]:
# Split data into train and test sets.
# A fixed random_state keeps the split (and therefore every accuracy reported
# below) reproducible across kernel restarts.
X_train,X_test,y_train,y_test = train_test_split(X, y, random_state=42)

In [6]:
# Build model: a single hidden layer of 175 units.
# - hidden_layer_sizes is written as a one-element tuple; "(175)" is just the
#   integer 175 in parentheses.
# - random_state fixes the weight initialisation so the score is reproducible.
# - NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only
#   used with solver='sgd'; with the default solver it has no effect.
model = MLPClassifier(
    hidden_layer_sizes=(175,),
    learning_rate='adaptive',
    max_iter=1000,
    epsilon=1e-08,
    alpha=.01,
    random_state=42,
)

# Train Model
model.fit(X_train, y_train)

# Test Model (mean accuracy on the held-out split)
accuracy = model.score(X_test,y_test)

# Show accuracy
print("Accuracy of the Recognizer is: {:.1f}%".format(accuracy*100))

Accuracy of the Recognizer is: 66.7%


In [None]:
# Test multiple model configurations (random search over hidden-layer layouts)
#
# NOTE(review): models are compared on the test split, so the best reported
# accuracy is an optimistic estimate; a separate validation split would be
# needed for an unbiased number.

test_model_count = 100
epochs = 800
max_hidden_layer_count = 3
min_nodes = 75
max_nodes = 300
best_accuracy = 0
best_model = None

# Seeded generator so the sequence of sampled architectures is reproducible.
rng = np.random.default_rng(42)

# joblib.dump does not create missing directories.
os.makedirs("models", exist_ok=True)

for _ in range(test_model_count):

    # Sample an architecture. endpoint=True makes both upper bounds inclusive,
    # so max_hidden_layer_count layers and max_nodes units can actually occur.
    layer_count = int(rng.integers(1, max_hidden_layer_count, endpoint=True))
    layer_sizes = tuple(
        int(size)
        for size in rng.integers(min_nodes, max_nodes, size=layer_count, endpoint=True)
    )

    # Build a new model; each one gets its own seed drawn from rng so runs are
    # repeatable without every model sharing the same initialisation.
    model = MLPClassifier(
        hidden_layer_sizes=layer_sizes,
        learning_rate='adaptive',
        max_iter=epochs,
        epsilon=1e-08,
        alpha=.01,
        random_state=int(rng.integers(0, 2**31 - 1)),
    )

    # Train Model (once -- a second fit would simply retrain and double the cost)
    model.fit(X_train, y_train)

    # Create Accuracy Score
    accuracy = model.score(X_test,y_test)

    # If model outperformed last save model and print results
    if accuracy > best_accuracy:
        print("Accuracy of the Recognizer has gone to: {:.1f}%".format(accuracy*100))
        best_accuracy = accuracy
        # Keep a handle on the winner; `model` is overwritten every iteration.
        best_model = model
        if accuracy > .75:
            joblib.dump(model, "models/model{:.1f}".format(accuracy*100))
            print("Model Saved")
                          