# In [None]:
import soundfile
import numpy as np 
import librosa  
import glob 
import os # to use operating system dependent functionality
from sklearn.model_selection import train_test_split # for splitting training and testing 
from sklearn.neural_network import MLPClassifier # multi-layer perceptron model 
from sklearn.metrics import accuracy_score # to measure how good we are
def get_feature(file_name, mfccs=None, mel=None, chroma=None, contrast=None):
    """Extract four averaged spectral feature vectors from one audio file.

    Args:
        file_name: Path of the audio file; handed to ``librosa.load``.
        mfccs: Ignored. Kept (now optional) for backward compatibility.
        mel: Ignored. Kept (now optional) for backward compatibility.
        chroma: Ignored. Kept (now optional) for backward compatibility.
        contrast: Ignored. Kept (now optional) for backward compatibility.

    Returns:
        A tuple ``(mfccs, mel, chroma, contrast)``. Each element is the
        corresponding librosa feature matrix transposed and averaged over
        axis 0, i.e. one mean value per feature coefficient.
    """
    # The four trailing parameters were always overwritten below, so they
    # carry no information; defaulting them lets callers pass only the path.
    data, sample_rate = librosa.load(file_name)

    # Magnitude spectrogram, shared by the chroma and contrast features.
    stft = np.abs(librosa.stft(y=data))

    mfccs = np.mean(
        librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40).T, axis=0
    )
    # The audio must be passed as ``y=``: recent librosa releases accept the
    # signal by keyword only and reject the positional form.
    mel = np.mean(
        librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0
    )
    chroma = np.mean(
        librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
    )
    contrast = np.mean(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0
    )

    return mfccs, mel, chroma, contrast

# Two-digit emotion code -> emotion label for every emotion in the dataset.
# Codes run from "01" to "08" in the order the labels are listed here.
list_emotion = {
    "{:02d}".format(code): label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

# Subset of emotion labels to work with. Only three are observed here;
# feel free to add more.
classify_emotions = {"sad", "happy", "surprised"}



def load_data(test_size=0.2,
              data_pattern="C:\\Users\\Documents\\ravdess data\\Actor_*\\*.wav"):
    """Build the feature matrix and labels, then split into train and test.

    Args:
        test_size: Passed to ``train_test_split`` as the test-set size.
        data_pattern: Glob pattern selecting the audio files to load.
            Defaults to the original hard-coded dataset location.

    Returns:
        The result of ``train_test_split``: training features, testing
        features, training labels, testing labels.

    Raises:
        ValueError: If no file produced a usable sample, so there is
            nothing to split.
    """
    feature, y = [], []
    for file in glob.glob(data_pattern):
        basename = os.path.basename(file)  # get the base name of the audio file

        # The third dash-separated field of the name is the emotion code.
        # A name with too few fields or an unknown code yields None, which
        # is filtered out below instead of raising IndexError/KeyError.
        fields = basename.split("-")
        emotion = list_emotion.get(fields[2]) if len(fields) > 2 else None

        # Keep only the emotions listed in classify_emotions. The original
        # test was inverted and kept exactly the emotions meant to be dropped.
        if emotion not in classify_emotions:
            continue

        try:
            # get_feature declares four positional parameters after the path
            # and overwrites them internally; pass placeholders so the call
            # matches that signature.
            mfccs, mel, chroma, contrast = get_feature(
                file, None, None, None, None
            )
        except Exception as e:
            # Best effort: report the failure with its cause and move on.
            print("Error encountered while parsing file: ", file, "-", e)
            continue

        # One flat feature vector per file.
        feature.append(np.hstack([mfccs, mel, chroma, contrast]))
        y.append(emotion)

    if not feature:
        raise ValueError(
            "No samples were loaded from {!r}; check the path and the "
            "selected emotions.".format(data_pattern)
        )

    # split the data to training and testing and return it
    return train_test_split(
        np.array(feature), y, test_size=test_size, random_state=9
    )




# Load the dataset, holding out 25% of the samples for testing.
feature_train, feature_test, y_train, y_test = load_data(test_size=0.25)

print("Number of samples in training data:", feature_train.shape[0])

print("Number of samples in testing data:", feature_test.shape[0])

# Create and train the classifier. `clf` was used below without ever being
# created or fitted, which raised NameError. Hyperparameters are left at the
# scikit-learn defaults (tune as needed); random_state only fixes the seed so
# that runs are repeatable.
clf = MLPClassifier(random_state=9)
clf.fit(feature_train, y_train)

# predict the held-out 25% of data
y_pred = clf.predict(feature_test)

# calculate the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy is: {:.2f}%".format(accuracy*100))

# Length of each per-file feature vector (columns of the feature matrix).
print("Number of features:", feature_train.shape[1])