In [None]:
from pathlib import Path
import warnings; warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import opensmile
from mlxtend.plotting import plot_confusion_matrix
from sklearn import svm
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

## M09 Emotion Recognition exercise 
These two notebooks guide you through a simple emotion recognition script on the German EmoDB database.
First of all, let's download the data:

In [None]:
# Folder (relative to the working directory) that is expected to hold the EmoDB wav files
wavfolder = Path('wav')

# Only download when the wav folder does not exist yet, so re-running this cell is cheap
if not wavfolder.is_dir():
    # fetch the archive through the shell (IPython `!` escape)
    ! wget http://www.emodb.bilderbar.info/download/download.zip
    # unpack only the entries below wav/ from the archive
    ! unzip download.zip "wav/*"
    # remove the archive again once it has been unpacked
    Path("download.zip").unlink()

### 01. Feature Extraction
We start by extracting features using the openSMILE Python package. https://github.com/audeering/opensmile-python
(There is also a C++ library, which is a bit less convenient to use: https://github.com/audeering/opensmile)

Classifying openSMILE features is a very popular baseline system, since it lets you extract a pre-defined set of features that work on most tasks. Generally, we distinguish two kinds of features:
1) frame-level acoustic features 
2) utterance-level functionals (statistics) of acoustic features
Since paralinguistic tasks are not about modeling sequences, it is more convenient to just obtain one feature vector per utterance and classify that.

In this exercise we will extract utterance-level [eGeMAPS features](https://sail.usc.edu/publications/files/eyben-preprinttaffc-2015.pdf) from EmoDB and classify them in part 2 of the exercise. 
Make sure you pip-install all the imports, especially opensmile (e.g. <b>pip install opensmile</b>)


In [None]:
# CONFIGURATION
# Comma-separated label file: each line is read later as "<wav file name>,<label>"
labelfile = 'emodb_labels.csv'

# Brace-delimited list of the emotion class names
classtype = '"{Anger,Boredom,Disgust,Fear,Happiness,Neutral,Sadness}"'

# A missing label file is only reported here; execution is not stopped
if not Path(labelfile).is_file():
    print(f"ERROR: Please upload the file {labelfile} to this folder.")

In [None]:
# Speaker IDs whose recordings form the training set
# (matched against the first two characters of each file name)
indexes_train = [
    '11', '12', '13',
    '14', '15', '16',
]

# Speaker IDs whose recordings form the test set
indexes_test = [
    '03', '08',
    '09', '10',
]

In [None]:
# Create the openSMILE feature extractor:
#   feature_set   -> eGeMAPS (v01b)
#   feature_level -> Functionals
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv01b,
    feature_level=opensmile.FeatureLevel.Functionals,
)

In [None]:
# Collect the wav paths and the labels of the training and the test speakers.
# Entries whose speaker ID is in neither list are ignored.
train_files = []
train_labels = []

test_files = []
test_labels = []

with open(labelfile) as filelist:
    for line in filelist:
        line = line.strip()
        # a blank line cannot be unpacked into (instance, label) and would raise ValueError
        if not line:
            continue

        instance, label = line.split(",")
        filename = wavfolder / instance

        # the first two characters of the file name are compared with the speaker ID lists
        speaker_id = instance[:2]
        if speaker_id in indexes_train:
            train_files.append(filename)
            train_labels.append(label)
        elif speaker_id in indexes_test:
            test_files.append(filename)
            test_labels.append(label)

In [None]:
# Run the openSMILE extractor over both file lists (training first, then test).
# This takes a few minutes.
train_df, test_df = (
    smile.process_files(file_list) for file_list in (train_files, test_files)
)

In [None]:
# Add the class labels to the dataframe as a "class" column.
# Later cells read the label from the last column of `.values`.
train_df["class"] = train_labels
test_df["class"] = test_labels

# display the first rows of the training frame as a quick sanity check
train_df.head()

In [None]:
# Expected shape: (347, 89), i.e. 347 instances with 89 attributes each
# (88 eGeMAPS features and 1 label for each instance)
print(f"Instances: {train_df.shape[0]}, eGeMAPS-features and label: {train_df.shape[1]}")

# Training SVM classifier

In [None]:
# Inspect how the training instances are distributed over the classes.
# The class label is read from the last column of the frame.
labels, counts = np.unique(train_df.values[:, -1], return_counts=True)

# gap of every class to the largest class (0 for the largest class, negative otherwise)
len_diffs = counts - counts.max()

print("Labels:", labels, sep="\n")
print("counts per class:", counts, sep="\n")
print("Differences of instances:", len_diffs, sep="\n")

In [None]:
# Convert to Numpy arrays
train_data = train_df.values
test_data = test_df.values

# equalize the number instances across classes for better performance
# (try later the effect when switching it on)
balance_counts = False
if balance_counts:
    # fixed seed, so the oversampled training set is the same on every run
    rng = np.random.default_rng(42)
    # look for the label in the class column only, not in the feature columns
    class_column = train_data[:, -1]
    resampled_rows = []
    for diff, label in zip(len_diffs, labels):
        n_missing = abs(int(diff))
        if n_missing == 0:
            # this class already has as many instances as the largest class
            continue
        indices = np.flatnonzero(class_column == label)
        # draw all missing instances of this class at once (with replacement)
        resampled_rows.append(train_data[rng.choice(indices, size=n_missing)])
    if resampled_rows:
        # a single concatenation instead of re-allocating the array for every added row
        train_data = np.concatenate([train_data] + resampled_rows, axis=0)

labels, counts = np.unique(train_data[:,-1], return_counts=True)
print("New counts per class:")
print (counts)

In [None]:
# Feature matrix: every column except the last one, converted to 32-bit floats
train_features = train_data[:, :-1].astype(np.float32)

# Label vector: the last column
train_labels = train_data[:, -1]

In [None]:
# set SVM classifier with linear kernel, for more details about svm and parameter settings, see
# http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
clf = svm.SVC(kernel='linear', C=0.01)

# split data into 10 smaller sets and use 10-fold cross validation on the estimator and the data,
# then generate predictions for all data, for more details see
# http://scikit-learn.org/stable/modules/cross_validation.html
# NOTE: no `groups` argument is passed, so the folds are not split by speaker.
predicted = cross_val_predict(clf, train_features, train_labels, cv=10)

print ("Detailed classification report:")
print ("")

# build a text report showing the main classification metrics.
print (classification_report(train_labels, predicted))

print ("confusion matrix, without normalisation:")
print ("")

# sorted class names occurring in the true or the predicted labels; passing them to both
# calls keeps the matrix rows/columns and the tick labels of the plot in the same order
class_names = np.union1d(train_labels, predicted)
# compute the confusion matrix to evaluate the accuracy
conf = confusion_matrix(train_labels, predicted, labels=class_names)
# visual representation of the confusion matrix, labelled with the class names
fig, ax = plot_confusion_matrix(conf_mat=conf, class_names=list(class_names))
plt.show()

In [None]:
## run SVM with various kernels and check out the different results

for c_kernel in ['linear', 'rbf', 'poly', 'sigmoid']:
    clf = svm.SVC(kernel=c_kernel, C=0.1, gamma="scale")
    # 10-fold cross validation: one out-of-fold prediction per training instance
    predicted = cross_val_predict(clf, train_features, train_labels, cv=10)
    print ("Detailed classification report with {}-kernel SVM:".format(c_kernel))
    print ("")
    print (classification_report(train_labels, predicted))
    print ("confusion matrix with {}-kernel SVM, without normalisation:".format(c_kernel))
    print ("")
    # sorted class names occurring in the true or the predicted labels; passing them to both
    # calls keeps the matrix rows/columns and the tick labels of the plot in the same order
    class_names = np.union1d(train_labels, predicted)
    conf = confusion_matrix(train_labels, predicted, labels=class_names)
    # visual representation of the confusion matrix, labelled with the class names
    fig, ax = plot_confusion_matrix(conf_mat=conf, class_names=list(class_names))
    # the title tells the figures of the different kernels apart
    ax.set_title("{}-kernel SVM".format(c_kernel))
    plt.show()

In [None]:
## run SVM with different complexity values and check out the different results

for c_value in [1, 0.1, 0.01, 0.001, 0.0001, 0.00001]:
    clf = svm.SVC(kernel='linear', C=c_value)
    # 10-fold cross validation: one out-of-fold prediction per training instance
    predicted = cross_val_predict(clf, train_features, train_labels, cv=10)
    print ("Detailed classification report when C={}:".format(c_value))
    print ("")
    print (classification_report(train_labels, predicted))
    print ("confusion matrix when C={}, without normalisation:".format(c_value))
    print ("")
    # sorted class names occurring in the true or the predicted labels; passing them to both
    # calls keeps the matrix rows/columns and the tick labels of the plot in the same order
    class_names = np.union1d(train_labels, predicted)
    conf = confusion_matrix(train_labels, predicted, labels=class_names)
    # visual representation of the confusion matrix, labelled with the class names
    fig, ax = plot_confusion_matrix(conf_mat=conf, class_names=list(class_names))
    # the title tells the figures of the different C values apart
    ax.set_title("linear-kernel SVM, C={}".format(c_value))
    plt.show()

In [None]:
# Expected shape: (188, 89), i.e. 188 instances with 89 attributes each
# (88 eGeMAPS features plus 1 label per instance)
print(test_data.shape)

# Feature matrix: every column except the last one, converted to 32-bit floats
test_features = test_data[:, :-1].astype(np.float32)

# Label vector: the last column
test_labels = test_data[:, -1]

In [None]:
## Test stage 2: choose the parameters which lead to the best result during cross validation

clf = svm.SVC(kernel='linear', C=0.01)

# fit the best model on training data
clf.fit(train_features, train_labels)
# predict the labels of the test instances
predicted_test = clf.predict(test_features)

print ("Detailed classification report on test data:")
print ("")

# build a text report showing the main classification metrics.
print (classification_report(test_labels, predicted_test))

print ("confusion matrix, without normalisation on test data:")

# sorted class names occurring in the true or the predicted labels; passing them to both
# calls keeps the matrix rows/columns and the tick labels of the plot in the same order
class_names = np.union1d(test_labels, predicted_test)
# compute the confusion matrix to evaluate the accuracy
conf = confusion_matrix(test_labels, predicted_test, labels=class_names)
# visual representation of the confusion matrix, labelled with the class names
fig, ax = plot_confusion_matrix(conf_mat=conf, class_names=list(class_names))
plt.show()