In [461]:
# pip install librosa

In [462]:
'''
Please install this specific version of resampy for librosa to work without errors.
'''

'\nPlease install this specific version of resampy for librosa to work without errors.\n'

In [463]:
# pip install resampy==0.3.1

In [464]:
import soundfile
import os
import glob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import librosa
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report
import warnings; warnings.filterwarnings('ignore')

In [465]:
emotions ={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

### Data for binary classification


In [466]:
def load_extract_features(data_path):

    '''
    load_extract_features() is a function that is used to load all the audio files one at a time, compute their features and return the features as well as the target values.

    There are around 8-10 audio files which are corrupted. We hardcode zero values for such files in order to maintain consistency.

    ['calm', 'happy'] emotion data is categorized into 'positive' and  ['angry', 'fearful'] into 'negative'

    Returns:
    1. Features
    2. Binary Target Values
    '''
    final_features,target_emotions, binary_label = [],[], []
    count = 0
    
    for i in glob.glob(data_path + "/Actor_*/*.wav"): #Loop to read every file.
        
        name = os.path.basename(i)
        #We split the name of the file to understand the emotion associated with the file.
        split = name.split("-")
        #We know that the third identifier is associated with the emotion of the audio file. Hence, we use [2] as it represents the third identifier.
        emotion = emotions[split[2]]

        #Below is the code to categorize the emotions into two classes to make this a binary problem.
        if emotion in ['calm', 'happy']:
            binary_label.append(0)
        elif emotion in ['angry', 'fearful']:
            binary_label.append(1)
        else:
            continue
        
        with soundfile.SoundFile(i) as audio:
            waveform = audio.read(dtype="float32")
            sr = audio.samplerate
            
            #Below is the code to extract the Mel spectrogram features
            #128 is the standard for machine learning applications using Mel spectrograms
            m_feature = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128, fmax=sr / 2.0).T
            melspectrogram = np.mean(m_feature,axis=0)
            if melspectrogram.shape != (128,):
                melspectrogram = np.zeros(128)
            
            #Below is the code to extract the chromagram features
            stft_wave = librosa.stft(waveform)
            stft = np.abs(stft_wave)
            c_feature = librosa.feature.chroma_stft(S=stft, sr=sr).T
            chromagram = np.mean(c_feature,axis=0)
            
            #12 is the number of pitch classes
            if chromagram.shape != (12,):
                chromagram = np.zeros(12)
                
            features=np.array([])
            features=np.hstack((chromagram, melspectrogram))
        
            final_features.append(features)
            target_emotions.append(emotion)
            
            count += 1
            if count % 100 == 0:
                print("Processed Audio File Number: ", count)
    
    #We return the features and the binary target values.
    return np.array(final_features), np.array(binary_label)

In [467]:
#Please change the path below to the path of the folder saved in your computer.
# data_path = './Audio_Speech_Actors_01-24'
# X, binary_label = load_extract_features(data_path)
# print(X.shape)
# print(binary_label.shape)


In [468]:
# np.savetxt('matrix.txt', X, delimiter = ',')  
# np.savetxt('binary_label.txt',binary_label,delimiter = ',')

In [469]:
X = np.loadtxt(open("matrix.txt","rb"), delimiter=",", skiprows=0)
binary_label = np.loadtxt(open("binary_label.txt","rb"), delimiter=",", skiprows=0)

print(X.shape) #should be (768,140)
print(binary_label.shape) # should be (768,)

(768, 140)
(768,)


In [470]:
X_bia = np.c_[X, np.ones(X.shape[0])]   # Pad 1's for the bias term
l = 10  # lambda

new_label = np.ones((binary_label.shape[0]))

for i in range (binary_label.shape[0]):
    if binary_label[i] == 0:
        new_label[i] = -1

X_train, X_test, y_train, y_test = train_test_split(X_bia, new_label, test_size=0.3, random_state=0)

In [471]:
def compute_cost(W, X, Y, l):
    # calculate hinge loss
    n_s = X.shape[0]
    distances = 1 - Y * (np.dot(X, W))
    distances[distances < 0] = 0  # equivalent to max(0, distance)
    hinge_loss =  (np.sum(distances) / n_s)
    # calculate cost
    cost = l / 2 * np.dot(W, W) + hinge_loss 
    
    return cost

In [472]:
def calculate_gradient(W, X, Y,l):

    distance = 1 - (Y * np.dot(X, W))
    dw = np.zeros(len(W))

    for ind, d in enumerate(distance):
        if (ind == W.shape[0]):
            if max(0, d) != 0:
                di =  - ( Y[ind] * X[ind])
        else:
            if max(0, d) == 0:
                di = l*W
            else:
                di = l*W - ( Y[ind] * X[ind])
        dw += di

    dw = dw/len(Y)  # average

    return dw

In [473]:
def svm_accuracy(W,X,Y):
    count = 0

    for i in range(X.shape[0]):
        yp = np.sign(np.dot(W, X[i])) #model
        if(yp == Y[i]):
            count = count +1

    return count / Y.shape[0]

In [474]:
def svm(X, Y, step, l):
    w = np.zeros(X_train.shape[1])
    cost = compute_cost(w, X, Y, l)
    for i in range(1000):
        grad = calculate_gradient(w, X, Y, l)
        new_w = w - step * grad
        new_cost = compute_cost(new_w, X, Y, l)
        
        if (new_cost<cost):
            w = new_w
            cost = new_cost
        else:
            break
    
    return w

In [475]:
w_result = svm(X_train, y_train, 0.0001, l)
print(svm_accuracy(w_result, X_train, y_train))
print(svm_accuracy(w_result,X_test,y_test))

0.7169459962756052
0.7272727272727273


In [476]:
def pca(X, k):
    mean = np.mean(X, axis = 0)   # the mean value of X
    std = np.std(X, axis=0)       # the standard deviation along axis 0
    X_std = (X - mean) / std      # standardized data
    cov_mat = np.cov(X_std.T)     # calculate the covariance matrix

    values, vectors = np.linalg.eig(cov_mat)   # Eigendecomposition of covariance matrix

    # Make a list of (eigenvalue, eigenvector) tuples
    eig_pairs = [(np.abs(values[i]), vectors[i,:]) for i in range(len(values))]

    # Sort the tuples from the highest to the lowest based on eigenvalues magnitude
    eig_pairs.sort(key=lambda x: x[0], reverse=True)

    # Store the soted eigenvalues and their corresponding eigenvectors
    vals_sorted = np.array([x[0] for x in eig_pairs])
    vecs_sorted = np.array([x[1] for x in eig_pairs])

    W = vecs_sorted[:k, :] # Projection matrix with the top k eigenvectors selected

    return W

In [477]:
X_train, X_test, y_train, y_test = train_test_split(X, binary_label, test_size=0.3, random_state=0)
pca(X_train, 40)

array([[ 0.02266093, -0.22815377, -0.03924663, ...,  0.00512451,
        -0.02844146, -0.00405691],
       [ 0.0196625 , -0.24525547, -0.04168403, ..., -0.00802313,
         0.02200994, -0.00566887],
       [ 0.01708405, -0.2493753 , -0.03513404, ...,  0.01706568,
        -0.01741235,  0.01216215],
       ...,
       [-0.07487257,  0.04122855, -0.04938989, ..., -0.00122934,
        -0.03456936, -0.07029348],
       [-0.08525853,  0.02495706, -0.02639557, ..., -0.02021209,
         0.05095034,  0.00127721],
       [-0.10048081, -0.00403834,  0.0219871 , ...,  0.05759587,
         0.03126924,  0.01705577]])