In [7]:
import soundfile
import os
import glob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import librosa
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report
import warnings; warnings.filterwarnings('ignore')
import time

In [8]:
emotions ={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

In [9]:
def load_extract_features(data_path):

    '''
    load_extract_features() is a function that is used to load all the audio files one at a time, compute their features and return the features as well as the target values.

    There are around 8-10 audio files which are corrupted. We hardcode zero values for such files in order to maintain consistency.

    ['calm', 'happy'] emotion data is categorized into 'positive' and  ['angry', 'fearful'] into 'negative'

    Returns:
    1. Features
    2. Binary Target Values
    '''
    final_features,target_emotions, binary_label = [],[], []
    count = 0

    for i in glob.glob(data_path + "/Actor_*/*.wav"): #Loop to read every file.

        name = os.path.basename(i)
        #We split the name of the file to understand the emotion associated with the file.
        split = name.split("-")
        #We know that the third identifier is associated with the emotion of the audio file. Hence, we use [2] as it represents the third identifier.
        emotion = emotions[split[2]]

        #Below is the code to categorize the emotions into two classes to make this a binary problem.
        if emotion in ['calm', 'happy']:
            binary_label.append(0)
        elif emotion in ['angry', 'fearful']:
            binary_label.append(1)
        else:
            continue

        with soundfile.SoundFile(i) as audio:
            waveform = audio.read(dtype="float32")
            sr = audio.samplerate

            #Below is the code to extract the Mel spectrogram features
            #128 is the standard for machine learning applications using Mel spectrograms
            m_feature = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128, fmax=sr / 2.0).T
            melspectrogram = np.mean(m_feature,axis=0)
            if melspectrogram.shape != (128,):
                melspectrogram = np.zeros(128)

            #Below is the code to extract the chromagram features
            stft_wave = librosa.stft(waveform)
            stft = np.abs(stft_wave)
            c_feature = librosa.feature.chroma_stft(S=stft, sr=sr).T
            chromagram = np.mean(c_feature,axis=0)

            #12 is the number of pitch classes
            if chromagram.shape != (12,):
                chromagram = np.zeros(12)

            features=np.array([])
            features=np.hstack((chromagram, melspectrogram))

            final_features.append(features)
            target_emotions.append(emotion)

            count += 1
            if count % 100 == 0:
                print("Processed Audio File Number: ", count)

    #We return the features and the binary target values.
    return np.array(final_features), np.array(binary_label)

In [10]:
#Please change the path below to the path of the folder saved on your computer.

# *** NOTE: This is an ABSOLUTE PATH, it won't work on your device! *** #
# Either change the tgt dir, or see the (folder) output in: Q5_Audio #
data_path = '/home/sean_the_sheep/Desktop/University/2023_Fall_Term/Intro to Machine Learning/A/2/Q5_Audio'
X, binary_label = load_extract_features(data_path)

Processed Audio File Number:  100
Processed Audio File Number:  200
Processed Audio File Number:  300
Processed Audio File Number:  400
Processed Audio File Number:  500
Processed Audio File Number:  600
Processed Audio File Number:  700


In [11]:
df_1 = pd.DataFrame(X)
df_2 = pd.DataFrame()
df_2['label'] = binary_label
df = pd.merge(df_1, df_2, left_index=True, right_index=True)

In [12]:
from sklearn.model_selection import train_test_split
# Splitting! Let's do a 70-30 split, as usual #
X_train, X_test, y_train, y_test = train_test_split(X, binary_label, test_size=0.3, random_state=42)

In [13]:
class kNN():
  X_data = None
  Y_data = None
  k = None

  def __init__(self, X_train, y_train, k):
    self.X_data = X_train
    self.Y_data = y_train
    self.k = k

  def predict(self, x):
    X_data = self.X_data
    Y_data = self.Y_data
    k = self.k
    result = []
    x_index = np.where(np.all(X_data == x, axis=1))[0]
    if x_index.size > 0:
        X_data = np.delete(X_data, x_index, axis=0)
        Y_data = np.delete(Y_data, x_index)
    distances = np.sqrt(np.sum((X_data - x)**2, axis=1))
    for i in range(k):
        index = np.argmin(distances)
        distances[index] = np.inf
        result.append(y_train[index])
    classify = np.sum(result)/k
    if classify >= 0.5:
      return 1
    return 0

  def score(self, x, y):
    if self.predict(x) == y:
      return 1
    return 0

In [14]:
from sklearn.model_selection import StratifiedKFold

num_partitions = 25

def optimal_k(knn, X_train, y_train):
  kf = StratifiedKFold(n_splits=num_partitions)
  k_range = list(range(1, 101))
  k_scores = []
  
  for k in k_range:
      total_score = 0
      for i, (train_index, test_index) in enumerate(kf.split(X_train, y_train)):
        model = knn(X_train[train_index], y_train[train_index], k)
        for index in test_index:
          test_score = model.score(X_train[index], y_train[index])
          total_score += test_score
      k_scores.append(total_score/num_partitions)
  return k_scores.index(max(k_scores))

In [16]:
k = optimal_k(kNN, X_train, y_train)
k

5

In [17]:
init_time = time.perf_counter()
preds = []
model = kNN(X_train, y_train, k)
for row in X_train:
  preds.append(model.predict(row))

final_time = time.perf_counter()

initial_train_accuracy = (np.sum(np.where(y_train - preds == 0, 1, 0))/len(y_train)) * 100
initial_train_runtime = final_time - init_time
print("Training Accuracy: " + str(initial_train_accuracy))
print("Training Runtime: " + str(initial_train_runtime))

init_time = time.perf_counter()
preds = []
model = kNN(X_train, y_train, k)

for row in X_test:
  preds.append(model.predict(row))

final_time = time.perf_counter()

init_test_accuracy = (np.sum(np.where(y_test - preds == 0, 1, 0))/len(y_test))  * 100
init_test_runtime = final_time - init_time
print("Testing Accuracy: " + str(init_test_accuracy))
print("Testing Runtime: " + str(init_test_runtime))

Training Accuracy: 66.79611650485437
Training Runtime: 0.3693712319991391
Testing Accuracy: 80.54298642533936
Testing Runtime: 0.13180899899998622


In [18]:
b = np.mean(X, axis=0)
cov = np.dot((X - b).T, (X - b))/len(X)

eigenvalues, eigenvectors = np.linalg.eig(cov)

max_abs_idx = np.argmax(np.abs(eigenvectors), axis=0)
signs = np.sign(eigenvectors[max_abs_idx, range(eigenvectors.shape[0])])
eigenvectors = eigenvectors*signs[np.newaxis,:]
eigenvectors = eigenvectors.T

eigenpairs = [(np.abs(eigenvalues[i]), eigenvectors[:, i]) for i in range(len(eigenvalues))]
eigenpairs.sort(key=lambda x: x[0], reverse=True)
sorted_eigenvectors = np.array([x[1] for x in eigenpairs])

In [19]:
def convert_X(X, i):
  W = sorted_eigenvectors[:i, :]
  new_X = np.dot((X - b), W.T)
  return new_X

In [20]:
i = 38
new_X = convert_X(X, i)
new_X.shape

(736, 38)

In [21]:
total_Var = np.sum(eigenvalues)

Var_conserved = np.sum(eigenvalues[:i]) / total_Var

percentage = Var_conserved * 100

print(f"Percentage of Variance Conserved: {percentage}%")

Percentage of Variance Conserved: 99.92185235023499%


In [22]:
from sklearn.model_selection import train_test_split
# Splitting! Let's do a 70-30 split, as usual #
X_train, X_test, y_train, y_test = train_test_split(new_X, binary_label, test_size=0.3, random_state=42)

In [23]:
init_time = time.perf_counter()
preds = []
model = kNN(X_train, y_train, k)

for row in X_train:
  preds.append(model.predict(row))

final_time = time.perf_counter()

final_train_accuracy = (np.sum(np.where(y_train - preds == 0, 1, 0))/len(y_train)) * 100
final_train_runtime = final_time - init_time
print("Training Accuracy: " + str(final_train_accuracy))
print("Training Runtime: " + str(final_train_runtime))

init_time = time.perf_counter()
preds = []
model = kNN(X_train, y_train, k)

for row in X_test:
  preds.append(model.predict(row))

final_time = time.perf_counter()

final_test_accuracy = (np.sum(np.where(y_test - preds == 0, 1, 0))/len(y_test)) * 100
final_test_runtime = final_time - init_time
print("Testing Accuracy: " + str(final_test_accuracy))
print("Testing Runtime: " + str(final_test_runtime))

Training Accuracy: 67.57281553398057
Training Runtime: 0.2297173799997836
Testing Accuracy: 82.35294117647058
Testing Runtime: 0.06583682800010138


In [24]:
print("Train accuracy change: " + str(final_train_accuracy - initial_train_accuracy))
print("Train runtime change: " + str(final_train_runtime - initial_train_runtime))
print("Test accuracy change: " + str(final_test_accuracy - init_test_accuracy))
print("Test runtime change: " + str(final_test_runtime - init_test_runtime))

Train accuracy change: 0.7766990291262061
Train runtime change: -0.13965385199935554
Test accuracy change: 1.8099547511312153
Test runtime change: -0.06597217099988484
