### Question 1
Under this ensemble binary classification technique, error occurs when more than half of the result of the classifiers is wrong.
So we should sums the possibilities of more than $\frac{N-1}{2}$ classifiers produce wrong prediction.
Hence we have:
$$
P_{\text{ensemble}} = \sum_{k= \frac{N+1}{2}}^{N} {N \choose k} \epsilon^k (1-\epsilon)^{N-k}
$$

Where $ {N \choose k}$  representing the number of ways to choose k classifiers from N\
$\epsilon^k$ representing the probability that k classifiers wrong\
$(1-\epsilon)^{N-k}$  representing the probability that the remaining classifiers correct

### Question 2


In [1]:
#pip install librosa

Note: you may need to restart the kernel to use updated packages.


In [2]:
'''
Please install this specific version of resampy for librosa to work without errors.
'''

'\nPlease install this specific version of resampy for librosa to work without errors.\n'

In [3]:
#pip install resampy==0.3.1

Note: you may need to restart the kernel to use updated packages.


In [4]:
import soundfile
import os
import glob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import librosa
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report
import warnings; warnings.filterwarnings('ignore')

In [5]:
emotions ={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

### Data for binary classification


In [6]:
def load_extract_features(data_path):

    '''
    load_extract_features() is a function that is used to load all the audio files one at a time, compute their features and return the features as well as the target values.

    There are around 8-10 audio files which are corrupted. We hardcode zero values for such files in order to maintain consistency.

    ['calm', 'happy'] emotion data is categorized into 'positive' and  ['angry', 'fearful'] into 'negative'

    Returns:
    1. Features
    2. Binary Target Values
    '''
    final_features,target_emotions, binary_label = [],[], []
    count = 0
    
    for i in glob.glob(data_path + "/Actor_*/*.wav"): #Loop to read every file.
        
        name = os.path.basename(i)
        #We split the name of the file to understand the emotion associated with the file.
        split = name.split("-")
        #We know that the third identifier is associated with the emotion of the audio file. Hence, we use [2] as it represents the third identifier.
        emotion = emotions[split[2]]

        #Below is the code to categorize the emotions into two classes to make this a binary problem.
        if emotion in ['calm', 'happy']:
            binary_label.append(0)
        elif emotion in ['angry', 'fearful']:
            binary_label.append(1)
        else:
            continue
        
        with soundfile.SoundFile(i) as audio:
            waveform = audio.read(dtype="float32")
            sr = audio.samplerate
            
            #Below is the code to extract the Mel spectrogram features
            #128 is the standard for machine learning applications using Mel spectrograms
            m_feature = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128, fmax=sr / 2.0).T
            melspectrogram = np.mean(m_feature,axis=0)
            if melspectrogram.shape != (128,):
                melspectrogram = np.zeros(128)
            
            #Below is the code to extract the chromagram features
            stft_wave = librosa.stft(waveform)
            stft = np.abs(stft_wave)
            c_feature = librosa.feature.chroma_stft(S=stft, sr=sr).T
            chromagram = np.mean(c_feature,axis=0)
            
            #12 is the number of pitch classes
            if chromagram.shape != (12,):
                chromagram = np.zeros(12)
                
            features=np.array([])
            features=np.hstack((chromagram, melspectrogram))
        
            final_features.append(features)
            target_emotions.append(emotion)
            
            count += 1
            if count % 100 == 0:
                print("Processed Audio File Number: ", count)
    
    #We return the features and the binary target values.
    return np.array(final_features), np.array(binary_label)

In [36]:
#Please change the path below to the path of the folder saved on your computer.
data_path = './Audio_Speech_Actors_01-24'
X, binary_label = load_extract_features(data_path)

Processed Audio File Number:  100
Processed Audio File Number:  200
Processed Audio File Number:  300
Processed Audio File Number:  400
Processed Audio File Number:  500
Processed Audio File Number:  600
Processed Audio File Number:  700


In [37]:
X.shape

(768, 140)

In [38]:
binary_label.shape

(768,)

# Training

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, binary_label, test_size=0.33, random_state=42)

In [41]:
X_train.shape

(514, 140)

In [42]:
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

In [43]:
from collections import Counter
def knn_classifier(X_train, y_train, X_test, k):
    predictions = []

    for test_point in X_test:
        # Compute distances between the test point and all training points
        distances = [euclidean_distance(test_point, train_point) for train_point in X_train]

        # Sort by distance and return the indices of k closest neighbors
        k_indices = np.argsort(distances)[:k]

        # Extract the labels of the k nearest neighbors
        k_nearest_labels = [y_train[i] for i in k_indices]

        # Majority vote: most common class label among the k-nearest neighbors
        most_common = Counter(k_nearest_labels).most_common(1)
        predictions.append(most_common[0][0])

    return np.array(predictions)


In [45]:
def calculate_accuracy(y_true, y_pred):
    correct = np.sum(y_true == y_pred)
    total = len(y_true)
    accuracy = correct / total
    return accuracy


In [44]:
# kNN prediction
k = 3
y_pred = knn_classifier(X_train, y_train, X_test, k)
print("Predicted labels:", y_pred)


Predicted labels: [1 0 0 1 0 1 1 0 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 1 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 1 1
 1 0 0 0 1 1 1 1 0 0 1 0 1 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0
 1 1 1 1 0 0 1 0 1 1 0 0 1 0 1 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 1 1 0 0 1 0 0
 1 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 1 1 0 0 1 0
 0 1 0 1 0 1 0 0 0 1 0 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1]


In [46]:
accuracy = calculate_accuracy(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7283464566929134


# Cross Validation

In [49]:
from sklearn.model_selection import KFold

def cross_validation(X, y, k, num_folds):
    kf = KFold(n_splits=num_folds)
    accuracies = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Assuming knn_classifier is your kNN classifier function
        y_pred = knn_classifier(X_train, y_train, X_test, k)

        # Calculate accuracy
        accuracy = calculate_accuracy(y_test, y_pred)
        accuracies.append(accuracy)

    # Average accuracy across all folds
    average_accuracy = sum(accuracies) / len(accuracies)
    return average_accuracy

In [54]:
score=[]
for i in range(3,30):
    k=i
    average_accuracy = cross_validation(X_train, y_train, k, num_folds=6)
    score.append(average_accuracy)

In [62]:
k_values = list(range(3, 30))

In [65]:
max_index = score.index(max(score))

In [67]:
k_values[max_index]

4

# Using K=4 for test data

In [69]:
# kNN prediction
import time

# Start time
start_time = time.time()

k = 4
y_pred = knn_classifier(X_train, y_train, X_test, k)
accuracy = calculate_accuracy(y_test, y_pred)
print("Accuracy:", accuracy)

# End time
end_time = time.time()
# Calculate elapsed time
elapsed_time = end_time - start_time
print("Elapsed time:", elapsed_time, "seconds")

Accuracy: 0.7283464566929134
Elapsed time: 1.4515068531036377 seconds
