**MFCC feature extraction**

In [None]:
import librosa
import numpy as np

def extract_mfcc(file_path, n_mfcc=13, n_fft=2048, hop_length=512):
    """Compute the MFCC matrix of one audio file, one row per analysis frame.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to decode with ``librosa.load`` (library-default sample rate).
    n_mfcc : int
        Number of cepstral coefficients kept per frame.
    n_fft : int
        FFT window length, in samples.
    hop_length : int
        Step between consecutive frames, in samples.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frames, n_mfcc)``.
    """
    signal, sample_rate = librosa.load(file_path)
    coefficients = librosa.feature.mfcc(
        y=signal,
        sr=sample_rate,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    # librosa lays coefficients out as (n_mfcc, n_frames); callers want frames first.
    return coefficients.T


In [None]:
# Directories holding the clips of each class. These are absolute paths;
# adjust them when running the notebook in another environment.
fear_dir = '/content/speech/fear'
non_fear_dir = '/content/speech/no_fear'

In [None]:
# NOTE(review): `data` and `labels` are first assigned in the following cell,
# so on a fresh kernel this cell only works after that one has been run.
print(len(data), len(labels), sep='\n')

834
834


In [None]:
import os
import numpy as np

data = []
labels = []

# Fear clips are read first, then non-fear clips; data[i] pairs with labels[i].
for class_dir, class_label in ((fear_dir, 'fear'), (non_fear_dir, 'non-fear')):
  for file in os.listdir(class_dir):
    if not file.endswith('.mp4'):
      continue
    mfcc = extract_mfcc(os.path.join(class_dir, file))
    data.append(mfcc)
    labels.append(class_label)

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Hold out half of the clips for testing; the fixed seed keeps the split
# repeatable for a given ordering of `data`.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.5,
    random_state=42,
)
svm_classifier = SVC(random_state=42, kernel='linear')

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Every clip is forced to exactly `max_length` frames so that all flattened
# feature vectors have the same length.
# NOTE(review): `truncating` is left at the Keras default, which per the Keras
# docs drops frames from the START of clips longer than max_length, while
# padding is appended at the END -- confirm this asymmetry is intended.
max_length = 100
x_train_padded = pad_sequences(x_train, maxlen=max_length, padding='post', dtype='float32')
x_test_padded = pad_sequences(x_test, maxlen=max_length, padding='post', dtype='float32')

# Collapse (frames, coefficients) into one flat vector per clip for the SVM.
x_train_flattened = x_train_padded.reshape(len(x_train_padded), -1)
x_test_flattened = x_test_padded.reshape(len(x_test_padded), -1)

svm_classifier.fit(x_train_flattened, y_train)

In [None]:
# Sanity check: sizes of the train split, the test split and the test labels.
print(len(x_train), len(x_test), len(y_test), sep='\n')

417
417
417


In [None]:
# Both splits should report the same number of frames per clip after padding.
print(x_train_padded.shape[1], x_test_padded.shape[1], sep='\n')


100
100


In [None]:
# Make the (samples, frames, coefficients) layout explicit. Each split is
# reshaped with its OWN sample count: sharing the training count with the test
# split only works when both splits happen to have the same number of clips.
desired_shape = (x_train_padded.shape[0], max_length, 13)
x_train_padded = x_train_padded.reshape(desired_shape)
x_test_padded = x_test_padded.reshape((x_test_padded.shape[0],) + desired_shape[1:])

**Accuracy MFCC + SVM**

>  0.8273381294964028



In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Score the fitted SVM on the held-out, flattened MFCC vectors.
y_pred = svm_classifier.predict(x_test_flattened)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")

Accuracy: 0.8273381294964028


In [None]:
# Per-class precision / recall / F1 for the predictions currently held in `y_pred`.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        fear       0.61      0.70      0.65        97
    non-fear       0.91      0.87      0.88       320

    accuracy                           0.83       417
   macro avg       0.76      0.78      0.77       417
weighted avg       0.84      0.83      0.83       417



**F0 feature extraction**

In [None]:
import matplotlib.pyplot as plt

def extract_fundamental_frequency(file_path, plot=True):
    """Estimate a single fundamental frequency (F0), in Hz, for an audio file.

    The estimate uses the Harmonic Product Spectrum (HPS): the time-averaged
    magnitude spectrum is multiplied by copies of itself decimated by
    2, 3, ..., N, so that the bin whose harmonics all carry energy stands out.
    The peak is searched only inside a plausible speech F0 band and the
    winning bin is converted to Hz.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to decode with ``librosa.load``.
    plot : bool, optional
        Accepted for backward compatibility; no plot is produced.

    Returns
    -------
    numpy.float64
        Estimated F0 in Hz, or 0.0 when the clip is empty or has no energy
        in the search band. The value is always finite.
    """
    y, sr = librosa.load(file_path)

    n_fft = 4096                         # longer window -> finer frequency bins
    f0_min_hz, f0_max_hz = 50.0, 500.0   # search band for the F0 of speech

    def calculate_hps(y, sr, N=4):
        """Return the N-harmonic product spectrum of `y` (one value per bin)."""
        # Average over time so that one spectrum summarises the whole clip.
        magnitude = np.abs(librosa.stft(y, n_fft=n_fft)).mean(axis=1).astype(np.float64)
        # Only bins whose N-th harmonic still lies inside the spectrum are usable.
        n_bins = magnitude.shape[0] // N
        hps = np.ones(n_bins)
        for harmonic in range(1, N + 1):
            # magnitude[::harmonic][k] is the energy at `harmonic` times bin k.
            hps *= magnitude[::harmonic][:n_bins]
        return hps

    if y.size == 0:
        f0 = np.float64(0.0)
    else:
        hps = calculate_hps(y, sr)
        bin_hz = sr / n_fft
        # Skip bin 0 (DC) and everything outside the speech band.
        lo = max(1, int(np.ceil(f0_min_hz / bin_hz)))
        hi = min(hps.shape[0], int(np.floor(f0_max_hz / bin_hz)) + 1)
        band = hps[lo:hi]
        if band.size == 0 or not np.any(band > 0):
            f0 = np.float64(0.0)
        else:
            # argmax is taken on a 1-D band, so it is a genuine frequency-bin offset.
            f0 = np.float64((lo + int(np.argmax(band))) * bin_hz)

    print(f"Fundamental Frequency (F0): {f0:.2f} Hz")

    return f0

In [None]:
data_f0 = []
labels_f0 = []

# Same traversal order as the MFCC dataset: fear clips first, then non-fear.
for class_dir, class_label in ((fear_dir, 'fear'), (non_fear_dir, 'non-fear')):
  for file in os.listdir(class_dir):
    if not file.endswith('.mp4'):
      continue
    f0 = extract_fundamental_frequency(os.path.join(class_dir, file))
    data_f0.append(f0)
    labels_f0.append(class_label)

In [None]:
# Sanity check: one F0 value and one label per clip.
print(len(data_f0), len(labels_f0), sep='\n')

834
834


**Accuracy F0 + SVM Classifier**

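> 0.7673860911270983 — this equals the majority-class rate (320/417): the classifier predicts `non-fear` for every test clip, so this F0 feature alone provides no discrimination.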


In [None]:
# Replace non-finite F0 estimates with 0 so the SVM only sees finite inputs.
data_f0_np = np.nan_to_num(np.array(data_f0), nan=0.0, posinf=0.0, neginf=0.0)
# scikit-learn expects a 2-D (n_samples, n_features) matrix; F0 is one feature.
data_f0_re = data_f0_np.reshape(-1, 1)
x_trainf, x_testf, y_trainf, y_testf = train_test_split(
    data_f0_re,
    labels_f0,
    test_size=0.5,
    random_state=42,
)
# Refits the shared `svm_classifier`; it no longer holds the model fitted earlier.
svm_classifier.fit(x_trainf, y_trainf)

In [None]:
# Score the F0-only SVM on its held-out half.
y_predf = svm_classifier.predict(x_testf)
accuracy = accuracy_score(y_true=y_testf, y_pred=y_predf)

print(f"Accuracy: {accuracy}")
print(classification_report(y_true=y_testf, y_pred=y_predf))

Accuracy: 0.7673860911270983
              precision    recall  f1-score   support

        fear       0.00      0.00      0.00        97
    non-fear       0.77      1.00      0.87       320

    accuracy                           0.77       417
   macro avg       0.38      0.50      0.43       417
weighted avg       0.59      0.77      0.67       417



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**MFCC & F0 feature extraction**

In [None]:
data_f0_mfcc = []
labels_f0_mfcc = []

# A single loop handles both classes (fear first, then non-fear), so the two
# classes cannot drift apart the way separately maintained copies can.
for class_dir, class_label in ((fear_dir, 'fear'), (non_fear_dir, 'non-fear')):
  for file in os.listdir(class_dir):
    if not file.endswith('.mp4'):
      continue
    clip_path = os.path.join(class_dir, file)
    f0 = extract_fundamental_frequency(clip_path)
    mfcc = extract_mfcc(clip_path)
    # F0 is one scalar per clip: repeat it on every frame and append it as an
    # extra column next to the MFCC coefficients.
    f0_feature = np.full_like(mfcc[:, 0], f0)
    feature_vector = np.hstack((mfcc, f0_feature.reshape(-1, 1)))
    data_f0_mfcc.append(feature_vector)
    labels_f0_mfcc.append(class_label)

In [None]:
# `mfcc` and `f0` are notebook-level leftovers from the last iteration of the
# loading loop, i.e. they describe only the final clip that was processed.
print("MFCC Feature Shape:", mfcc.shape)
print("F0 Feature Shape:", f0.shape)

MFCC Feature Shape: (148, 13)
F0 Feature Shape: ()


In [None]:
# Clips have different frame counts, so the samples cannot form a rectangular
# array. Fill an object array element by element: recent NumPy releases raise
# ValueError when np.array() is asked to infer a ragged shape on its own.
samples_f0_mfcc = list(data_f0_mfcc)
data_f0_mfcc = np.empty(len(samples_f0_mfcc), dtype=object)
for sample_index, sample in enumerate(samples_f0_mfcc):
  data_f0_mfcc[sample_index] = sample
labels_f0_mfcc = np.array(labels_f0_mfcc)
print(data_f0_mfcc.dtype)
print(labels_f0_mfcc.dtype)

object
<U8


In [None]:
from sklearn.preprocessing import LabelEncoder

# Map the string labels to integer class ids for the classifiers below.
label_encoder = LabelEncoder()
label_encoder.fit(labels_f0_mfcc)
labels_f0_mfcc_encoded = label_encoder.transform(labels_f0_mfcc)

In [None]:
# Pad every clip to the frame count of the longest one (no truncation happens).
# NOTE: this rebinds `max_length`, which an earlier cell set to 100.
max_length = max(map(len, data_f0_mfcc))
data_f0_mfcc_padded = pad_sequences(data_f0_mfcc, dtype='float32', padding='post', maxlen=max_length)

# One flat feature vector per clip; copy so the result does not alias the padded array.
data_f0_mfcc_flattened = data_f0_mfcc_padded.reshape(len(data_f0_mfcc_padded), -1).copy()


In [None]:
# Replace NaN and +/-inf entries (e.g. a non-finite F0 estimate) with 0.0 so
# the classifiers only ever see finite feature values.
data_f0_mfcc_flattened = np.nan_to_num(data_f0_mfcc_flattened, nan=0.0, posinf=0.0, neginf=0.0)

**Accuracy MFCC + F0 features and SVM Classifier**


> 0.7961630695443646


In [None]:
# Same 50/50 split settings and seed as the other experiments.
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    data_f0_mfcc_flattened,
    labels_f0_mfcc_encoded,
    test_size=0.5,
    random_state=42,
)
# Refits the shared `svm_classifier` on the combined MFCC + F0 vectors.
svm_classifier.fit(x_train2, y_train2)

In [None]:
# Score the MFCC + F0 SVM on its held-out half (labels are integer-encoded).
y_pred2 = svm_classifier.predict(x_test2)
accuracy = accuracy_score(y_true=y_test2, y_pred=y_pred2)

print(f"Accuracy: {accuracy}")
print(classification_report(y_true=y_test2, y_pred=y_pred2))

Accuracy: 0.7961630695443646
              precision    recall  f1-score   support

           0       0.56      0.56      0.56        97
           1       0.87      0.87      0.87       320

    accuracy                           0.80       417
   macro avg       0.71      0.71      0.71       417
weighted avg       0.80      0.80      0.80       417



**Accuracy MFCC + F0 features and MLP Classifier**


> 0.8009592326139089


In [None]:
# Create the classifier in this cell instead of relying on a definition that
# appears further down the notebook, so the cell works under
# "Restart Kernel -> Run All". Hyperparameters match the MFCC-only MLP.
from sklearn.neural_network import MLPClassifier

mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
mlp_classifier.fit(x_train2, y_train2)

In [None]:
# Score the MLP on the held-out MFCC + F0 vectors.
# Note: this overwrites the `y_pred2` produced by the SVM evaluation.
y_pred2 = mlp_classifier.predict(x_test2)
accuracy = accuracy_score(y_true=y_test2, y_pred=y_pred2)
classification_rep = classification_report(y_true=y_test2, y_pred=y_pred2)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_rep)

Accuracy: 0.8009592326139089
Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.43      0.50        97
           1       0.84      0.91      0.88       320

    accuracy                           0.80       417
   macro avg       0.72      0.67      0.69       417
weighted avg       0.79      0.80      0.79       417



**Accuracy MFCC + MLP Classifier**

> 0.8345323741007195



In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# No feature scaling is applied here (StandardScaler is imported but unused):
# the MLP is trained directly on the padded, flattened MFCC vectors.
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=1000,
    random_state=42,
)
mlp_classifier.fit(x_train_flattened, y_train)

In [None]:
# Score the MFCC-only MLP on the held-out flattened MFCC vectors.
# Note: this overwrites the `y_pred` produced by the SVM evaluation.
y_pred = mlp_classifier.predict(x_test_flattened)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_rep)

Accuracy: 0.8345323741007195
Classification Report:
              precision    recall  f1-score   support

        fear       0.63      0.69      0.66        97
    non-fear       0.90      0.88      0.89       320

    accuracy                           0.83       417
   macro avg       0.77      0.78      0.78       417
weighted avg       0.84      0.83      0.84       417

