# Libraries and dependencies

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
# from pathlib import Path
# import scipy                # scipy.signal.spectrogram is the squared mag of stft of a signal 
import os
import librosa
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pickle

# Convenience functions

Maybe I should normalise the data, but cba atm.

In [3]:
base_dir = "./AudioWAV"

def audio_to_flattened_spectrogram(file_path, max_pad_len):
    y, sr = librosa.load(file_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_DB = librosa.amplitude_to_db(S, ref=np.max)
    padded_S_DB = np.pad(S_DB, ((0, 0), (0, max_pad_len - S_DB.shape[1])), mode='constant') if S_DB.shape[1] < max_pad_len else S_DB[:, :max_pad_len]
    return padded_S_DB.flatten()

# Function to determine the max length for padding/truncating by analyzing audio data
def determine_max_length(base_dir):
    max_len = 0
    for filename in os.listdir(base_dir):
        if filename.endswith(".wav"):
            file_path = os.path.join(base_dir, filename)
            y, sr = librosa.load(file_path)
            S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
            if S.shape[1] > max_len:
                max_len = S.shape[1]
    return max_len

def load_data(base_dir, max_pad_len):
    X = []
    y = []
    for filename in os.listdir(base_dir):
        if filename.endswith(".wav"):
            file_path = os.path.join(base_dir, filename)
            if "IN" in filename:
                label = 0  # Category "IN"
            elif "NOT" in filename:
                label = 1  # Category "NOT"
            spectrogram = audio_to_flattened_spectrogram(file_path, max_pad_len)
            X.append(spectrogram)
            y.append(label)
    return np.array(X), np.array(y)

# maximum spectrogram length
max_length = determine_max_length(base_dir)
X, y = load_data(base_dir, max_length)

# 60/20/20 split
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42) 
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

## Logistic Regressor

In [4]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


def print_score(clf, X_train, y_train, X_test, y_test, train=True):
    if train:
        pred = clf.predict(X_train)
        clf_report = pd.DataFrame(classification_report(y_train, pred, output_dict=True))
        print("Train Result:\n================================================")
        print(f"Accuracy Score: {accuracy_score(y_train, pred) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(y_train, pred)}\n")
        
    elif train==False:
        pred = clf.predict(X_test)
        clf_report = pd.DataFrame(classification_report(y_test, pred, output_dict=True))
        print("Test Result:\n================================================")        
        print(f"Accuracy Score: {accuracy_score(y_test, pred) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(y_test, pred)}\n")

In [5]:
from sklearn.linear_model import LogisticRegression

lr_clf = LogisticRegression(solver='liblinear')
lr_clf.fit(X_train, y_train)

# save model. I cannot currently think of another way to check sizes of classical ml models
filename = 'lr_model.sav'
pickle.dump(lr_clf, open(filename, 'wb'))

print_score(lr_clf, X_train, y_train, X_test, y_test, train=True)
print_score(lr_clf, X_train, y_train, X_test, y_test, train=False)

Train Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                0       1  accuracy  macro avg  weighted avg
precision     1.0     1.0       1.0        1.0           1.0
recall        1.0     1.0       1.0        1.0           1.0
f1-score      1.0     1.0       1.0        1.0           1.0
support    1502.0  2963.0       1.0     4465.0        4465.0
_______________________________________________
Confusion Matrix: 
 [[1502    0]
 [   0 2963]]

Test Result:
Accuracy Score: 61.65%
_______________________________________________
CLASSIFICATION REPORT:
                    0           1  accuracy    macro avg  weighted avg
precision    0.468401    0.700315  0.616521     0.584358      0.616677
recall       0.469274    0.699580  0.616521     0.584427      0.616521
f1-score     0.468837    0.699947  0.616521     0.584392      0.616599
support    537.000000  952.000000  0.616521  1489.000000   1489.000000
____________________________

## Tree

In [6]:
from sklearn import tree

tree_clf = tree.DecisionTreeClassifier()
tree_clf.fit(X_train, y_train)

filename = 'tree_model.sav'
pickle.dump(tree_clf, open(filename, 'wb'))

print_score(tree_clf, X_train, y_train, X_test, y_test, train=True)
print_score(tree_clf, X_train, y_train, X_test, y_test, train=False)

Train Result:
Accuracy Score: 100.00%
_______________________________________________
CLASSIFICATION REPORT:
                0       1  accuracy  macro avg  weighted avg
precision     1.0     1.0       1.0        1.0           1.0
recall        1.0     1.0       1.0        1.0           1.0
f1-score      1.0     1.0       1.0        1.0           1.0
support    1502.0  2963.0       1.0     4465.0        4465.0
_______________________________________________
Confusion Matrix: 
 [[1502    0]
 [   0 2963]]

Test Result:
Accuracy Score: 61.45%
_______________________________________________
CLASSIFICATION REPORT:
                    0           1  accuracy    macro avg  weighted avg
precision    0.463511    0.692464  0.614506     0.577988      0.609893
recall       0.437616    0.714286  0.614506     0.575951      0.614506
f1-score     0.450192    0.703206  0.614506     0.576699      0.611958
support    537.000000  952.000000  0.614506  1489.000000   1489.000000
____________________________

Try to plot decision tree:

In [8]:
# plt.figure(figsize=(12, 8))
# tree.plot_tree(tree_clf, feature_names=list(X.columns), class_names=['No', 'Yes'], filled=True, rounded=True)
# plt.savefig("decision_tree.png")
# plt.show()

## SVM

This is an arbitrary model, and serves to show the size of an SVM model.

In [9]:
from sklearn.svm import SVC

svm_clf  = SVC( C=1.0,              # regularization. This may require tuning
               kernel='sigmoid',    # radial or gaussian kernel. Can try differnt ones esp radial
               random_state=42,
                gamma=0.1,
                degree=2,           # irrelevant except for poly
                class_weight = "balanced")

svm_clf.fit(X_train, y_train)       # should I normalise or nah

filename = 'svm_model.sav'
pickle.dump(svm_clf, open(filename, 'wb'))

print_score(svm_clf, X_train, y_train, X_test, y_test, train=True)
print_score(svm_clf, X_train, y_train, X_test, y_test, train=False)

Train Result:
Accuracy Score: 66.38%
_______________________________________________
CLASSIFICATION REPORT:
                     0            1  accuracy    macro avg  weighted avg
precision     1.000000     0.663754   0.66383     0.831877      0.776866
recall        0.000666     1.000000   0.66383     0.500333      0.663830
f1-score      0.001331     0.797900   0.66383     0.399615      0.529938
support    1502.000000  2963.000000   0.66383  4465.000000   4465.000000
_______________________________________________
Confusion Matrix: 
 [[   1 1501]
 [   0 2963]]

Test Result:
Accuracy Score: 63.94%
_______________________________________________
CLASSIFICATION REPORT:
               0           1  accuracy    macro avg  weighted avg
precision    0.0    0.639355  0.639355     0.319678      0.408775
recall       0.0    1.000000  0.639355     0.500000      0.639355
f1-score     0.0    0.780008  0.639355     0.390004      0.498702
support    537.0  952.000000  0.639355  1489.000000   1489.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## On model sizes

| Model Type          | Size   |
|---------------------|--------|
| Logistic Regression | 217 KB |
| Tree                |  65 KB |
| SVM                 | 964,529 KB |

As our NICLA voice only has 512KB flash, the SVM is not a viable model. The accuracy of the other two models are too low to be the optimal models, but could be used in the worst case.