## Objective of the Notebook
(NoobAtem) We're going to create an experimental model before integrating it into the model.py file. This notebook only takes a sample of our dataset; our objective is to make sure we have the right configuration for each model that we will use.
- YAMNet

### Prerequisite Libraries

In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
import numpy as np
import librosa
import itertools
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import matplotlib
import csv
import scipy
import joblib
from scipy.io import wavfile
from IPython.display import Audio
import warnings
warnings.filterwarnings("ignore")
matplotlib.style.use("dark_background")

2024-07-29 02:30:34.270145: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-29 02:30:34.272683: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-29 02:30:34.281452: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-29 02:30:34.296616: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-29 02:30:34.300950: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-29 02:30:34.311469: I tensorflow/core/platform/cpu_feature_gu

### Project Structure

In [10]:
# Root folders: every dataset lives under DATA_P, project code under SRC_P.
DATA_P: str = "../data"
SRC_P: str = "../src"

# Raw data: recordings that we formatted and designed/collected ourselves.
RAW_P: str = os.path.join(DATA_P, "raw")
DOOR_RAW_P: str = os.path.join(RAW_P, "door")
GLASS_RAW_P: str = os.path.join(RAW_P, "glass")

# Interim data: datasets that are still being processed.
INTERIM_P: str = os.path.join(DATA_P, "interim")
AUDIO_INTERIM_P: str = os.path.join(INTERIM_P, "audio")
IMAGE_INTERIM_P: str = os.path.join(INTERIM_P, "image")
FEATURE_INTERIM_P: str = os.path.join(INTERIM_P, "feature.csv")
TARGET_INTERIM_P: str = os.path.join(INTERIM_P, "target.csv")

# Folder where trained weights and related artifacts are written.
WEIGHTS_P: str = os.path.join(SRC_P, "weights")

### Initialize Model

One interesting pre-trained model is YAMNet. This model classifies sounds into a large number of labels. It has built-in spectrogram preprocessing, meaning you pass it the raw audio waveform rather than a spectrogram. The code below follows the step-by-step guide laid out in the TensorFlow documentation.

In [3]:
# TensorFlow Hub handle (URL) of the pre-trained YAMNet model.
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
# Load the model from the handle; it is called directly on audio data in the cells below.
yamnet_model = hub.load(yamnet_model_handle)

Next, we will extract features from the audio using YAMNet.

In [4]:
# Collect the YAMNet embeddings of every entry in the interim audio folder.
audio_files: list = os.listdir(AUDIO_INTERIM_P)
features: list = []

for p in audio_files:
    # Load each clip at a 16 kHz sample rate.
    audio_data, sr = librosa.load(os.path.join(AUDIO_INTERIM_P, p), sr=16000)
    # astype returns a new array instead of converting in place, so the result
    # must be rebound; copy=False avoids a copy when the data is already float32.
    audio_data = audio_data.astype(np.float32, copy=False)
    # The model returns three values; only the middle one (embeddings) is kept.
    _, embeddings, _ = yamnet_model(audio_data)
    features.append(embeddings.numpy())

In [5]:
def extract_embeddings(yamnet_model: object, filepath: str) -> np.ndarray:
    """Return the YAMNet embeddings of a single audio file.

    The file is loaded with librosa at a 16 kHz sample rate, cast to float32
    and passed to ``yamnet_model``. The model returns three values; the
    middle one (the embeddings) is converted to a NumPy array and returned.

    Args:
        yamnet_model: Callable model that accepts a waveform and returns a
            3-tuple whose second element exposes ``.numpy()``.
        filepath: Path of the audio file to load.

    Returns:
        The embeddings as a NumPy array (presumably one row per audio
        frame; confirm against the model documentation).
    """
    waveform, _ = librosa.load(filepath, sr=16000)
    # astype returns a new array instead of converting in place, so the result
    # must be rebound; copy=False avoids a copy when the data is already float32.
    waveform = waveform.astype(np.float32, copy=False)
    _, embeddings, _ = yamnet_model(waveform)
    return embeddings.numpy()

We're now going to take a sample of our dataset and split it into train and test sets.

In [14]:
def data_prepare(doorpath: str, glasspath: str, yamnet_model: object) -> tuple:
    """Build the embedding and label arrays from the door and glass folders.

    Every file in ``doorpath`` and then ``glasspath`` is turned into YAMNet
    embeddings with ``extract_embeddings``. The label of a file is the part
    of its name before the first ``-``.

    Args:
        doorpath: Folder holding the door recordings.
        glasspath: Folder holding the glass recordings.
        yamnet_model: Model forwarded to ``extract_embeddings``.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the stacked embeddings and the
        matching string labels, door files first.
    """
    X, y = [], []
    for folder in (doorpath, glasspath):
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split made from it) reproducible.
        for filename in sorted(os.listdir(folder)):
            if filename == ".ipynb_checkpoints":
                continue
            filepath: str = os.path.join(folder, filename)
            # Only regular files can be decoded as audio; skip sub-directories.
            if not os.path.isfile(filepath):
                continue
            X.append(extract_embeddings(yamnet_model, filepath))
            y.append(filename.split("-")[0])

    # NOTE(review): np.array(X) needs every file to yield embeddings of the
    # same shape - presumably true for equal-length clips; confirm.
    return np.array(X), np.array(y)

We are using the embeddings from YAMNet, then converting the labels to numerical form.

In [15]:
# Embeddings and raw string labels for the door and glass recordings.
X, y = data_prepare(DOOR_RAW_P, GLASS_RAW_P, yamnet_model)
# NOTE(review): squeeze drops every length-1 axis, including the sample or
# frame axis if either happens to have length 1 - confirm this is intended.
X = np.squeeze(X)

# String labels -> integer ids -> one-hot rows.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)
y_categorical = to_categorical(y_encoded)

In [16]:
# Our labels
# Sorted so the list order is stable between runs and position i lines up with
# encoded class i (LabelEncoder keeps its classes in sorted order).
class_names: list = sorted(set(y))

In [17]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    random_state=42,
    test_size=0.2,
)

Let's define a model to fit our requirements.

In [18]:
# Small classification head on top of the YAMNet embeddings: flatten each
# sample, one hidden layer, then one softmax unit per encoded class.
model = Sequential([
    # Declare the per-sample shape with an explicit Input instead of the
    # legacy `input_shape=` layer argument; shape[1:] is every axis after
    # the sample axis.
    tf.keras.Input(shape=X_train.shape[1:]),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(le.classes_), activation='softmax'),
])

# categorical_crossentropy matches the one-hot encoded targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Training with Validation

With *YAMNet* pretrained model

In [19]:
# Train silently for 40 epochs, using the held-out split as validation data;
# the object returned by fit is kept in `score`.
score = model.fit(
    x=X_train,
    y=y_train,
    epochs=40,
    validation_data=(X_test, y_test),
    verbose=0,
)

### Evaluate and Predict

Let's check the performance of the model.

In [20]:
# Loss and accuracy (the metric set in compile) on the held-out split.
loss, accuracy = model.evaluate(x=X_test, y=y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 1.0000 - loss: 0.0157


Try to use the model

In [32]:
# Pick one glass recording to try the model on. Only regular files are
# considered (the folder may also hold entries such as .ipynb_checkpoints) and
# they are sorted so the same sample is chosen on every run.
glass_files: list = sorted(
    name for name in os.listdir(GLASS_RAW_P)
    if os.path.isfile(os.path.join(GLASS_RAW_P, name))
)
sample_audio_wav: str = os.path.join(GLASS_RAW_P, glass_files[0])
# basename works with any path separator, unlike splitting on "/".
sample_label: str = os.path.basename(sample_audio_wav).split("-")[0]
sample_extract_emb: np.ndarray = extract_embeddings(yamnet_model, sample_audio_wav)
# Frame count the classifier expects, read from the model instead of hard-coded.
target_shape: tuple = (model.input_shape[1], sample_extract_emb.shape[1])

In [25]:
# Pad or truncate embeddings to the required shape
def pad_or_truncate(embeddings, target_shape):
    """Force a 2-D embeddings array to ``target_shape[0]`` rows.

    Extra rows are cut off the end; missing rows are appended as zeros.
    The padding uses the dtype of ``embeddings`` so that all three branches
    return the same dtype as the input (plain ``np.zeros`` is float64 and
    would silently upcast float32 embeddings).

    Args:
        embeddings: 2-D NumPy array with one row per frame.
        target_shape: ``(rows, columns)`` wanted; ``columns`` is the width
            of the zero padding and must match the width of ``embeddings``.

    Returns:
        An array with ``target_shape[0]`` rows; the input itself when it
        already has that many rows.

    Raises:
        ValueError: If padding is needed and ``target_shape[1]`` differs
            from the width of ``embeddings`` (raised by ``np.vstack``).
    """
    frames = embeddings.shape[0]
    wanted_frames = target_shape[0]

    if frames > wanted_frames:
        return embeddings[:wanted_frames, :]
    if frames < wanted_frames:
        padding = np.zeros(
            (wanted_frames - frames, target_shape[1]),
            dtype=embeddings.dtype,
        )
        return np.vstack((embeddings, padding))
    return embeddings

In [33]:
# Match the clip to the frame count the classifier was built for; a clip that
# already has that many frames is returned unchanged.
sample_extract_emb = pad_or_truncate(sample_extract_emb, model.input_shape[1:])
# Add the leading batch axis expected by predict.
sample_extract_emb = np.expand_dims(sample_extract_emb, axis=0)
predictions: np.ndarray = model.predict(sample_extract_emb)
# argmax over the class axis gives one class id per batch item.
predicted_class: np.ndarray = np.argmax(predictions, axis=-1)
predicted_label: str = le.inverse_transform(predicted_class)[0]
predicted_label

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


'glass'

For convenience, I'm converting this into a function

### Save the model

There is a naming format for saved models that we should follow: mm-dd-yyyy-dataset_used-total_epochs.keras

In [34]:
# This is for the the name dataset used, you will apply xor that will act as a decoder
ESC50_DATA: int = 1
RAW_DOOR_1_DATA: int = 2
RAW_GLASS_2_DATA: int = 4

In [38]:
# Filename layout: mm-dd-yyyy-<dataset flags>-<total epochs>.keras
now: str = datetime.now().strftime("%m-%d-%Y")
# Take the epoch count from the training history rather than repeating the
# literal, so the name stays right if the epochs argument of fit changes.
total_epochs: int = len(score.history["loss"])
# NOTE(review): the training data above came from DOOR_RAW_P and GLASS_RAW_P,
# yet the id combines ESC50_DATA and RAW_DOOR_1_DATA - confirm the flags.
filename: str = f"{now}-{ESC50_DATA | RAW_DOOR_1_DATA}-{total_epochs}"
model_path: str = f'{os.path.join(WEIGHTS_P, filename)}.keras'
# Saving fails when the weights folder is missing, so create it if needed.
os.makedirs(WEIGHTS_P, exist_ok=True)
model.save(model_path)
print(model_path)

../src/weights/07-29-2024-3-40.keras


In [37]:
# Store the fitted label encoder next to the model weights (WEIGHTS_P) so
# predicted class ids can be mapped back to label names later.
joblib.dump(le, os.path.join(WEIGHTS_P, 'label_encoder.joblib'))

['../src/weights/label_encoder.joblib']