# Audio Classification

## Todo:
- [Gunshot Audio Classification using Deep Learning Python and Keras](https://www.youtube.com/watch?v=DPXEh9iodYQ&list=LL&index=3&t=991s)

In [4]:
import numpy as np

import wandb
from wandb.keras import WandbCallback

from pathlib import Path

## Data Loading & Preprocessing

In [6]:
import librosa
from tqdm import tqdm

from keras.utils import to_categorical

# Dataset directories, located relative to the current working directory:
# <two levels above cwd>/data/ml-class/cnn-audio/{raw,intermediate}
DATA_PATH = Path.cwd().resolve().parents[1].joinpath("data", "ml-class", "cnn-audio")
INPUT_PATH = DATA_PATH.joinpath("raw")
INTERMEDIATE_PATH = DATA_PATH.joinpath("intermediate")

In [7]:
def get_labels(data_path):
    """Discover class labels from the sub-directories of ``data_path``.

    Args:
        data_path: ``pathlib.Path`` of the directory holding one
            sub-directory per class.

    Returns:
        A tuple ``(labels, label_indices, one_hot)`` where ``labels`` is the
        alphabetically sorted list of sub-directory names (hidden entries
        whose name starts with ``.`` are skipped), ``label_indices`` is
        ``np.arange(len(labels))`` and ``one_hot`` is
        ``to_categorical(label_indices)``.
    """
    # iterdir() yields entries in arbitrary order, so sort to keep the
    # label -> index mapping stable between runs and machines. Only
    # directories are kept: stray files are not classes.
    labels = sorted(
        entry.name
        for entry in data_path.iterdir()
        if entry.is_dir() and not entry.name.startswith('.')
    )

    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)


In [8]:
def wav2mfcc(file_path, n_mfcc=20, max_len=11, downsample_step=3, sr=16000):
    """Load an audio file and return a fixed-width MFCC matrix.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        n_mfcc: Number of Mel-frequency cepstral coefficients (rows).
        max_len: Number of frames (columns) in the returned matrix.
        downsample_step: Keep every ``downsample_step``-th sample of the
            loaded waveform before computing the MFCCs. Defaults to 3.
        sr: Sampling rate reported to ``librosa.feature.mfcc``.
            Defaults to 16000.

    Returns:
        Array of shape ``(n_mfcc, max_len)``: zero-padded on the right when
        the clip yields fewer than ``max_len`` frames, truncated otherwise.
    """
    # sr=None makes librosa keep the file's own sampling rate; the returned
    # rate is discarded.
    wave, _ = librosa.load(file_path, mono=True, sr=None)

    # Plain decimation by slicing, stored as a Fortran-ordered array.
    wave = np.asfortranarray(wave[::downsample_step])

    # NOTE(review): `sr` is a fixed value, not derived from the file's rate
    # or from `downsample_step` -- confirm this is intended.
    mfcc = librosa.feature.mfcc(y=wave, sr=sr, n_mfcc=n_mfcc)

    n_frames = mfcc.shape[1]
    if n_frames < max_len:
        # Too short: pad the time axis with zeros up to max_len frames.
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        # Long enough: keep only the first max_len frames.
        mfcc = mfcc[:, :max_len]

    return mfcc

In [9]:
def save_data_to_array(path=INPUT_PATH, max_len=11, n_mfcc=20):
    """Compute MFCC features for every label and save one ``.npy`` per label.

    Args:
        path: ``pathlib.Path`` of the directory containing one sub-directory
            of ``*.wav`` files per label.
        max_len: Number of MFCC frames kept per clip (see ``wav2mfcc``).
        n_mfcc: Number of MFCC coefficients per frame (see ``wav2mfcc``).

    Returns:
        The list of labels found under ``path``; for each one a file
        ``INTERMEDIATE_PATH / "<label>.npy"`` has been written.
    """
    labels, _, _ = get_labels(path)

    # np.save does not create missing parent directories.
    INTERMEDIATE_PATH.mkdir(parents=True, exist_ok=True)

    for label in labels:
        # Read from `path` (not the module-level default) so a custom input
        # directory is honoured. Sorted so the saved row order is reproducible.
        wavfiles = sorted(wavfile.as_posix() for wavfile in (path / label).glob("*.wav"))

        mfcc_vectors = [
            wav2mfcc(wavfile, max_len=max_len, n_mfcc=n_mfcc)
            for wavfile in tqdm(wavfiles, f"Saving vectors of label - '{label}'\t")
        ]

        np.save(INTERMEDIATE_PATH / (label + '.npy'), mfcc_vectors)
    return labels

In [30]:
# Start a Weights & Biases run and register the hyperparameters with it.
wandb.init(
    # set the wandb project where this run will be logged
    project="audio-classification",
    config={
        "epochs": 50,
        "batch_size": 32,
        "max_len": 11,   # MFCC frames kept per clip
        "buckets": 20,   # MFCC coefficients per frame
    },
)
config = wandb.config



In [10]:


# Extract the MFCC features (frequency buckets x time frames) for every label
# and store them as per-label array files before anything else.
labels = save_data_to_array(n_mfcc=config.buckets, max_len=config.max_len)

Saving vectors of label - 'cat'	: 100%|██████████| 1733/1733 [00:06<00:00, 273.10it/s]
Saving vectors of label - 'bed'	: 100%|██████████| 1713/1713 [00:02<00:00, 630.48it/s]
Saving vectors of label - 'happy'	: 100%|██████████| 1742/1742 [00:02<00:00, 634.73it/s]


## Train Test Split

In [11]:
from sklearn.model_selection import train_test_split

def get_train_test(data_path: Path, labels, split_ratio=0.6, random_state=42):
    """Load the per-label feature files and split them into train/test sets.

    Args:
        data_path: Directory containing one ``<label>.npy`` file per label.
        labels: Sequence of label names; the position of a label in this
            sequence is the class index used in ``y``.
        split_ratio: Fraction of samples assigned to the training set.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``X_train, X_test, y_train, y_test`` as returned by
        ``sklearn.model_selection.train_test_split`` (shuffled).

    Raises:
        ValueError: If ``labels`` is empty.
    """
    if len(labels) == 0:
        raise ValueError("labels must contain at least one class name")

    # Collect every class first and concatenate once, instead of re-copying
    # the growing arrays on each iteration.
    features = []
    targets = []
    for class_index, label in enumerate(labels):
        class_features = np.load(data_path / (label + '.npy'))
        features.append(class_features)
        # Float fill value keeps y as float64, as it was when seeded by np.zeros.
        targets.append(np.full(class_features.shape[0], fill_value=float(class_index)))

    X = np.concatenate(features, axis=0)
    y = np.concatenate(targets)

    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size= (1 - split_ratio), random_state=random_state, shuffle=True)


In [12]:
# Build the training and test partitions from the cached per-label features
X_train, X_test, y_train, y_test = get_train_test(data_path=INTERMEDIATE_PATH, labels=labels)

In [13]:
# Display the dimensions of the training feature array as a sanity check
X_train.shape

(3112, 20, 11)

In [42]:
channels = 1
# Append a trailing channel axis: each sample becomes (buckets, max_len, channels)
X_train = np.reshape(X_train, (len(X_train), config.buckets, config.max_len, channels))
X_test = np.reshape(X_test, (len(X_test), config.buckets, config.max_len, channels))

In [14]:
# One-hot encode the integer class ids. The width is pinned to the number of
# known labels so both splits get the same number of columns even if one of
# them happens to lack the highest class id.
y_train_hot = to_categorical(y_train, num_classes=len(labels))
y_test_hot = to_categorical(y_test, num_classes=len(labels))

## Model Training

In [46]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPool2D

In [28]:
# Size of the output layer: one unit per label found in the dataset
num_classes = len(labels)


### Basic ANN

In [43]:
# Baseline: a small fully connected network on the flattened MFCC matrix
model = Sequential([
    # flatten into signal vector
    Flatten(input_shape=(config.buckets, config.max_len, channels)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

### CNN

In [51]:
# Convolutional variant: one conv/pool stage followed by a softmax classifier
model = Sequential([
    Conv2D(
        32,
        (3,3),
        activation='relu',
        input_shape=(config.buckets, config.max_len, channels),
    ),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.25),
    Flatten(),  # flatten into signal vector
    Dropout(0.25),
    Dense(num_classes, activation='softmax'),
])
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [52]:
# Train the current `model`, logging metrics to Weights & Biases through
# WandbCallback, then close the run.
# NOTE(review): this wandb.init() call passes no project name, unlike the
# earlier configuration cell -- confirm which run/project it should log to.
wandb.init()
model.fit(X_train,
          y_train_hot,
          # Use the configured batch size instead of relying on Keras' default
          # (which is also 32), so changing config.batch_size takes effect.
          batch_size=config.batch_size,
          epochs=config.epochs,
          validation_data=(X_test, y_test_hot),
          callbacks=[WandbCallback()])
wandb.finish()

Epoch 1/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 2/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 3/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 4/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 5/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 6/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 7/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 8/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 13/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 14/50
Epoch 15/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 16/50


  saving_api.save_model(
INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 17/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 32/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/codexplore/Developer/repos/audio/projects/speech_classification/wandb/run-20231123_181119-a3g1i6za/files/model-best)... Done. 0.0s


Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




0,1
accuracy,▁▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▄▅▆▇▇▅▆▇▇▆▇█▇▇█▇▇▇▆█▆▇██▇▇███▄████▇███
val_loss,█▅▄▃▂▂▂▂▂▂▁▂▁▁▁▁▁▂▁▁▂▁▂▁▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁

0,1
accuracy,0.93445
best_epoch,44.0
best_val_loss,0.26438
epoch,49.0
loss,0.19619
val_accuracy,0.91137
val_loss,0.27308
