# Audio Classification

In [1]:
import numpy as np

import wandb

from pathlib import Path

In [2]:
wandb.init(
    # set the wandb project where this run will be logged
    project="audio-classification",
)
config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcodexplore[0m ([33mcodexplore-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin


## Data Loading & Preprocessing

In [5]:
import librosa
from tqdm import tqdm

from keras.utils import to_categorical

DATA_PATH = Path("__file__").resolve().parents[2] / "data" / "ml-class" / "cnn-audio"
INPUT_PATH = DATA_PATH / "raw"
INTERMEDIATE_PATH = DATA_PATH / "intermediate"

In [6]:
def get_labels(data_path):
    labels = [f.name for f in data_path.iterdir() if not f.name.startswith('.')]

    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)


In [7]:
def wav2mfcc(file_path, n_mfcc=20, max_len=11):
    # convert file to wav2mfcc
    # Mel-frequency cepstral coefficients
    wave, _ = librosa.load(file_path, mono=True, sr=None)
    
    # convert wave to a array which is laid out in Fortran order in memory
    wave = np.asfortranarray(wave[::3])

    mfcc = librosa.feature.mfcc(y=wave, sr=16000, n_mfcc=n_mfcc)

    # If maximum length exceeds mfcc lengths then pad the remaining ones
    if (max_len > mfcc.shape[1]):
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

    # Else cutoff the remaining parts
    else:
        mfcc = mfcc[:, :max_len]
    
    return mfcc

In [8]:
def save_data_to_array(path=INPUT_PATH, max_len=11, n_mfcc=20):
    labels, _, _ = get_labels(path)

    for label in labels:
        # Init mfcc vectors
        mfcc_vectors = []
        wavfiles = [wavfile.as_posix() for wavfile in (INPUT_PATH / label).glob("*.wav")]

        for wavfile in tqdm(wavfiles, f"Saving vectors of label - '{label}'\t"):
            mfcc = wav2mfcc(wavfile, max_len=max_len, n_mfcc=n_mfcc)
            mfcc_vectors.append(mfcc)

        np.save(INTERMEDIATE_PATH / (label + '.npy'), mfcc_vectors)
    return labels

In [9]:
config.max_len = 11
config.buckets = 20

# Save data to array file first
labels = save_data_to_array(max_len=config.max_len, n_mfcc=config.buckets)

Saving vectors of label - 'cat'	: 100%|██████████| 1733/1733 [00:03<00:00, 472.87it/s]
Saving vectors of label - 'bed'	: 100%|██████████| 1713/1713 [00:02<00:00, 630.67it/s]
Saving vectors of label - 'happy'	: 100%|██████████| 1742/1742 [00:02<00:00, 626.62it/s]

['cat', 'bed', 'happy']





## Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

def get_train_test(data_path: Path, labels, split_ratio=0.6, random_state=42):
    # Get available labels

    # Getting first arrays
    X = np.load(data_path / (labels[0] + '.npy'))
    y = np.zeros(X.shape[0])

    # Append all of the dataset into one single array, same goes for y
    for i, label in enumerate(labels[1:]):
        x = np.load(label + '.npy')
        X = np.vstack((X, x))
        y = np.append(y, np.full(x.shape[0], fill_value= (i + 1)))

    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size= (1 - split_ratio), random_state=random_state, shuffle=True)
