### Initial Setup

In [None]:
# Importing other packages
import numpy as np
import librosa
import io
import os
import soundfile as sf
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

### Wrangling Data

In [None]:
def processSample(path, sample, targetLength=16000, hopLength=321):
    """Load one audio file and turn it into a fixed-size dB spectrogram.

    Parameters
    ----------
    path : str
        Directory that contains the audio file.
    sample : str
        File name of the audio file inside ``path``.
    targetLength : int, optional
        Number of samples the signal is zero-padded or cut to before the
        transform. The default (16000) is what this notebook treats as
        one second of audio.
    hopLength : int, optional
        Hop length forwarded to ``librosa.stft``. With the defaults the
        result is later reshaped by the notebook to 1025 x 50.

    Returns
    -------
    numpy.ndarray
        Output of ``librosa.amplitude_to_db`` applied to the magnitude of
        the short-time Fourier transform of the length-normalised signal.
    """
    # sf.read returns a (data, samplerate) pair; only the samples are used
    audio = sf.read(os.path.join(path, sample))[0]

    # Positive -> signal is too short, negative -> signal is too long
    missing = targetLength - len(audio)
    if missing > 0:
        # Padding with trailing zeros up to the target length
        audio = np.concatenate((audio, np.zeros(missing)), axis=0)
    elif missing < 0:
        # Cutting everything past the target length
        audio = audio[:targetLength]

    # Applying Fourier transformation and converting magnitudes to decibels
    magnitude = np.abs(librosa.stft(audio, hop_length=hopLength))
    return librosa.amplitude_to_db(magnitude)

In [None]:
# Path to train data
mainPath = 'data/train/audio/'

numpyAudio = []
labels = []

# Looping over directory
# NOTE: os.listdir gives no ordering guarantee, so the sample order depends on the filesystem
for word in os.listdir(mainPath):
    specPath = os.path.join(mainPath, word)

    # Skipping stray files: only sub-directories are expected to hold samples
    if not os.path.isdir(specPath):
        continue

    # Looping over samples
    for sample in tqdm(os.listdir(specPath), desc=f"Processing Word: {word}"):
        # Applying processing function
        transformed = processSample(specPath, sample)
        numpyAudio.append(transformed)
        labels.append(word)

# Converting to numpy arrays. This stays best-effort (the Python lists are kept
# on failure), but the reason is reported instead of being silently discarded,
# and unrelated errors / keyboard interrupts are no longer swallowed.
try:
    numpyAudio = np.array(numpyAudio)
    # wordClasses[i] is the directory name that integer label i stands for
    wordClasses, labels = np.unique(labels, return_inverse=True)
except (ValueError, MemoryError) as err:
    print(f"Could not convert processed samples to numpy arrays: {err!r}")

### Getting Data Model-Ready

In [None]:
# Creating numpy arrays
# Skipping last 5 samples because they are not part of what we want to train on
# np.asarray reuses the data when it is already an ndarray, whereas np.array
# would duplicate the whole spectrogram array in memory; lists are still converted
X = np.asarray(numpyAudio[:-5])
y = np.asarray(labels[:-5])

In [None]:
# Persisting the processed features and labels to disk
# (these are the files the 'X.npy' / 'y.npy' loading cell reads back)
np.save(file='X', arr=X)
np.save(file='y', arr=y)

In [None]:
# Adding a trailing channel axis so X matches the model's (1025, 50, 1) input
X = np.reshape(X, (-1, 1025, 50, 1))

In [None]:
# Encoding y values as integer class indices
# Indexing the result instead of unpacking into `_` leaves IPython's
# last-output variable `_` untouched
y = np.unique(y, return_inverse=True)[1]

In [None]:
# Applying one shared permutation to X and y so samples stay aligned with labels
from sklearn.utils import shuffle

X_Shuffled, y_Shuffled = shuffle(
    X,
    y,
    random_state=0,  # fixed seed
)

In [None]:
# Persisting the shuffled features and labels to disk
# (these are the files the 'X_Shuffled.npy' / 'y_Shuffled.npy' loading cell reads back)
np.save(file='X_Shuffled', arr=X_Shuffled)
np.save(file='y_Shuffled', arr=y_Shuffled)

In [None]:
# Shortcut cell: run only if the processed data has been saved by an earlier run
# Restoring the unshuffled features and labels from disk
X = np.load(file='X.npy')
y = np.load(file='y.npy')

In [None]:
# Shortcut cell: run only if the shuffled data has been saved by an earlier run
# Restoring the shuffled features and labels from disk
X_Shuffled = np.load(file='X_Shuffled.npy')
y_Shuffled = np.load(file='y_Shuffled.npy')

### Tensorflow Model

In [None]:
# Importing TensorFlow
import tensorflow as tf

# Importing classes necessary for neural net
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, MaxPool2D, Input, BatchNormalization
import tensorflow.keras as keras

In [None]:
# Declaring the whole network as one ordered list of layers
model = Sequential([
    # Input: one single-channel 1025 x 50 spectrogram per sample
    Input(shape=(1025, 50, 1)),

    # Convolution stage 1: 16 filters -> 3x3 max pooling -> dropout
    Conv2D(16, kernel_size=(3, 3), activation='relu', data_format='channels_last'),
    MaxPool2D(pool_size=(3, 3), data_format='channels_last'),
    Dropout(0.2),

    # Convolution stage 2: 32 filters -> 3x3 max pooling -> dropout
    Conv2D(32, kernel_size=(3, 3), activation='relu', data_format='channels_last'),
    MaxPool2D(pool_size=(3, 3), data_format='channels_last'),
    Dropout(0.2),

    # Classifier head: flatten, then two batch-normalised dense layers
    Flatten(),
    BatchNormalization(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(128, activation='relu'),

    # Output: softmax over 30 classes
    Dense(30, activation='softmax'),
])

# Sparse cross-entropy because the targets are integer class indices
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Checking model: displaying the Keras summary of the network built above
model.summary()

### Testing If GPU In Use

In [None]:
# Displaying whether the installed TensorFlow build was compiled with CUDA support
tf.test.is_built_with_cuda()

In [None]:
# Displaying whether TensorFlow can see at least one GPU
# (tf.test.is_gpu_available is deprecated in favour of tf.config.list_physical_devices)
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
# Displaying the GPU device name reported by TensorFlow
tf.test.gpu_device_name()

### Training Model

In [None]:
# Setting up Tensorboard
import datetime

# Every run logs into its own directory, named after the time this cell ran
runStamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logDir = os.path.join("Logs", 'Model-' + runStamp)

tbCallback = tf.keras.callbacks.TensorBoard(
    log_dir=logDir,
    histogram_freq=1,
    # NOTE(review): presumably set this high so the profiled batch is never reached - confirm
    profile_batch=10000000,
)

In [None]:
# Splitting data: 80% for training, 20% held out for validation
# random_state is fixed (same seed as the shuffle cell) so that the split,
# and therefore the reported validation metrics, are reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X_Shuffled,
    y_Shuffled,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Training for 5 epochs in mini-batches of 16, validating on the held-out
# split after every epoch and logging to TensorBoard
model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=5,
    validation_data=(X_test, y_test),
    callbacks=[tbCallback],
)