# Welcome to TinyML part 2: Data prep, model, train, and validate 
## This is an interactive Jupyter notebook in which you can run the Python code that we have prepared.
## Below you will find cells that contain either code or a description.


In [1]:
# Sanity check: confirm that the kernel executes code and report which Python
# interpreter it is running.
import sys

print('hello')
print(sys.version)

hello
3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]


# Let's get started
## Import libraries that we need to prep data, work with a model, train and validate the results 

## Install requirements

In [3]:
pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import keras
import utils.calc_mem
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten
from matplotlib import pyplot as plt

# NOTE(review): later cells use `np`, `preprocess`, `WINDOW_SIZE` and
# `SPECTRUM_SIZE` with no other visible import, so they presumably enter the
# namespace through this star import -- confirm against preprocess.py and
# consider importing those names explicitly.
from preprocess import *

2023-11-30 08:25:00.002286: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-30 08:25:00.049697: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-30 08:25:00.049726: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-30 08:25:00.050725: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-30 08:25:00.056425: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-30 08:25:00.056858: I tensorflow/core/platform/cpu_feature_guard.cc:1

## Set up the TensorFlow environment

In [5]:
# Quieten TensorFlow: the environment variable raises the threshold of
# TensorFlow's native log output, and the Python-side logger is limited to
# errors.
# NOTE(review): `keras` is imported in an earlier cell, which may already have
# loaded TensorFlow; if so, the environment variable is probably set too late
# to suppress the messages emitted during that import -- confirm.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})
import tensorflow as tf

tf_logger = tf.get_logger()
tf_logger.setLevel('ERROR')

# Load and preprocess data

In [6]:
# Load and preprocess data
# Every recording `audio_<id>.wav` found in the data directory is paired with
# the annotation file `labels_<id>.txt` and handed to `preprocess`; the two
# values returned per recording are collected in `x_files` and `y_files`.
data_dir = '../Data/'
x_files = []
y_files = []
# List `data_dir` itself (instead of repeating the literal path) and visit the
# entries in sorted order: os.listdir returns them in arbitrary order, and the
# order of the recordings decides which samples end up in the unshuffled tail
# of the data that the notebook later uses as the test set.
for c_file in sorted(os.listdir(data_dir)):
    if c_file.startswith('audio_') and c_file.endswith('.wav'):
        # Strip the 'audio_' prefix (6 characters) and the '.wav' suffix (4).
        recording_id = c_file[6:-4]
        label_file = 'labels_' + recording_id + '.txt'
        print('Preprocessing ' + c_file + ' and ' + label_file)
        x_file, y_file = preprocess(os.path.join(data_dir, c_file),
                                    os.path.join(data_dir, label_file))
        x_files.append(x_file)
        y_files.append(y_file)

Preprocessing audio_1.wav and labels_1.txt
Preprocessing audio_2.wav and labels_2.txt


# Concatenate files into feature and label arrays

In [7]:
# Join the per-recording arrays end to end (along the first axis) so that all
# recordings form one feature array `x` and one label array `y`.
x, y = np.concatenate(x_files, axis=0), np.concatenate(y_files, axis=0)

# Shuffle data, but only what will become training and validation sets
# We will keep the test set as a contiguous sequence for plotting

In [None]:
# Shuffle only the first 80 % of the samples (the part that is later split
# into training and validation sets); the remaining 20 % keep their original
# order. A seeded generator makes the shuffle, and therefore the
# train/validation split, reproducible across kernel restarts.
n_shuffled = int(.8 * len(x))
shuffle_rng = np.random.default_rng(42)
indices = np.concatenate((shuffle_rng.permutation(n_shuffled), np.arange(n_shuffled, len(x))))
# Index features and labels with the same permutation to keep them aligned.
x = x[indices]
y = y[indices]

# Split into training, validation and test sets

In [None]:
# Split into training, validation and test sets
# Cut points at 60 % and 80 % of the data yield three consecutive pieces:
# 60 % training, 20 % validation and 20 % test.
x_train, x_val, x_test = np.split(x, [int(frac * len(x)) for frac in (.6, .8)])
y_train, y_val, y_test = np.split(y, [int(frac * len(y)) for frac in (.6, .8)])

# Determine class weights

In [None]:
# Weight the two classes by how unevenly they occur, so that the rarer class
# receives the larger weight during training.
# Assumes `y` holds 0/1 labels, so that its sum is the number of positive
# samples -- TODO confirm against preprocess.
num_positives = np.sum(y)
num_negatives = len(y) - num_positives
# Fail early with a clear message instead of silently producing infinite or
# zero weights when one of the classes is missing.
if num_positives <= 0 or num_negatives <= 0:
    raise ValueError(
        'Class weights need at least one positive and one negative label, got '
        f'{num_positives} positives and {num_negatives} negatives'
    )
ratio = num_negatives / num_positives
# The two weights differ by a factor of `ratio`; both are scaled so that their
# product is 1.
class_weights = {0: 1 / np.sqrt(ratio), 1: np.sqrt(ratio)}  # Divide by sqrt(ratio) to make losses comparable
print('Class weights:', class_weights)


# Build and compile model

In [None]:
# Small 1-D convolutional binary classifier: two Conv1D layers (8 filters,
# kernel size 3, ReLU) whose output is flattened into a single sigmoid unit.
# Each input sample has shape (WINDOW_SIZE, SPECTRUM_SIZE).
print('Building model...')
model = Sequential([
    Conv1D(8, 3, activation='relu', input_shape=(WINDOW_SIZE, SPECTRUM_SIZE)),
    Conv1D(8, 3, activation='relu'),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


# Train model with early stopping and class weights; save best model

In [None]:
# Fit for at most 100 epochs. Training stops once the validation loss has not
# improved for 16 consecutive epochs, and the checkpoint callback keeps
# 'model.h5' holding the model with the lowest validation loss seen so far.
print('Training model...')
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=16)
model_checkpoint = keras.callbacks.ModelCheckpoint(
    'model.h5',
    monitor='val_loss',
    save_best_only=True,
)
training_callbacks = [early_stopping, model_checkpoint]
model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=128,
    validation_data=(x_val, y_val),
    class_weight=class_weights,
    callbacks=training_callbacks,
)

# Plot learning curves

In [None]:
# Plot learning curves
# Training and validation loss per epoch, read from the history that the most
# recent `model.fit` call stored on the model.
loss_history = model.history.history
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(loss_history['loss'])
ax.plot(loss_history['val_loss'])
ax.set_title('Learning curves')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend(['Training loss', 'Validation loss'])
plt.show()


# Load best model

In [None]:
# Load best model
# Replace the in-memory `model` with the checkpoint that training wrote to
# 'model.h5', i.e. the version with the lowest validation loss.
best_model_path = 'model.h5'
model = keras.models.load_model(best_model_path)