In [1]:
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from tensorflow import keras

from methods_audio import data_augmentation, data_handling, denoising
from models import get_model

caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ["dlopen(/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io.so, 0x0006): tried: '/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io.so' (no such file)"]


# This notebook trains the best-performing models, combined with the best pre-processing techniques (both selected in earlier experiments), once and exports them.

1. Get data

In [2]:
# Build the dataset via the project helper. Judging by the next step, this
# presumably yields file names rather than audio — TODO confirm in data_handling.
data = data_handling.get_data()

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2. Read data (transforming file names into waves)

In [3]:
# Apply data_handling.read_in_data to every element of the dataset.
# NOTE(review): `data` is rebound to its own mapped result, so re-running this
# cell without re-running the previous one maps already-mapped elements again.
data = data.map(data_handling.read_in_data)

3. Get input for model training 

In [4]:
# Pull the model inputs (samples) and their targets (labels) out of the dataset.
# NOTE(review): the returned container types are not visible in this notebook;
# the later np.array(...) conversion suggests Python lists — confirm.
samples, labels = data_handling.extract_samples_labels(data)

2023-06-14 11:24:04.343510: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


4. Split data into train and validation sets

In [5]:
# Hold out 20 % of the samples for validation. The fixed random_state keeps the
# split identical from run to run.
validation_set_size = 0.20
x_train, x_valid, y_train, y_valid = train_test_split(
    samples,
    labels,
    test_size=validation_set_size,
    random_state=123,
)
# TODO: check type

5. Set fixed parameters

In [6]:
# --- Training loop ---
batch_size = 8
epoch = 50  # upper bound on epochs handed to model.fit

# --- Augmentation ---
type_augmentation = "signal"
probability = 0.7  # forwarded to data_augmentation.time_augmentation

# --- Denoising ---
differentiation = False  # forwarded to the denoising.apply_* helpers

# --- Low-pass filter (only used when type_denoising == "low_pass") ---
low_pass_cutoff = 1500
low_pass_order = 4

6. Data augmentation

In [7]:
# Augment the training split only; the validation split is left untouched.
# `probability` is forwarded to the helper — presumably the chance that an
# augmentation is applied to a sample; confirm in data_augmentation.
# NOTE(review): x_train / y_train are overwritten with the augmented result, so
# re-running this cell feeds already-augmented data back in. Re-run the split
# cell first to start again from the original training set.
x_train, y_train = data_augmentation.time_augmentation(x_train, y_train, probability)

7. Set changing parameters: to generate different models for experiments

In [8]:
# Which architecture to train (options noted by the author: 4, 5).
number_model = 4

# Denoising strategy: "spectral_gating" or "low_pass".
type_denoising = "low_pass"

# Feature representation: "mfcc_delta" or "mfcc".
type_transformation = "mfcc"

# Checkpoint destination. Keep it in sync with the choices above, otherwise a
# run overwrites the file that belongs to a different configuration.
file_path = "data/models/best_models/model_4/low_pass.h5"

8. Denoising 

In [9]:
# Denoise the training and validation waves with the same method and settings,
# so both splits go through identical pre-processing.
if type_denoising == "spectral_gating":
    x_train_denoised = denoising.apply_spectral(x_train, differentiation)
    x_valid_denoised = denoising.apply_spectral(x_valid, differentiation)
elif type_denoising == "low_pass":
    x_train_denoised = denoising.apply_low_pass(x_train, low_pass_cutoff, low_pass_order, differentiation)
    x_valid_denoised = denoising.apply_low_pass(x_valid, low_pass_cutoff, low_pass_order, differentiation)
else:
    # Fail here rather than later: without this branch a typo leaves
    # x_*_denoised undefined, or silently reuses values left over from an
    # earlier run of this cell in the same kernel.
    raise ValueError(
        f"Unknown type_denoising {type_denoising!r}; expected 'spectral_gating' or 'low_pass'."
    )

9. Data transformation and padding 

In [10]:
# Turn each denoised split into model features of the kind selected in
# type_transformation; both splits go through the same helper.
# NOTE(review): the warnings printed by this cell show that the helper calls
# librosa.feature.mfcc with the waveform as a positional argument, which librosa
# announces will raise an error from version 0.10 — pass it by keyword in data_handling.
x_train_transformed, x_valid_transformed = (
    data_handling.transform_data(denoised_split, type_transformation)
    for denoised_split in (x_train_denoised, x_valid_denoised)
)

  0.01964134] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(wave.numpy(), n_mfcc = 13, sr = sr)
 -0.02587545] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(wave.numpy(), n_mfcc = 13, sr = sr)
  0.3930511 ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(wave.numpy(), n_mfcc = 13, sr = sr)
 -0.04269667] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(wave.numpy(), n_mfcc = 13, sr = sr)
 -0.15715067] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mfccs = librosa.feature.mfcc(wave.numpy(), n_mfcc = 13, sr = sr)
  mfccs = librosa.feature.mfcc(wave.numpy(), n_mfcc = 13, sr = sr)
 -0.15823652] as keyword args. From version

10. Convert the data from lists to NumPy arrays (np.ndarray) so that it can be used by Keras methods


In [11]:
# Convert the features and the labels of both splits to NumPy arrays before
# handing them to Keras.
x_train_transformed, x_valid_transformed = map(np.array, (x_train_transformed, x_valid_transformed))
y_train_transformed, y_valid_transformed = map(np.array, (y_train, y_valid))

11. Load model 

In [12]:
# Build the architecture selected by number_model. The helper also returns the
# learning rate to use with it, which is passed to the optimizer when compiling.
model, learning_rate = get_model.get_model(number_model)
# model.summary()  # uncomment to inspect the architecture

12. Compile model

In [13]:
# Binary-classification set-up: SGD with the learning rate that came with the
# model, binary cross-entropy as the loss, and accuracy, recall, precision and
# the four confusion-matrix counts as tracked metrics.
model.compile(
    loss="BinaryCrossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=learning_rate),
    metrics=[
        "accuracy",
        "Recall",
        "Precision",
        "TruePositives",
        "TrueNegatives",
        "FalsePositives",
        "FalseNegatives",
    ],
)



13. Define the early-stopping condition (stop when the validation loss stops improving) and the checkpoint condition (save the model whenever the validation accuracy reaches a new maximum)

In [14]:
# Training aims to minimise the loss, so stop once the validation loss has gone
# 20 epochs without improving.
es = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=20,
    verbose=1,
)
# Write the model to file_path only when the validation accuracy improves on
# the best value seen so far, so the file holds the most accurate epoch.
mc = ModelCheckpoint(
    file_path,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)

14. Train model 

In [15]:
# Train on the augmented, denoised and transformed training split, evaluating on
# the validation split after every epoch. The early-stopping and checkpoint
# callbacks decide when to stop and what to save; `hist` keeps whatever
# model.fit returns so the training curves can be inspected afterwards.
hist = model.fit(
    x_train_transformed,
    y_train_transformed,
    validation_data=(x_valid_transformed, y_valid_transformed),
    epochs=epoch,
    batch_size=batch_size,
    callbacks=[es, mc],
)

Epoch 1/50
Epoch 1: val_accuracy improved from -inf to 0.88608, saving model to data/models/best_models/model_4/low_pass.h5
Epoch 2/50
Epoch 2: val_accuracy improved from 0.88608 to 0.94233, saving model to data/models/best_models/model_4/low_pass.h5
Epoch 3/50
Epoch 3: val_accuracy did not improve from 0.94233
Epoch 4/50
Epoch 4: val_accuracy improved from 0.94233 to 0.94655, saving model to data/models/best_models/model_4/low_pass.h5
Epoch 5/50
Epoch 5: val_accuracy did not improve from 0.94655
Epoch 6/50
Epoch 6: val_accuracy improved from 0.94655 to 0.95499, saving model to data/models/best_models/model_4/low_pass.h5
Epoch 7/50
Epoch 7: val_accuracy improved from 0.95499 to 0.96062, saving model to data/models/best_models/model_4/low_pass.h5
Epoch 8/50
Epoch 8: val_accuracy improved from 0.96062 to 0.96343, saving model to data/models/best_models/model_4/low_pass.h5
Epoch 9/50
Epoch 9: val_accuracy did not improve from 0.96343
Epoch 10/50
Epoch 10: val_accuracy did not improve from