In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import keras_tuner
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D
from tensorflow import keras
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from methods_audio import data_handling
from methods_audio import data_augmentation
from methods_audio import denoising 
from methods_audio import model_performance_training
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ["dlopen(/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io.so, 0x0006): tried: '/Users/rosameliacarioni/miniconda3/envs/bach_thesis_4/lib/python3.9/site-packages/tensorflow_io-0.32.0-py3.9-macosx-11.0-arm64.egg/tensorflow_io/python/ops/libtensorflow_io.so' (no such file)"]


# 1. Get data (file names)

In [2]:
data = data_handling.get_data()

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



# 2. Read data (transforming file names into waves) <br>
Additionally, the mean is removed and the data is normalized. 

In [3]:
data = data.map(data_handling.read_in_data) 

# 3. Get input for model training 

In [4]:
samples, labels = data_handling.extract_samples_labels(data)

2023-05-09 13:12:01.362813: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


# 4. Split data into train and validation sets

In [5]:
validation_set_size = 0.30
x_train, x_valid, y_train, y_valid = train_test_split(samples, labels, test_size= validation_set_size, random_state=123)

# 5. Transform data to spectograms

In [6]:
type_transformation = 'spectrogram'
x_train = data_handling.transform_data(x_train, type_transformation)
x_valid = data_handling.transform_data(x_valid, type_transformation)

## 6. Build model with hyperparameter tunning 
https://keras.io/guides/keras_tuner/getting_started/ <br>
https://www.youtube.com/watch?v=6Nf1x7qThR8&ab_channel=GregHogg 

In [7]:
def build_model(hp):
    input = (624, 129,1)
    model = keras.Sequential()
    
    # Add input layer 
    #matching samples.shape
    model.add(
        Conv2D(
            filters= hp.Int("conv_filters_0", min_value=16, max_value=128, step=16), 
            activation= hp.Choice("conv_activation_0", ["relu", "tanh"]),
            kernel_size = (3,3), 
            input_shape=input
        )
    ) 
    model.add(MaxPool2D(pool_size= (2,2)))

    # Tune the number of Conv layers 
    for i in range(hp.Int("num_conv_layers", 1, 3)):
        model.add(
            Sequential([
                layers.Conv2D(
                    filters=hp.Int(f"conv_filters_{i}", min_value=16, max_value=128, step=16),
                    activation=hp.Choice(f"conv_activation_{i}", ["relu", "tanh"]),
                    kernel_size=(3,3),
                ), 
                layers.MaxPool2D(pool_size=(2,2)),
            ])
        )

    model.add(layers.Flatten())

    # Tune the number of Dense layers and Tune whether to use dropout layer
    for i in range(hp.Int("num_dense_layers", 1, 3)):
            model.add(
                Sequential([
                    layers.Dense(
                        # Tune number of units separately.
                        units=hp.Int(f"dense_units_{i}", min_value=50, max_value=600, step=50),
                        activation=hp.Choice(f"dense_activation_{i}", ["relu", "tanh"]),
                    ), 
                    layers.Dropout(
                        rate=hp.Float(f"dense_dropout_{i}", min_value = 0, max_value = 1)
                    )
                ]) 
            )

    model.add(
        layers.Dense(
        units=1, #because we have 2 classes 
        activation=hp.Choice("activatio_last_layer", ["softmax", "sigmoid"]), 
        )
    )

    # Define the optimizer learning rate as a hyperparameter.
    # sampling="log", the step is multiplied between samples.
    lr= hp.Float("learning_rate", min_value=1e-4, max_value=1e-1, sampling="log")
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=lr), 
        loss="BinaryCrossentropy", 
        metrics=["accuracy"],
    )
    
    return model

In [8]:
build_model(keras_tuner.HyperParameters())



<keras.engine.sequential.Sequential at 0x288f6a3d0>

### Initialize tuner by specifying different arguments 

In [10]:
tuner = keras_tuner.Hyperband(
    hypermodel=build_model,
    objective= "val_accuracy", # we want maximize accuracy 
    overwrite=True,
    directory="param_optimization2",
    project_name="first_try",
)




In [11]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor= 'val_loss', patience=5) 
# patience refers to number of epochs: if the val loss is not improving fter 5 ephocs, we stop it. 

### During the search, the model is called with different hyperparameters 

In [12]:
tuner.search_space_summary()
# Default search space size: number of hyper parameters that we are tunning 

Search space summary
Default search space size: 9
conv_filters_0 (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 128, 'step': 16, 'sampling': 'linear'}
conv_activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
num_conv_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
num_dense_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
dense_units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 50, 'max_value': 600, 'step': 50, 'sampling': 'linear'}
dense_activation_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
dense_dropout_0 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 1.0, 'step': None, 'sampling': 'linear'}
activatio_last_layer (Choice)
{'default': 'softmax', 'conditions': [], 'values': ['softmax', 'sigm

In [13]:
epochs = 20
tuner.search(np.stack(x_train), np.stack(y_train), epochs= epochs, validation_data=(np.stack(x_valid), np.stack(y_valid)), callbacks=[stop_early]) #similar to fit 

Trial 203 Complete [00h 24m 45s]
val_accuracy: 0.8641049861907959

Best val_accuracy So Far: 0.9578257203102112
Total elapsed time: 09h 23m 29s

Search: Running Trial #204

Value             |Best Value So Far |Hyperparameter
32                |80                |conv_filters_0
relu              |tanh              |conv_activation_0
2                 |2                 |num_conv_layers
3                 |3                 |num_dense_layers
550               |600               |dense_units_0
relu              |relu              |dense_activation_0
0.6743            |0.77812           |dense_dropout_0
sigmoid           |sigmoid           |activatio_last_layer
0.059899          |0.084273          |learning_rate
96                |128               |conv_filters_1
tanh              |relu              |conv_activation_1
150               |550               |dense_units_1
tanh              |relu              |dense_activation_1
0.27221           |0.097455          |dense_dropout_1
350       



Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34

: 

: 

In [None]:
tuner.results_summary()

### After all of that we don't have a model yet but rather a set of hyper parameters. Let's query the results and create a model:

In [None]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

In [None]:
model = tuner.hypermodel.build(best_hps) #saving model 

In [None]:
location = 'data/models/02'
model.save(location)

# 7. Evaluate model performance 

In [None]:
# evaluate the model on a validation set
loss, accuracy = model.evaluate(np.stack(x_valid), np.stack(y_valid))

# print the evaluation results
print(f'Validation loss: {loss:.4f}')
print(f'Validation accuracy: {accuracy:.4f}')

In [None]:
predictions = model.predict(np.stack(x_valid))


In [None]:

y_pred = [1 if prediction > 0.5 else 0 for prediction in predictions]
accuracy = accuracy_score(np.stack(y_valid), y_pred)
accuracy



In [None]:
precision = precision_score(np.stack(y_valid), y_pred)
recall = recall_score(np.stack(y_valid), y_pred)
f1 = f1_score(np.stack(y_valid), y_pred)

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')