In [None]:
from google.colab import drive

from pathlib import Path
import os

# Mount Google Drive and make its "MyDrive" folder the working directory so the
# relative data/model paths used by the later cells resolve inside the Drive.
DRIVE_MOUNT_POINT = '/content/gdrive/'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Absolute target: a relative 'gdrive/MyDrive' only resolves while the cwd is
# still the directory containing the mount point, so re-running this cell after
# the first chdir would raise FileNotFoundError.
os.chdir(Path(DRIVE_MOUNT_POINT) / 'MyDrive')

Mounted at /content/gdrive/


In [None]:
import time
import numpy as np
import os
from keras import layers, models, callbacks, regularizers, optimizers

# from keras.layers import advanced_activations
from contextlib import redirect_stdout
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)


def create_CNN_architecture(
    window_size,
    number_of_layers_in_encoder,
    encoder_filters,
    activation_functions,
    kernel_sizes,
    batch_normalizations,
    max_poolings,
    max_pooling_size=2,
    allowed_bottleneck_sizes=(16, 24, 32),
    **kwargs,
):
    """Build a 1D convolutional autoencoder from per-layer configuration lists.

    The encoder stacks ``number_of_layers_in_encoder`` blocks of
    ``Conv1D -> [MaxPool1D] -> [BatchNormalization]``; a ``Dense(1)`` layer
    followed by a ``BatchNormalization`` layer named ``"embedding"`` forms the
    bottleneck. The decoder walks the same configuration in reverse with
    ``Conv1DTranspose -> [UpSampling1D] -> [BatchNormalization]`` blocks and
    ends with a single-filter ``Conv1DTranspose``.

    Args:
        window_size: Number of timesteps of the single-channel input.
        number_of_layers_in_encoder: Number of encoder blocks; every per-layer
            list below is indexed from 0 up to this value (exclusive).
        encoder_filters: Filter count for each block.
        activation_functions: Activation for each block's convolution.
        kernel_sizes: Kernel size for each block's convolution.
        batch_normalizations: Per-block flag; truthy adds BatchNormalization.
        max_poolings: Per-block flag; truthy adds MaxPool1D in the encoder and
            the matching UpSampling1D in the decoder.
        max_pooling_size: Pool / upsampling factor shared by all blocks.
        allowed_bottleneck_sizes: Accepted values for the bottleneck length.
        **kwargs: Ignored; lets a config dict carry extra keys such as a name.

    Returns:
        Tuple ``(autoencoder, bottleneck_shape)`` where ``bottleneck_shape`` is
        dimension 1 of the embedding layer's output shape.

    Raises:
        ValueError: If the bottleneck length is not in
            ``allowed_bottleneck_sizes``.
    """
    num_inputs = 1
    input_placeholder = layers.Input(shape=[window_size, num_inputs])

    # Encoder.
    encoded = input_placeholder
    for i in range(number_of_layers_in_encoder):
        encoded = layers.Conv1D(
            encoder_filters[i],
            kernel_size=kernel_sizes[i],
            padding="same",
            activation=activation_functions[i],
        )(encoded)
        if max_poolings[i]:
            encoded = layers.MaxPool1D(max_pooling_size)(encoded)
        if batch_normalizations[i]:
            encoded = layers.BatchNormalization()(encoded)

    # Bottleneck.
    encoded = layers.Dense(1, activation="relu")(encoded)
    encoded = layers.BatchNormalization(name="embedding")(encoded)
    bottleneck_shape = list(encoded.shape)[1]
    if bottleneck_shape not in allowed_bottleneck_sizes:
        raise ValueError(f"Wrong bottleneck shape: {bottleneck_shape}")

    # Decoder: mirror the encoder blocks in reverse order.
    decoded = encoded
    for i in reversed(range(number_of_layers_in_encoder)):
        decoded = layers.Conv1DTranspose(
            encoder_filters[i],
            kernel_size=kernel_sizes[i],
            padding="same",
            activation=activation_functions[i],
        )(decoded)
        # Use this block's own pooling flag. Reusing the flag left over from the
        # last encoder iteration would upsample every block or none of them, so
        # the output length would not match the input when the flags are mixed.
        if max_poolings[i]:
            decoded = layers.UpSampling1D(max_pooling_size)(decoded)
        if batch_normalizations[i]:
            decoded = layers.BatchNormalization()(decoded)

    # The reversed loop ends on block 0, so the output layer uses its kernel size.
    decoded = layers.Conv1DTranspose(
        filters=1, kernel_size=kernel_sizes[0], padding="same"
    )(decoded)

    autoencoder = models.Model(inputs=input_placeholder, outputs=decoded)
    return autoencoder, bottleneck_shape


import pandas as pd
import numpy as np


def load_data(main_data_folder, exclude_dataset_for_testing):
    """Load and stack the train/test CSVs of every dataset under a folder.

    Each entry of ``main_data_folder`` is expected to contain ``train.csv`` and
    ``test.csv``. The entry named ``exclude_dataset_for_testing`` is skipped
    entirely (neither of its files is read). An entry whose files cannot be
    read is skipped as a whole and its error is recorded rather than raised.

    Args:
        main_data_folder: Folder whose entries are the individual datasets.
        exclude_dataset_for_testing: Name of the entry to leave out.

    Returns:
        Tuple ``(train_data_df, test_data_df, exceptions)``: the row-wise
        concatenation of all loaded train frames, the same for the test
        frames, and a dict mapping each failed entry name to its exception.
        Both frames are empty DataFrames when nothing could be loaded.
    """
    data_folders = os.listdir(main_data_folder)
    print(f"Total datasets {len(data_folders)}")

    train_frames = []
    test_frames = []
    exceptions = {}
    for f in data_folders:
        if f == exclude_dataset_for_testing:
            continue
        try:
            # Read both files before recording anything, so a dataset with only
            # one readable file cannot leave the two sets out of sync.
            train_df = pd.read_csv(f"{main_data_folder}/{f}/train.csv")
            test_df = pd.read_csv(f"{main_data_folder}/{f}/test.csv")
        except Exception as e:
            exceptions[f] = e
            continue
        train_frames.append(train_df)
        test_frames.append(test_df)

    # Concatenate once instead of growing a DataFrame inside the loop.
    train_data_df = (
        pd.concat(train_frames, ignore_index=True) if train_frames else pd.DataFrame()
    )
    test_data_df = (
        pd.concat(test_frames, ignore_index=True) if test_frames else pd.DataFrame()
    )

    train_length = sum(len(frame) for frame in train_frames)
    test_length = sum(len(frame) for frame in test_frames)
    assert train_length == len(
        train_data_df
    ), "Not all training data was appended to final training set"
    assert test_length == len(
        test_data_df
    ), "Not all testing data was appended to final testing set"
    return train_data_df, test_data_df, exceptions

def compile_model(model, optimizer, loss="mse"):
    """Compile ``model`` with the given optimizer and loss and return it.

    Args:
        model: Object exposing ``compile(optimizer=..., loss=...)``.
        optimizer: Optimizer forwarded to ``model.compile``.
        loss: Loss forwarded to ``model.compile``; defaults to ``"mse"``.

    Returns:
        The same ``model`` object, so the call can be chained.
    """
    compile_options = {"optimizer": optimizer, "loss": loss}
    model.compile(**compile_options)
    return model



def train_model(
    model,
    model_name,
    train_data,
    test_data,
    main_model_folder,
    epochs=100,
    batch_size=32,
):
    """Fit ``model`` as an autoencoder (inputs are also the targets).

    Checkpoints are written under
    ``<main_model_folder>/<model_name>/callbacks``: one file per epoch plus a
    ``best.tf`` file saved with ``save_best_only=True``. Training stops early
    when ``val_loss`` has not improved by at least 0.001 for 5 epochs.

    Args:
        model: Compiled model exposing ``fit``.
        model_name: Sub-folder name used for this model's checkpoints.
        train_data: Training samples, used as both inputs and targets.
        test_data: Validation samples, used as both inputs and targets.
        main_model_folder: Root folder for all models' outputs.
        epochs: Maximum number of epochs.
        batch_size: Batch size passed to ``fit``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    checkpoint_dir = f"{main_model_folder}/" + model_name + "/callbacks"

    per_epoch_checkpoint = callbacks.ModelCheckpoint(
        checkpoint_dir + "/epoch{epoch:02d}-loss{val_loss:.3f}.tf"
    )
    best_checkpoint = callbacks.ModelCheckpoint(
        checkpoint_dir + "/best.tf",
        save_best_only=True,
    )
    early_stopping = callbacks.EarlyStopping(
        monitor="val_loss", min_delta=0.001, patience=5, verbose=1
    )

    fit_options = dict(
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(test_data, test_data),
        callbacks=[per_epoch_checkpoint, best_checkpoint, early_stopping],
        verbose=0,
    )
    return model.fit(train_data, train_data, **fit_options)


def save_model_data(model, history, main_model_folder):
    """Write a model's structure, text summary and training history to disk.

    Creates ``main_model_folder`` (including missing parent folders) and
    writes three files into it:

    * ``model_structure.json`` - the string returned by ``model.to_json()``
    * ``model_summary.txt``    - everything ``model.summary()`` prints
    * ``history.csv``          - ``history.history`` as a table

    Args:
        model: Object exposing ``to_json()`` and ``summary()``.
        history: Object whose ``history`` attribute is a dict accepted by
            ``pandas.DataFrame.from_dict``.
        main_model_folder: Destination folder for the three files.
    """
    # makedirs(exist_ok=True) handles nested targets such as
    # "trained_models/<name>" whose parent does not exist yet, and avoids the
    # check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(main_model_folder, exist_ok=True)

    with open(f"{main_model_folder}/model_structure.json", mode="w") as structure_file:
        structure_file.write(model.to_json())

    # model.summary() prints to stdout, so capture it by redirecting stdout.
    with open(f"{main_model_folder}/model_summary.txt", "w") as summary_file:
        with redirect_stdout(summary_file):
            model.summary()

    pd.DataFrame.from_dict(history.history).to_csv(
        f"{main_model_folder}/history.csv"
    )


In [None]:
import time
import numpy as np
import os
from keras import layers, models, callbacks, regularizers, optimizers
#from keras.layers import advanced_activations


# Hand-built 1D convolutional autoencoder for windows of TIMESTEPS samples with
# a single channel.
TIMESTEPS = 128
num_inputs = 1
input_placeholder = layers.Input(shape=[TIMESTEPS, num_inputs])

# Encoder: three Conv1D -> MaxPool1D(2) -> BatchNormalization stages
# (128 -> 64 -> 32 -> 16 timesteps), then one more convolution.
encoded = input_placeholder
for n_filters, kernel in [(512, 23), (256, 13), (256, 7)]:
    encoded = layers.Conv1D(
        n_filters, kernel_size=kernel, padding="same", activation="relu"
    )(encoded)
    encoded = layers.MaxPool1D(2)(encoded)
    encoded = layers.BatchNormalization()(encoded)
encoded = layers.Conv1D(
    128, kernel_size=3, padding="same", activation="relu"
)(encoded)

# Bottleneck: the layer named "embedding" is the encoder's output.
encoded = layers.Dense(1, activation="relu")(encoded)
encoded = layers.BatchNormalization(name="embedding")(encoded)

# Decoder: one transposed convolution, then three
# Conv1DTranspose -> BatchNormalization -> UpSampling1D(2) stages.
decoded = layers.Conv1DTranspose(
    128, kernel_size=3, padding="same", activation="relu"
)(encoded)
for n_filters, kernel in [(256, 7), (256, 7), (512, 13)]:
    decoded = layers.Conv1DTranspose(
        n_filters, kernel_size=kernel, padding="same", activation="relu"
    )(decoded)
    decoded = layers.BatchNormalization()(decoded)
    decoded = layers.UpSampling1D(2)(decoded)

# Single-filter output layer without an activation.
decoded = layers.Conv1DTranspose(filters=1, kernel_size=13, padding="same")(decoded)

encoder = models.Model(inputs=input_placeholder, outputs=encoded)
model = models.Model(inputs=input_placeholder, outputs=decoded)

# Disabled configuration for create_CNN_architecture, kept as a bare string
# literal; being the cell's last expression, its repr is the cell output.
# NOTE(review): "number_of_layers_in_encoder" is 4 while every per-layer list
# has 3 entries, so passing this dict to create_CNN_architecture would raise
# IndexError - reconcile before re-enabling the call below.
"""model_7 = {
    "name": "model_8",
    "window_size": 128,
    "number_of_layers_in_encoder": 4,
    "input": 128,
    "encoder_filters": [512, 256, 128],
    "kernel_sizes": [23, 13, 3],
    "activation_functions": ["relu"] * 3,
    "batch_normalizations": [False] * 3,
    "max_poolings": [True] * 3,
}"""

#model, emb = create_CNN_architecture(**model_7)

'model_7 = {\n    "name": "model_8",\n    "window_size": 128,\n    "number_of_layers_in_encoder": 4,\n    "input": 128,\n    "encoder_filters": [512, 256, 128],\n    "kernel_sizes": [23, 13, 3],\n    "activation_functions": ["relu"] * 3,\n    "batch_normalizations": [False] * 3,\n    "max_poolings": [True] * 3,\n}'

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of `model`.
model.summary()

Model: "model_43"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_24 (InputLayer)       [(None, 128, 1)]          0         
                                                                 
 conv1d_88 (Conv1D)          (None, 128, 512)          12288     
                                                                 
 max_pooling1d_68 (MaxPoolin  (None, 64, 512)          0         
 g1D)                                                            
                                                                 
 batch_normalization_130 (Ba  (None, 64, 512)          2048      
 tchNormalization)                                               
                                                                 
 conv1d_89 (Conv1D)          (None, 64, 256)           1704192   
                                                                 
 max_pooling1d_69 (MaxPoolin  (None, 32, 256)          0  

In [None]:
# Training configuration for the cells below.
EPOCHS, BATCH_SIZE = 100, 32

# Root folder that receives every model's checkpoints and saved artefacts.
main_model_folder = "trained_models"
# os.mkdir(f'{main_model_folder}/')  # folder creation intentionally left disabled

In [None]:
# Load every dataset under `folder_name` except the held-out one.
exclude_dataset_for_testing = "InsectSound"
folder_name = "fully_processed_data/w_128_o_64_p_0"
train_data_df, test_data_df, exceptions = load_data(
    folder_name, exclude_dataset_for_testing
)

# load_data records per-dataset failures instead of raising; report them so
# training never silently runs on a partial corpus.
if exceptions:
    print(f"WARNING: {len(exceptions)} dataset(s) could not be loaded and were skipped:")
    for failed_dataset, error in exceptions.items():
        print(f"  {failed_dataset}: {error!r}")

# Add a trailing channel axis: (samples, timesteps) -> (samples, timesteps, 1),
# matching the [TIMESTEPS, 1] input shape of the model.
train_data = np.expand_dims(train_data_df.values, axis=-1)
test_data = np.expand_dims(test_data_df.values, axis=-1)
main_model_folder = 'trained_models'

Total datasets 97


In [None]:
# Train, save and evaluate the model currently bound to `model`.
k = 'model_11'
# Architecture dict when the model comes from create_CNN_architecture
# (model, embed = create_CNN_architecture(**model_arch)); None for the
# hand-built model, in which case the INFO line below is skipped.
model_arch = None
# Read the bottleneck length from the model itself instead of hard-coding it:
# three MaxPool1D(2) stages on a 128-step window give 16, not 32.
embed = model.get_layer('embedding').output.shape[1]
opt = optimizers.Adam(learning_rate=.0001)
epochs = 100
embedding_size = embed
print(f'Model iteration: 0 name: {k}')
if model_arch is not None:
  number_of_layers_in_encoder = model_arch['number_of_layers_in_encoder']
  encoder_filters = model_arch['encoder_filters']
  kernel_sizes = model_arch['kernel_sizes']
  print(f'INFO: Layers: {number_of_layers_in_encoder} | embedding size {embedding_size} | Kernel filters {encoder_filters} | Kernel sizes {kernel_sizes}')
model = compile_model(model, opt, loss='mse')
folder_name = f'{main_model_folder}/{k}'
history = train_model(model, k, train_data, test_data, main_model_folder, epochs=epochs)
save_model_data(model, history, folder_name)
# Autoencoder: the targets are the inputs, as in train_model's validation_data.
# Evaluating without targets cannot compute the reconstruction loss.
re = model.evaluate(test_data, test_data)
hist_df = pd.DataFrame.from_dict(history.history)
# Row of the epoch with the lowest validation loss.
lowest_test_val_loss = hist_df.iloc[hist_df['val_loss'].argmin()]
print(f'Model {k} results {re}')
vals = lowest_test_val_loss.values
# Read the losses by label rather than by column position in the history dict.
print(f"Val loss: train {lowest_test_val_loss['loss']} test: {lowest_test_val_loss['val_loss']}")
print("========================= Finished training model =========================")
print('\n')

Model iteration: 0 name: model_11
