## General

In [2]:
import sys
print(sys.version)

3.11.3 (main, Apr 19 2023, 18:49:55) [Clang 14.0.6 ]


In [3]:
import os
import sys

in_colab = 'google.colab' in sys.modules
if in_colab:
    if not os.getcwd().split('/')[-1].split('_')[-1]=='2023':
        from google.colab import drive
        drive.mount('/content/drive')
        os.chdir(r'/content/drive/MyDrive/HDA/Human_Data_Analytics_Project_2023') 

    if not 'tensorflow_io' in sys.modules:
        print('Installing tensorflow-IO')
        !pip install tensorflow-io
    #username = 'GianmarcoLattaruolo'
    #repository = 'Human_Data_Analytics_Project_2023'
    #git_token = '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'
    #!git clone https://{git_token}@github.com/{username}/{repository}
    #download_dataset('ESC-50', make_subfold = True)
    #!git config --global user.name "Gianmarco Lattaruolo"
    #!git config --global user.email "lattaruologianmarco@gmail.com"

main_dir = os.getcwd()
if main_dir not in sys.path:
    print('Adding the folder for the modules')
    sys.path.append(main_dir)

In [4]:
#BASE LIBRARIES
import numpy as np
import pandas as pd
import h5py
import shutil
import time
import random

#PLOT LIBRARIES
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb
sb.set(style="white", palette="muted")
import IPython.display as ipd

#AUDIO LIBRARIES
import librosa
from scipy.io import wavfile
from scipy import signal
from scipy.fft import fft,ifft,fftfreq, fftshift
from scipy.signal import stft,spectrogram,periodogram

#MACHINE LEARNING LIBRARIES
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, LeaveOneOut, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
#import tensorflow_io as tfio
print("TensorFlow version:", tf.__version__)

seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

#EVALUATION LIBRAIRES
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve
from sklearn.metrics import RocCurveDisplay, precision_recall_curve, PrecisionRecallDisplay

#OUR PERSONAL FUNCTIONS
import importlib
from Preprocessing.data_loader import download_dataset,load_metadata
from Preprocessing.exploration_plots import one_random_audio, plot_clip_overview, Spectral_Analysis



TensorFlow version: 2.13.0-rc0


In [5]:
import subprocess
import os

def is_folder_empty(folder_path):
    return len(os.listdir(folder_path)) == 0

path_input = os.path.join(main_dir,'data','ESC-US','01')

# Get a list of all files and directories in the specified directory
files = os.listdir(path_input)
number_files = len(files)

def convert_ogg_to_wav(input_file, output_file):
    command = ['ffmpeg', '-i', input_file, output_file]
    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

if is_folder_empty(os.path.join(main_dir,'data','ESC-US','01_conv')):
    for file in files:
        input_file = os.path.join(main_dir,'data','ESC-US','01',file)
        new_filename = file[:-3] + "wav"
        output_file = os.path.join(main_dir,'data','ESC-US','01_conv',new_filename)
        convert_ogg_to_wav(input_file, output_file)

In [6]:
import os
import numpy as np
import tensorflow as tf
import librosa

def create_dataset(subfolder_path, 
                   batch_size = 64,  
                   shuffle = True, 
                   validation_split = 0.25, 
                   cache_file_train = None, 
                   cache_file_val = None, 
                   cache_file_test = None, 
                   normalize = True, 
                   preprocessing = None,  # "STFT", "MEL", "MFCC"
                   delta = True,  # True or False only if preprocessing = "MFCC"
                   delta_delta = True,  #  True or False only if preprocessing = "MFCC"
                   labels = "inferred"): #labels = 'inferred' or None
    
    def squeeze(audio, labels=None):
        if audio.shape[-1] is None:
            audio = tf.squeeze(audio, axis=-1)
        if labels is not None:
            return audio, labels
        else:
            return audio
    
    def spectral_preprocessing_audio(audio, 
                                     target = None, 
                                     sample_rate = 44100, 
                                     segment = 20, 
                                     n_fft = None,
                                     overlapping = 10, 
                                     cepstral_num = 40, 
                                     N_filters = 50, 
                                     preprocessing = preprocessing,
                                     delta = delta, 
                                     delta_delta = delta_delta):
        audio = audio.numpy()
        if n_fft is None:
            n_fft = segment
        nperseg = round(sample_rate * segment / 1000)
        noverlap = round(sample_rate * overlapping / 1000)
        n_fft = round(sample_rate * n_fft / 1000)
        hop_length = nperseg - noverlap
        r = None
        if preprocessing == "STFT":
            stft_librosa = librosa.stft(audio, hop_length=hop_length, win_length=nperseg, n_fft=n_fft)
            r = librosa.amplitude_to_db(np.abs(stft_librosa), ref=np.max)
        elif preprocessing == "MEL":
            mel_y = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_fft=n_fft, hop_length=hop_length,
                                                   win_length=nperseg) 
            r = librosa.power_to_db(mel_y, ref=np.max)
        elif preprocessing == "MFCC":
            mfcc_y = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=cepstral_num, n_fft=n_fft,
                                          hop_length=hop_length, htk=True, fmin=40, n_mels=N_filters)
            if delta:
                delta_mfccs = librosa.feature.delta(mfcc_y)
                if delta_delta:
                    delta2_mfccs = librosa.feature.delta(mfcc_y, order=2)
                if delta and not delta_delta:
                    mfccs_features = np.concatenate((mfcc_y, delta_mfccs))
                elif delta and delta_delta:
                    mfccs_features = np.concatenate((mfcc_y, delta_mfccs, delta2_mfccs))
            if not delta and not delta_delta:
                mfccs_features = mfcc_y
            r = mfccs_features
        if target is not None:
            return r, target
        else:
            return r

    train, val_test = tf.keras.utils.audio_dataset_from_directory(
        directory =subfolder_path.replace('\\','/'),
        labels=labels, 
        label_mode='categorical',
        class_names=None,
        batch_size=None,
        sampling_rate=None,
        output_sequence_length=220500,
        ragged=False,
        shuffle=shuffle,
        seed=42,
        validation_split=validation_split,
        subset='both',
        follow_links=False
    )

    train = train.map(squeeze, tf.data.AUTOTUNE)
    val_test = val_test.map(squeeze, tf.data.AUTOTUNE)

    # Split the validation and test set
    val_size = round(val_test.cardinality().numpy() * (1 - validation_split))
    test_size = val_test.cardinality().numpy() - val_size
    test = val_test.shard(num_shards=2, index=0)
    val = val_test.shard(num_shards=2, index=1)

    if preprocessing:
        if labels:
            train = train.map(lambda audio, target: tf.py_function(spectral_preprocessing_audio,
                                                                   [audio, target],
                                                                   [tf.float32, tf.float32]), tf.data.AUTOTUNE)
            val = val.map(lambda audio, target: tf.py_function(spectral_preprocessing_audio,
                                                               [audio, target],
                                                               [tf.float32, tf.float32]), tf.data.AUTOTUNE)
            test = test.map(lambda audio, target: tf.py_function(spectral_preprocessing_audio,
                                                                 [audio, target],
                                                                 [tf.float32, tf.float32]), tf.data.AUTOTUNE)
        else:
            train = train.map(lambda audio: tf.py_function(spectral_preprocessing_audio, [audio], [tf.float32]),
                              tf.data.AUTOTUNE)
            val = val.map(lambda audio: tf.py_function(spectral_preprocessing_audio, [audio], [tf.float32]),
                            tf.data.AUTOTUNE)
            test = test.map(lambda audio: tf.py_function(spectral_preprocessing_audio, [audio], [tf.float32]),
                             tf.data.AUTOTUNE)

    if normalize:
        m=0
        for element in train:
            n = np.max(np.abs(element[0].numpy()))
            if n>m:
                m = n
        def normalize_map(matrix, target=None):
            matrix = matrix / m
            if target is not None:
                return matrix, target
            else:
                return matrix
        
        if labels:
            train = train.map(lambda matrix, target: tf.py_function(normalize_map, [matrix, target],
                                                                    [tf.float32, tf.float32]), tf.data.AUTOTUNE)
            val = val.map(lambda matrix, target: tf.py_function(normalize_map, [matrix, target],
                                                                [tf.float32, tf.float32]), tf.data.AUTOTUNE)
            test = test.map(lambda matrix, target: tf.py_function(normalize_map, [matrix, target],
                                                                [tf.float32, tf.float32]), tf.data.AUTOTUNE)
        else:
            train = train.map(lambda matrix: tf.py_function(normalize_map, [matrix],
                                                                    [tf.float32]), tf.data.AUTOTUNE)
            val = val.map(lambda matrix: tf.py_function(normalize_map, [matrix],
                                                                [tf.float32]), tf.data.AUTOTUNE)
            test = test.map(lambda matrix: tf.py_function(normalize_map, [matrix],
                                                                [tf.float32]), tf.data.AUTOTUNE)

    if batch_size:
        train = train.batch(batch_size)
        val = val.batch(batch_size)
        test = test.batch(batch_size)

    if cache_file_train:
        train = train.cache(cache_file_train)
    if cache_file_val:
        val = val.cache(cache_file_val)
    if cache_file_test:
        test = test.cache(cache_file_test)

    if shuffle:
        train = train.shuffle(train.cardinality().numpy(), reshuffle_each_iteration=True)
        val = val.shuffle(val_size, reshuffle_each_iteration=True)
        test = test.shuffle(test_size, reshuffle_each_iteration=True)

    train = train.repeat().prefetch(buffer_size=tf.data.AUTOTUNE)
    val = val.repeat().prefetch(buffer_size=tf.data.AUTOTUNE)
    test = test.repeat().prefetch(buffer_size=tf.data.AUTOTUNE)

    return train, val, test


In [7]:
#subfolder_path = os.path.join(main_dir,'data','ESC-10-depth')
subfolder_path = os.path.join(main_dir,'data','ESC-US')
validation_split = 0.25
batch_size = 64

train, val, test = create_dataset(subfolder_path, 
                      batch_size = batch_size, 
                      shuffle = True, 
                      validation_split = validation_split, 
                      cache_file_train = None, 
                      cache_file_val = None, 
                      cache_file_test = None, 
                      normalize = True,
                      preprocessing = "STFT",
                      delta = True,
                      delta_delta = True, 
                      labels = None)  #None, 'inferred'

#show the first element of the dataset train 
for element in train.take(1).unbatch():
    print(element[0].shape)
    #print(element[1].shape)
    print(element[0])
    #print(element[1])
    break

Found 10000 files belonging to 1 classes.
Using 7500 files for training.
Using 2500 files for validation.


2023-06-01 16:01:58.703331: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:01:58.703474: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:02:14.196213: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500

(442, 501)
tf.Tensor(
[[-0.25941548 -0.27291566 -0.38987213 ... -0.5656474  -0.3032414
  -0.25531954]
 [-0.28107277 -0.25376305 -0.30415255 ... -0.41670433 -0.340716
  -0.27018642]
 [-0.32152385 -0.28085697 -0.3218667  ... -0.4582808  -0.45606
  -0.30461907]
 ...
 [-0.8450736  -1.         -1.         ... -1.         -1.
  -0.9374911 ]
 [-0.84628665 -1.         -1.         ... -1.         -1.
  -0.93521327]
 [-0.8467587  -1.         -1.         ... -1.         -1.
  -0.93606246]], shape=(442, 501), dtype=float32)


In [8]:
# Duplicate data for the autoencoder (input = output)
py_funct = lambda audio: (audio, audio)
train = train.map(py_funct)
val = val.map(py_funct)
test = test.map(py_funct)

In [9]:
train

<_MapDataset element_spec=(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None))>

In [10]:
#show the first element of the dataset train 
for element in train.take(1).unbatch():
    print(element[0].shape)
    print(type(element[0]))
    #print(element[1].shape)
    print(element[0])
    #print(element[1])
    #print((element[0] == element[1]).numpy().all())
    break

2023-06-01 16:02:31.377895: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:02:31.378142: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:02:41.575743: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 65 of 118
2023-06-01 16:02:51.551768: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 99 of 118
2023-06-01 16:02:5

(442, 501)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(
[[-0.36311442 -0.4473196  -0.25178072 ... -0.1282061  -0.12372088
  -0.09211516]
 [-0.37164718 -0.54834336 -0.28911006 ... -0.17860636 -0.14098349
  -0.112606  ]
 [-0.39932853 -0.5170544  -0.40389442 ... -0.2961281  -0.26615128
  -0.17607489]
 ...
 [-0.90408117 -1.         -1.         ... -1.         -1.
  -0.749118  ]
 [-0.90404975 -1.         -1.         ... -1.         -1.
  -0.74918854]
 [-0.90407276 -1.         -1.         ... -1.         -1.
  -0.74923795]], shape=(442, 501), dtype=float32)


In [11]:
# rename the dataset
X_train = train
X_val = val
X_test = test
batch_size = 64

num_elements_train = round( number_files * (1 - validation_split))
num_elements_val = round(number_files *  validation_split / 2)
num_elements_test = round(number_files * validation_split / 2)

train_steps = num_elements_train // batch_size
val_steps = num_elements_val // batch_size
test_steps = num_elements_test // batch_size

print("Train steps required: ", train_steps)
print("Val steps required: ", val_steps)
print("Test steps required: ", test_steps)

Train steps required:  117
Val steps required:  19
Test steps required:  19


## First autoencoder: linear ae with one dense hidden layer on row audio data

In [28]:
def build_autoencoder_1(array_shape, code_size):
    encoder = tf.keras.Sequential()
    encoder.add(tf.keras.Input(array_shape))
    encoder.add(layers.Flatten())                  
    encoder.add(layers.Dense(code_size))         

    decoder = tf.keras.Sequential()
    decoder.add(tf.keras.Input((code_size,)))
    decoder.add(layers.Dense(np.prod(array_shape)))  
    decoder.add(layers.Reshape(array_shape))      
    
    return encoder,decoder

In [29]:
array_shape = (220500,)
code_size = 32

encoder, decoder = build_autoencoder_1(array_shape, code_size=code_size)

encoder.summary()
decoder.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 220500)            0         
                                                                 
 dense_2 (Dense)             (None, 32)                7056032   
                                                                 
Total params: 7056032 (26.92 MB)
Trainable params: 7056032 (26.92 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 220500)            7276500   
                                                                 
 reshape_1 (Reshape)         (None, 220500)            0         
                                                          

In [30]:
inp = tf.keras.Input(array_shape)
code = encoder(inp)
reconstruction = decoder(code)
autoencoder = tf.keras.Model(inputs=inp, outputs=reconstruction)

autoencoder.compile(optimizer='adamax', loss='mse') 
autoencoder.fit(train, 
                epochs=3,
                steps_per_epoch = train_steps,
                validation_data=test,
                validation_steps = test_steps )


Epoch 1/3


2023-05-31 11:05:43.703255: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-05-31 11:05:43.703603: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-05-31 11:05:54.219414: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 104 of 118
2023-05-31 11:05:55.297436: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.




2023-05-31 11:06:30.340501: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2500]
	 [[{{node Placeholder/_0}}]]
2023-05-31 11:06:30.340852: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2500]
	 [[{{node Placeholder/_0}}]]


Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x1578e36d0>

In [31]:
loss = autoencoder.evaluate(test, steps=test_steps, verbose=0)
print("PCA MSE:", loss)

PCA MSE: 0.014761404134333134


In [32]:
def hear(audio, encoder, decoder):
    audio = tf.reshape(audio, (1, -1))  # Reshape audio to match the expected shape
    code = encoder.predict(audio)
    reco = decoder.predict(code)
    display(ipd.Audio(data = audio.numpy(), rate=41000))
    display(ipd.Audio(data = reco, rate=41000))

for element in train.take(1).unbatch():
    hear(element[0], encoder, decoder)
    break

2023-05-31 11:08:14.983526: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-05-31 11:08:14.983903: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]




## Second AE: deep autoencoder with convolutional layers on row audio data

In [32]:
import keras_utils

In [33]:
#show the first element of the dataset train 
for element in train.take(1).unbatch():
    print(element[1].shape)
    print(type(element[1]))
    #print(element[1].shape)
    print(element[1])
    #print(element[1])
    #print((element[0] == element[1]).numpy().all())
    break

2023-06-01 16:20:55.161916: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:20:55.162380: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:21:05.427263: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 62 of 118
2023-06-01 16:21:15.283996: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 105 of 118
2023-06-01 16:21:

(442, 501)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(
[[-0.50632733 -0.31124753 -0.1984582  ... -0.21832915 -0.34012493
  -0.21206069]
 [-0.33931392 -0.4006009  -0.2087169  ... -0.10858865 -0.09172721
  -0.21066873]
 [-0.29598397 -0.28406194 -0.2280776  ... -0.17619105 -0.14099184
  -0.2271016 ]
 ...
 [-1.         -1.         -1.         ... -1.         -1.
  -0.85185796]
 [-1.         -1.         -1.         ... -1.         -1.
  -0.851968  ]
 [-1.         -1.         -1.         ... -1.         -1.
  -0.8519907 ]], shape=(442, 501), dtype=float32)


In [34]:
IMG_SHAPE = (442,501,1)
code_size = 32

def build_deep_autoencoder(img_shape, code_size):
    """
    Arguments:
    img_shape -- size of the input layer
    code_size -- the size of the hidden representation of the input (code)

    Returns:
    encoder -- keras model for the encoder network
    decoder -- keras model for the decoder network
    """

    # encoder
    encoder = tf.keras.Sequential()
    encoder.add(tf.keras.Input(img_shape))

    encoder.add(layers.Conv2D(32, (3, 3), activation='elu', padding='same'))
    encoder.add(layers.MaxPool2D((2, 2), padding='same'))

    encoder.add(layers.Conv2D(64, (3, 3), activation='elu', padding='same'))
    encoder.add(layers.MaxPool2D((2, 2), padding='same'))

    encoder.add(layers.Conv2D(128, (3, 3), activation='elu', padding='same'))
    encoder.add(layers.MaxPool2D((2, 2), padding='same'))

    encoder.add(layers.Conv2D(256, (3, 3), activation='elu', padding='same'))
    encoder.add(layers.MaxPool2D((2, 2), padding='same'))

    encoder.add(layers.Flatten())
    encoder.add(layers.Dense(code_size))

    # decoder
    decoder = tf.keras.Sequential()
    decoder.add(tf.keras.Input((code_size,)))

    decoder.add(layers.Dense(27 * 32 * 256, activation='elu'))
    decoder.add(layers.Reshape((27, 32, 256)))
    decoder.add(layers.Conv2DTranspose(128, (3, 3), strides=2, activation='elu', padding='same'))
    decoder.add(layers.Conv2DTranspose(64, (3, 3), strides=2, activation='elu', padding='valid'))
    decoder.add(layers.Conv2DTranspose(32, (3, 3), strides=2, activation='elu', padding='same'))
    decoder.add(layers.Conv2DTranspose(1, (3, 3), strides=2, activation=None, padding='same'))

    return encoder, decoder

In [35]:
# check autoencoder shapes along different code_sizes
get_dim = lambda layer: np.prod(layer.output_shape[1:])
for code_size in [1, 8, 32, 128, 512]:
    encoder, decoder = build_deep_autoencoder(IMG_SHAPE, code_size=code_size)
    print("Testing code size %i" % code_size)
    assert encoder.output_shape[1:] == (code_size,), "encoder must output a code of required size"
    assert decoder.output_shape[1:] == IMG_SHAPE, "decoder must output an image of valid shape"
    assert len(encoder.trainable_weights) >= 6, "encoder must contain at least 3 layers"
    assert len(decoder.trainable_weights) >= 6, "decoder must contain at least 3 layers"

    for layer in encoder.layers + decoder.layers:
        assert get_dim(layer) >= code_size, "Encoder layer %s is smaller than bottleneck (%i units)" % (
        layer.name, get_dim(layer))

print("All tests passed!")

Testing code size 1


AssertionError: decoder must output an image of valid shape

In [36]:
# Look at encoder and decoder shapes. The total number of trainable parameters of encoder and decoder should be close
encoder, decoder = build_deep_autoencoder(IMG_SHAPE, code_size=32)
encoder.summary()
decoder.summary()

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_27 (Conv2D)          (None, 442, 501, 32)      320       
                                                                 
 max_pooling2d_24 (MaxPooli  (None, 221, 251, 32)      0         
 ng2D)                                                           
                                                                 
 conv2d_28 (Conv2D)          (None, 221, 251, 64)      18496     
                                                                 
 max_pooling2d_25 (MaxPooli  (None, 111, 126, 64)      0         
 ng2D)                                                           
                                                                 
 conv2d_29 (Conv2D)          (None, 111, 126, 128)     73856     
                                                                 
 max_pooling2d_26 (MaxPooli  (None, 56, 63, 128)     

In [37]:
encoder, decoder = build_deep_autoencoder(IMG_SHAPE, code_size=32)

In [38]:
inp = tf.keras.Input(IMG_SHAPE)
code = encoder(inp)
reconstruction = decoder(code)

In [39]:
autoencoder = tf.keras.Model(inputs=inp, outputs=reconstruction)
autoencoder.compile(optimizer="adamax", loss='mse')

In [40]:
model_filename = 'autoencoder_network.hdf5'
last_finished_epoch = None

In [41]:
history = autoencoder.fit(train, epochs=2,
                          steps_per_epoch=train_steps,
                          validation_data=test,
                          validation_steps=test_steps,
                          callbacks=[keras_utils.ModelSaveCallback(model_filename)],
                          verbose=0, initial_epoch=last_finished_epoch or 0)

2023-06-01 16:21:39.442840: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:21:39.443103: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [7500]
	 [[{{node Placeholder/_0}}]]
2023-06-01 16:21:50.283289: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] Filling up shuffle buffer (this may take a while): 61 of 118
2023-06-01 16:22:00.112922: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] Shuffle buffer filled.
2023-06-01 16:22:00.129805: W tensorflow/core/framework/

InvalidArgumentError: Graph execution error:

Detected at node 'model_4/sequential_17/conv2d_31/Elu' defined at (most recent call last):
    File "<frozen runpy>", line 198, in _run_module_as_main
    File "<frozen runpy>", line 88, in _run_code
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/traitlets/config/application.py", line 992, in launch_instance
      app.start()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 711, in start
      self.io_loop.start()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
      self._run_once()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
      handle._run()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 411, in do_execute
      res = shell.run_cell(
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 531, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3006, in run_cell
      result = self._run_cell(
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3061, in _run_cell
      result = runner(coro)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3266, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3445, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/bn/q7hx91b1081fm2grcj1g9npm0000gn/T/ipykernel_67022/4050245915.py", line 1, in <module>
      history = autoencoder.fit(train, epochs=2,
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1742, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1338, in train_function
      return step_function(self, iterator)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1322, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1303, in run_step
      outputs = model.train_step(data)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1080, in train_step
      y_pred = self(x, training=True)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 569, in __call__
      return super().__call__(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/training.py", line 569, in __call__
      return super().__call__(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/sequential.py", line 404, in call
      return super().call(inputs, training=training, mask=mask)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/layers/convolutional/base_conv.py", line 321, in call
      return self.activation(outputs)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/activations.py", line 152, in elu
      return backend.elu(x, alpha)
    File "/Users/leonardo/anaconda3/envs/DSEnv3/lib/python3.11/site-packages/keras/src/backend.py", line 5423, in elu
      res = tf.nn.elu(x)
Node: 'model_4/sequential_17/conv2d_31/Elu'
convolution input must be 4-dimensional: [64,442,501]
	 [[{{node model_4/sequential_17/conv2d_31/Elu}}]] [Op:__inference_train_function_215342]