In [1]:
import os
import sys
import warnings
import time

sys.path.insert(0, os.path.abspath(os.path.join("..", "src")))
warnings.simplefilter("ignore")

import tensorflow as tf
import numpy as np
from keras.layers import Dense, BatchNormalization, Dropout, Activation, Reshape
from keras.losses import Huber
from keras.activations import swish
from keras.initializers import HeNormal
from keras.models import Sequential
from keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint
from keras.optimizers import Nadam
from keras.optimizers.schedules import ExponentialDecay
from keras import mixed_precision
from tqdm import tqdm

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        print(f"Using device: {physical_devices[0]}")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU device found, using CPU")

print(mixed_precision.global_policy())
# mixed_precision.set_global_policy('mixed_float16')

from models import create_dataset_from_mapping, load_entire, encode

root_dir = os.path.abspath(os.path.join(".", ".."))

Using device: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
<FloatDTypePolicy "float32">


# Dataset creation

To speed up further training we'll load entire dataset into memory. `create_dataset_from_mapping` on load reshapes features back to 4, 4, 1536 and fits 2 average poolings on top of it to reduce size to 4 * 1536. Final dataset is 2D array of shape (n_samples, 4 * 1536) - so we have up to now 1 - (1536 * 4) / (150 * 150 * 3) = ~90% reduction of data.

In [2]:
# train_dataset, train_size = create_dataset_from_mapping(mapping_file="train_mapping.csv")
# validation_dataset, validation_size = create_dataset_from_mapping(mapping_file="validation_mapping.csv")

In [3]:
# X_train, y_train = load_entire(train_dataset, validation_size)
# np.save(os.path.join(root_dir, "data", "X_train.npy"), X_train)
# np.save(os.path.join(root_dir, "data", "y_train.npy"), y_train)
# del y_train
# del train_dataset

In [4]:
# X_valid, y_valid = load_entire(validation_dataset, validation_size)
# np.save(os.path.join(root_dir, "data", "X_valid.npy"), X_valid)
# np.save(os.path.join(root_dir, "data", "y_valid.npy"), y_valid)
# del y_valid
# del validation_dataset

# Encoding features with autoencoder

In [5]:
# Define the encoder
encoder = Sequential([
    Dense(6144, input_shape=(6144, ), kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    Dense(2048, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    Dense(1024, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    # Dense(512, kernel_initializer=HeNormal()),
    # Activation(swish),
    # BatchNormalization(),
    # Dropout(0.3),
    # Dense(512, kernel_initializer=HeNormal()),
    # Activation(swish),
    # BatchNormalization(),
    # Dropout(0.3),
    Dense(512, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3)
])

decoder = Sequential([
    Dense(256, input_shape=(256,), kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    Dense(512, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    # Dense(512, kernel_initializer=HeNormal()),
    # Activation(swish),
    # BatchNormalization(),
    # Dropout(0.3),
    # Dense(512, kernel_initializer=HeNormal()),
    # Activation(swish),
    # BatchNormalization(),
    # Dropout(0.3),
    Dense(1024, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    Dense(2048, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
    Dense(6144, kernel_initializer=HeNormal()),
    Activation(swish),
    BatchNormalization(),
    Dropout(0.3),
])

autoencoder = Sequential([encoder, decoder])

2024-06-03 17:49:42.350163: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-06-03 17:49:42.350179: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-06-03 17:49:42.350184: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-06-03 17:49:42.350200: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-03 17:49:42.350211: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
lr_scheduler = ExponentialDecay(
    initial_learning_rate=0.003,
    decay_steps=1500,
    decay_rate=0.9,
)
optimizer = Nadam(learning_rate=lr_scheduler)
autoencoder.compile(optimizer=optimizer, loss=Huber())
# autoencoder.compile(optimizer=optimizer, loss='mse')

checkpoint_dir = os.path.join(".", "logs", "checkpoints")
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_prefix = os.path.join(checkpoint_dir, "autoencoder_{epoch:02d}.weights.h5")
checkpoint = ModelCheckpoint(
    filepath=checkpoint_prefix, save_weights_only=True, save_freq="epoch"
)
early_stopping = EarlyStopping(patience=5, verbose=1, restore_best_weights=True)
tensorboard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [early_stopping, tensorboard, checkpoint]

Due to our limited performance we commented out part of the network to speed up training and below code uses only quater of the training set (mode=2) ie around 50k images. It also uses relatively large batch size.

In [7]:
# %tensorboard --logdir logs/gradient_tape

In [8]:
initial_epoch = 0
checkpoints = sorted(os.listdir(checkpoint_dir))
if len(checkpoints):
    latest_checkpoint = os.path.join(checkpoint_dir, checkpoints[-1])
    autoencoder.load_weights(latest_checkpoint)
    initial_epoch = int(os.path.basename(latest_checkpoint).split("_")[-1].split(".")[0])
    print(f"Loaded from checkpoint {latest_checkpoint}")

Loaded from checkpoint ./logs/checkpoints/autoencoder_08.weights.h5


In [9]:
epochs = 500

# autoencoder.fit(X_train, X_train, epochs=epochs, callbacks=callbacks_list, batch_size=16, validation_data=(X_valid, X_valid))

train_dataset, train_size = create_dataset_from_mapping(mapping_file="train_mapping.csv", mode=2, max_size=50000)
validation_dataset, validation_size = create_dataset_from_mapping(mapping_file="validation_mapping.csv", mode=2, max_size=10000)

train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
validation_dataset = validation_dataset.batch(256).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
autoencoder.fit(train_dataset, epochs=epochs, callbacks=callbacks_list, validation_data=validation_dataset, initial_epoch=initial_epoch)


Epoch 9/500


2024-06-03 17:49:44.237624: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 303ms/step - loss: 0.0091 - val_loss: 2.9995
Epoch 10/500
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 295ms/step - loss: 0.0091 - val_loss: 210262.1406
Epoch 11/500
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 299ms/step - loss: 0.0091 - val_loss: 24.0805
Epoch 12/500
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 298ms/step - loss: 0.0091 - val_loss: 17.2985
Epoch 13/500
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 300ms/step - loss: 0.0091 - val_loss: 18.0310
Epoch 14/500
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m236s[0m 301ms/step - loss: 0.0091 - val_loss: 13.9465
Epoch 14: early stopping
Restoring model weights from the end of the best epoch: 9.


<keras.src.callbacks.history.History at 0x349b083a0>

In [10]:
encoder.save(os.path.join(root_dir, "models", "encoder.keras"))
decoder.save(os.path.join(root_dir, "models", "decoder.keras"))

In [11]:
del train_dataset
del validation_dataset
del decoder

# Encoding

In [18]:
encode("validation", encoder)
encode("train", encoder)

Encoding validation:  99%|█████████▉| 85/86 [00:10<00:00,  7.88it/s]2024-06-03 18:22:45.976025: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Encoding validation: 100%|██████████| 86/86 [00:10<00:00,  8.47it/s]
Encoding train: 100%|█████████▉| 770/771 [01:33<00:00,  7.34it/s]2024-06-03 18:24:20.045446: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
Encoding train: 100%|██████████| 771/771 [01:33<00:00,  8.22it/s]


Final dataset is 2D array of shape (n_samples, 256) - so we have up to now 1 - (256) / (150 * 150 * 3) = ~99.6% reduction of data.