In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import and modify dataset

In [None]:
# Fixed seed so that the shuffle below (and therefore the train/test split
# derived from row order) is identical on every Restart & Run All.
SEED = 42

# The CSV has no header row, so columns are addressed by integer position.
ds = pd.read_csv('../Dataset/data_abs.csv', header=None)
ds = ds[ds[4] == 1] # Select data for water media
ds = ds[ds[1] != 0] # Select only core-shell particles
ds = ds.sample(frac=1, random_state=SEED).reset_index(drop=True) # Randomize dataset (reproducibly)

In [None]:
num_mats = 11 # Total number of materials used (including "empty")
spectra = np.array(ds.iloc[:, 5:])/255 # Normalized spectra (columns 5 onwards, divided by 255)

m1 = np.array(ds[[0]]) # Core material ids
m2 = np.array(ds[[1]]) # Shell material ids
t1 = np.array(ds[[2]])/100 # Normalized core thickness
t2 = np.array(ds[[3]])/100 # Normalized shell thickness

geo = np.hstack((m1, m2, t1, t2)) # Geometry parameters

# Row index at which the held-out test set starts: the first 95% of the rows
# are used for training, the remaining 5% are kept for testing after training.
test_split = int(0.95*ds.shape[0])


def _one_hot_materials(ids):
    """One-hot encode material ids with a fixed width of ``num_mats``.

    Passing ``num_classes`` explicitly keeps the encoding width independent of
    which ids happen to be present in a given slice; without it the width is
    inferred from the largest id in that slice, so the train and test
    encodings could end up with different sizes.

    Args:
        ids: Array of integer material ids.

    Returns:
        The one-hot encoding with an extra length-1 axis inserted at position 1.
    """
    one_hot = keras.utils.to_categorical(ids, num_classes=num_mats)
    return tf.expand_dims(one_hot, axis=1)


# Training spectra
y_train = spectra[:test_split]
y_train = tf.expand_dims(y_train, axis=-1)

# Training geometry parameters
x_train_m1 = _one_hot_materials(m1[:test_split])
x_train_m2 = _one_hot_materials(m2[:test_split])
x_train_t1 = t1[:test_split]
x_train_t1 = tf.expand_dims(x_train_t1, axis=-1)
x_train_t2 = t2[:test_split]
x_train_t2 = tf.expand_dims(x_train_t2, axis=-1)
x_train = geo[:test_split]
x_train = tf.expand_dims(x_train, axis=-1)

# Testing spectra
y_test = spectra[test_split:]
y_test = tf.expand_dims(y_test, axis=-1)

# Testing geometry parameters
x_test_m1 = _one_hot_materials(m1[test_split:])
x_test_m2 = _one_hot_materials(m2[test_split:])
x_test_t1 = t1[test_split:]
x_test_t1 = tf.expand_dims(x_test_t1, axis=-1)
x_test_t2 = t2[test_split:]
x_test_t2 = tf.expand_dims(x_test_t2, axis=-1)
x_test = geo[test_split:]
x_test = tf.expand_dims(x_test, axis=-1)

# Load pre-trained forward model and freeze weights

The forward model is pre-trained, and we do not want it to be modified (trained further) during the inverse network training.

We therefore need to set the forward model as not trainable. Note: if a full model is set to `trainable = False`, the model needs to be compiled for this to take effect. Setting the model to `trainable = False` after compilation has no effect. To avoid recompilation, every single layer can instead be set to `trainable = False` in a loop over the model's layers, which also works on compiled models without recompilation.

In [None]:
# Location of the saved, pre-trained forward network
model_path = '../Models/model_tandem_forward.h5'

# Restore the forward network for inference only: compile=False skips
# restoring any compile state (optimizer / loss) stored with the model.
fwd_model = keras.models.load_model(filepath=model_path, compile=False)

# Freeze the forward network so its weights are not updated when the
# tandem model is trained later on.
fwd_model.trainable = False

# Inverse resnet model

In [None]:
def residual_block(x_in, N_filter, kernel_size=3, strides=1,
                   conv_layer=keras.layers.Conv1D, alpha=0.3,
                   with_BN=False):
    """Build one residual block on top of ``x_in``.

    The main path is three convolutions (kernel 1, kernel ``kernel_size``,
    kernel 1), each optionally followed by batch normalization; the first two
    are followed by a LeakyReLU. The result is added to a shortcut branch and
    passed through a final LeakyReLU.

    Args:
        x_in: Input tensor of the block.
        N_filter: Number of filters of every convolution in the block.
        kernel_size: Kernel size of the middle convolution.
        strides: Stride of the middle convolution (and of the shortcut
            projection, when one is needed).
        conv_layer: Convolution layer class to instantiate.
        alpha: Argument passed to every ``LeakyReLU`` layer.
        with_BN: If True, add a BatchNormalization layer after every main-path
            convolution and disable the bias of those convolutions.

    Returns:
        Output tensor of the block.
    """
    # Shortcut branch: the bare input can only be added to the main path when
    # channel count and length are unchanged; otherwise project it with a
    # 1x1 convolution using the same stride.
    needs_projection = (x_in.shape[-1] != N_filter) or (strides != 1)
    if needs_projection:
        shortcut = conv_layer(
            filters=N_filter, kernel_size=1, strides=strides, padding='same')(x_in)
    else:
        shortcut = x_in

    # Main path, described as (kernel size, stride, followed by activation?).
    # The last convolution is activated only after the addition below.
    stages = (
        (1, 1, True),
        (kernel_size, strides, True),
        (1, 1, False),
    )
    x = x_in
    for stage_kernel, stage_stride, activate in stages:
        x = conv_layer(filters=N_filter, kernel_size=stage_kernel,
                       strides=stage_stride, padding='same',
                       use_bias=not with_BN)(x)
        if with_BN:
            x = keras.layers.BatchNormalization()(x)
        if activate:
            x = keras.layers.LeakyReLU(alpha)(x)

    # Merge both branches, then apply the final activation
    merged = keras.layers.Add()([x, shortcut])
    return keras.layers.LeakyReLU(alpha)(merged)

In [None]:
def resblock_sequence_down(x_in, N_filter, N_blocks):
    """Stack ``N_blocks`` stride-1 residual blocks followed by a downsampling one.

    The stride-1 blocks use batch normalization; the closing block uses
    ``strides=2`` and ``residual_block``'s default ``with_BN`` setting.

    Args:
        x_in: Input tensor.
        N_filter: Number of filters used by every block.
        N_blocks: Number of stride-1 blocks before the downsampling block.

    Returns:
        Output tensor of the closing stride-2 block.
    """
    out = x_in
    for _ in range(N_blocks):
        out = residual_block(out, N_filter, kernel_size=3, strides=1, with_BN=True)
    # Downsample with a strided (trainable) residual block
    return residual_block(out, N_filter, kernel_size=3, strides=2)


def resblock_sequence_up(x_in, N_filter, N_blocks):
    """Stack ``N_blocks`` stride-1 residual blocks followed by an upsampling layer.

    Args:
        x_in: Input tensor.
        N_filter: Number of filters used by every block.
        N_blocks: Number of batch-normalized stride-1 blocks before upsampling.

    Returns:
        Output tensor of the final ``UpSampling1D`` layer.
    """
    out = x_in
    for _ in range(N_blocks):
        out = residual_block(out, N_filter, kernel_size=3, strides=1, with_BN=True)
    # Upsample with a plain UpSampling1D layer instead of a strided
    # transpose convolution
    return keras.layers.UpSampling1D()(out)


def resblock_sequence_id(x_in, N_filter, N_blocks):
    """Stack ``N_blocks`` batch-normalized stride-1 residual blocks.

    No down- or upsampling is applied, so the sequence length is preserved.

    Args:
        x_in: Input tensor.
        N_filter: Number of filters used by every block.
        N_blocks: Number of residual blocks to chain.

    Returns:
        Output tensor of the last block (``x_in`` itself if ``N_blocks`` is 0).
    """
    out = x_in
    for _ in range(N_blocks):
        out = residual_block(out, N_filter, kernel_size=3, strides=1, with_BN=True)
    return out

In [None]:
N_blocks = 3 # Number of stride-1 resblocks placed before each downsampling block

# Spectrum in
target_spec_input = keras.layers.Input(shape=y_train.shape[1:])
x = target_spec_input

# Encoder: three stages given as (number of filters, downsampling stride).
# Each stage is N_blocks stride-1 resblocks followed by one strided resblock.
# With 'same' padding a stride s maps a length L to ceil(L / s), so for a
# 200-point spectrum (as assumed by the original notes) the lengths are
# 200 --> 67 --> 34 --> 17.
for n_filters, down_stride in ((32, 3), (64, 2), (128, 2)):
    for _ in range(N_blocks):
        x = residual_block(x, n_filters, kernel_size=3, strides=1)
    x = residual_block(x, n_filters, kernel_size=3, strides=down_stride)

x = keras.layers.Flatten()(x)


def _dense_trunk(features):
    """Apply two consecutive Dense(256, relu) layers to ``features``."""
    hidden = keras.layers.Dense(256, activation='relu')(features)
    return keras.layers.Dense(256, activation='relu')(hidden)


# One independent head per design parameter, all fed by the flattened features.
# Material heads: softmax over the num_mats material classes.
x_m1 = _dense_trunk(x)
out_design_m1 = keras.layers.Dense(num_mats, activation='softmax', name='design_out_m1')(x_m1)

x_m2 = _dense_trunk(x)
out_design_m2 = keras.layers.Dense(num_mats, activation='softmax', name='design_out_m2')(x_m2)

# Thickness heads: a single linear output each.
x_t1 = _dense_trunk(x)
out_design_t1 = keras.layers.Dense(1, activation='linear', name='design_out_t1')(x_t1)

x_t2 = _dense_trunk(x)
out_design_t2 = keras.layers.Dense(1, activation='linear', name='design_out_t2')(x_t2)

design_outputs = [out_design_m1, out_design_m2, out_design_t1, out_design_t2]
inverse_model = keras.Model(
    inputs=target_spec_input,
    outputs=design_outputs,
    name='resnet_inverse_model')
inverse_model.summary()

# Define the full tandem model

In [None]:
# The tandem model shares its input (the target spectrum) with the inverse model
tandem_input = inverse_model.inputs
inverse_output = inverse_model(tandem_input)

# Insert a length-1 axis at position 1 into each of the four predicted design
# parameters (m1, m2, t1, t2) before handing them to the forward model.
fwd_inputs = [tf.expand_dims(design_param, axis=1) for design_param in inverse_output]
tandem_output = fwd_model(fwd_inputs)

# Full chain: target spectrum -> inverse model -> forward model -> spectrum
tandem = keras.models.Model(inputs=tandem_input, outputs=tandem_output)

# Tandem training

Now we compile the model with an optimizer (AdamW), add callbacks and then run the training.

Note on training data: The full tandem model takes the design target spectrum as input and returns the predicted spectrum of the suggested geometry. Therefore we no longer use the geometries for training; instead, we use the spectra as both input and output.

In [None]:
# Compile with optimizer and cost function.
# `metrics` is passed as a list (the documented form); a bare string is not
# accepted by all Keras versions.
tandem.compile(optimizer=keras.optimizers.AdamW(learning_rate=0.001), loss='mse', metrics=['mse'])

# Automatic learning rate reduction on loss plateau
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=1/3, patience=6, verbose=1)
# Callback for early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, mode='min', restore_best_weights=True)
# Callback for checkpoint saving (best weights only, judged by val_loss).
# NOTE(review): the filepath is a directory-style prefix; some Keras versions
# may require a specific file suffix when save_weights_only=True - confirm
# against the installed version.
checkpoint_save = ModelCheckpoint(filepath='checkpoints/tandem/',
                                  monitor='val_loss', save_weights_only=True, save_best_only=True)

callbacks = [reduce_lr, early_stop, checkpoint_save]

# The target spectrum is both the input and the expected output of the tandem
# model, hence x and y are the same tensor.
hist = None  # Global history, accumulated over all batch-size stages
for i in range(3):  # 3x16 epochs, doubling the batch size each stage (32, 64, 128)
    _h = tandem.fit(x=y_train, y=y_train,
                    validation_split=.2,
                    batch_size=32 * 2**i, epochs=16,
                    callbacks=callbacks)
    if hist is None:
        # First stage: keep its History object as the accumulator
        hist = _h
    else:
        # Later stages: append their per-epoch values to the accumulated history
        for k in hist.history:
            hist.history[k] = np.concatenate([hist.history[k], _h.history[k]])

# Plot losses

In [None]:
# Training and validation loss over all epochs of the successive fit() stages
fig, ax = plt.subplots()
ax.plot(hist.history['loss'], label='loss')
ax.plot(hist.history['val_loss'], label='val_loss')
# Label the axes and title the figure so it can be read on its own
ax.set_xlabel('epoch')
ax.set_ylabel('MSE loss')
ax.set_title('Tandem training')
ax.legend()
plt.show()

# Save models

In [None]:
# Persist the full tandem model first, then the stand-alone inverse model
_models_to_save = (
    (tandem, '../Models/model_tandem.h5'),
    (inverse_model, '../Models/model_tandem_inverse.h5'),
)
for _model, _target_file in _models_to_save:
    _model.save(_target_file)