<a href="https://colab.research.google.com/github/PlushyWushy/Rachel/blob/main/Rachel_Full_CIFAR_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow==2.15.0 numpy==2.0.2 tqdm==4.67.1
!pip install tensorflow-addons==0.23.0


Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Downloading ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
[31mERROR: Cannot install numpy==2.0.2 and tensorflow==2.15.0 because these package versions have conflicting dependencies.[0m[31m
[0m
The conflict is caused by:
    The user requested numpy==2.0.2
    tensorflow 2.15.0 depends on numpy<2.0.0 and >=1.23.5

To fix this you could try to:
1. loosen the range of package versions you've specified
2. remove package versions to allow pip to attempt to solve the dependency conflict

[31mERROR: ResolutionImpossible: for help visit https://pip.pypa.io/en/latest/topics/dependency-resolution/#dealing-with

In [None]:
#RACHEL FULL CIFAR BENCHMARK
import sys
sys.setrecursionlimit(100000)  # Increase recursion limit

import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import cifar10
from tqdm import tqdm

# ------------------------------------------------------------------------------
# GPU Setup: Enable Mixed Precision and XLA for A100 Optimization
# ------------------------------------------------------------------------------
# Enable XLA JIT compilation globally for TensorFlow graphs.
tf.config.optimizer.set_jit(True)

# Enable mixed precision training for faster FP16 operations (TF 2.15.0).
# The policy is global: every Keras layer created after this line uses it
# unless the layer is given an explicit dtype.
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy("mixed_float16")
print("Global mixed precision policy:", tf.keras.mixed_precision.global_policy())

# Mixed precision test: create a couple of float16 tensors and perform an op.
# This only checks float16 arithmetic on explicitly typed constants; it does not
# exercise the global policy set above.
a = tf.constant([1.0, 2.0, 3.0], dtype=tf.float16)
b = tf.constant([4.0, 5.0, 6.0], dtype=tf.float16)
c = a + b
print("Test tensor c dtype (should be float16):", c.dtype)

# Use MirroredStrategy for distributed GPU training. `strategy` is reused below
# when models are built/compiled and for the final mixture-weight optimisation.
strategy = tf.distribute.MirroredStrategy()
# NOTE(review): this message is printed unconditionally; no device check is
# performed before claiming a GPU is in use.
print("Running on GPU with Mixed Precision and XLA enabled.")

# Set seeds for reproducibility (TensorFlow global seed and NumPy legacy seed).
tf.random.set_seed(42)
np.random.seed(42)

# =============================================================================
# 1. Data Loading and Preparation (Full CIFAR-10)
# =============================================================================
# Load CIFAR-10 through Keras (downloads on first use).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Drop size-1 axes from the label arrays and store them as int32 class ids.
y_train = np.squeeze(y_train).astype(np.int32)
y_test  = np.squeeze(y_test).astype(np.int32)

# Use full CIFAR-10 (10 classes)
# Convert images to float32 and divide by 255 (assumes 8-bit pixel values, so
# the result lies in [0, 1] - TODO confirm).
x_train = x_train.astype(np.float32) / 255.0
x_test  = x_test.astype(np.float32) / 255.0

# Use the last 5000 examples of training for validation.
# The split is positional (no shuffling), so it is the same on every run.
num_val = 5000
x_val = x_train[-num_val:]
y_val = y_train[-num_val:]
x_train = x_train[:-num_val]
y_train = y_train[:-num_val]

img_shape = (32, 32, 3)   # input shape handed to tf.keras.Input when building candidates
num_classes = 10          # width of the final Dense layer of every candidate
batch_size = 128  # Mini-batch size used by make_dataset for every split

def make_dataset(x, y, shuffle=False):
    """Build a batched, prefetched ``tf.data`` pipeline over in-memory arrays.

    Args:
        x: Array of inputs; sliced along its first axis.
        y: Array of labels with the same first-axis length as ``x``.
        shuffle: When True, elements are shuffled (buffer of 10000, seed 42)
            and reshuffled at the start of every epoch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(x_batch, y_batch)`` tuples of at most
        ``batch_size`` elements (the module-level constant).
    """
    ds = tf.data.Dataset.from_tensor_slices((x, y))
    # Cache the individual elements BEFORE shuffling/batching. Caching after
    # shuffle() stores the first epoch's order and batch composition, so every
    # later epoch would replay exactly the same batches and the shuffle would
    # effectively run only once.
    ds = ds.cache()
    if shuffle:
        ds = ds.shuffle(buffer_size=10000, seed=42, reshuffle_each_iteration=True)
    # Optionally add data augmentation with ds.map(...)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_ds = make_dataset(x_train, y_train, shuffle=True)
val_ds   = make_dataset(x_val, y_val, shuffle=False)
test_ds  = make_dataset(x_test, y_test, shuffle=False)

# =============================================================================
# 2. RL Controller with Actor–Critic (Expanded Candidate Blocks)
# =============================================================================
# Number of layer slots the controller proposes per RL iteration.
max_new_layers = 3
# Softmax temperature used at iteration 0; the main loop multiplies it by
# decay_rate ** iteration, so sampling becomes less random over time.
initial_temperature = 1.5
decay_rate = 0.95

class RLController(tf.keras.Model):
    """LSTM controller that emits three logit tensors per candidate layer slot.

    For each of ``max_new_layers`` slots the model produces:
      * presence logits (2 choices),
      * block-type logits (2 choices),
      * unit/size logits (241 choices).
    """

    def __init__(self, max_new_layers=5, lstm_units=64):
        """Create the LSTM trunk and one Dense head per decision.

        Args:
            max_new_layers: Number of layer slots (sequence length fed to the LSTM).
            lstm_units: Hidden size of the LSTM.
        """
        super().__init__()
        self.max_new_layers = max_new_layers
        self.lstm = tf.keras.layers.LSTM(lstm_units, return_sequences=True)
        self.logits_presence = tf.keras.layers.Dense(2)
        self.logits_type = tf.keras.layers.Dense(2)
        self.logits_units = tf.keras.layers.Dense(241)

    def call(self, inputs, temperature=1.0, training=False):
        """Return ``(presence, type, units)`` logits, each divided by ``temperature``.

        Only the leading (batch) dimension of ``inputs`` is read; the LSTM is
        driven by an all-zeros sequence of shape
        ``(batch, max_new_layers, 16)``. ``training`` is accepted but not used.
        """
        batch = tf.shape(inputs)[0]
        # Use a dummy (all-zero) sequence to generate candidate tokens.
        zero_sequence = tf.zeros((batch, self.max_new_layers, 16))
        features = self.lstm(zero_sequence)
        heads = (self.logits_presence, self.logits_type, self.logits_units)
        presence, block_type, units = (head(features) / temperature for head in heads)
        return presence, block_type, units

class OptimizedControllerPolicy:
    """Actor-critic policy around :class:`RLController`.

    The actor proposes ``(presence, type, units)`` tokens for each layer slot;
    the critic (a small MLP on the dummy input) predicts the reward and serves
    as a baseline. Both are trained by one Adam optimizer in ``update_policy``.

    The tokens drawn by the latest ``sample_tokens_from_input`` call are
    remembered so that ``update_policy`` can compute the log-probability of the
    actions that actually produced the rewards.
    """

    def __init__(self, max_new_layers=5, lstm_units=64, lr=1e-3):
        """Build actor, critic and optimizer.

        Args:
            max_new_layers: Number of layer slots sampled per candidate.
            lstm_units: Hidden size of the actor's LSTM.
            lr: Learning rate of the shared Adam optimizer.
        """
        self.max_new_layers = max_new_layers
        self.actor = RLController(max_new_layers, lstm_units)
        self.critic = tf.keras.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        self.optimizer = tf.keras.optimizers.Adam(lr)
        # Tokens from the most recent sampling call (None until first sample).
        self._last_tokens = None

    def _actor_logits(self, dummy_input, temperature):
        """Run the actor and return its three logit tensors as float32.

        The cast keeps softmax/log-prob arithmetic in full precision even when a
        reduced-precision Keras policy is active.
        """
        heads = self.actor(dummy_input, temperature=temperature, training=True)
        return [tf.cast(head, tf.float32) for head in heads]

    @staticmethod
    def _log_prob(head_logits, tokens):
        """Sum of log-probabilities of ``tokens`` under ``head_logits``.

        Args:
            head_logits: Sequence of three tensors shaped ``[batch, slots, choices]``.
            tokens: Integer tensor ``[batch, slots, 3]``; channel ``k`` indexes
                into ``head_logits[k]``.

        Returns:
            Tensor ``[batch]``: log-probability of each sampled architecture.
        """
        tokens = tf.cast(tokens, tf.int32)
        per_slot = None
        for head_idx, logits in enumerate(head_logits):
            b = tf.shape(logits)[0]
            ml = tf.shape(logits)[1]
            # (batch index, slot index, chosen class) triples for gather_nd.
            indices = tf.stack([
                tf.repeat(tf.range(b), ml),
                tf.tile(tf.range(ml), [b]),
                tf.reshape(tokens[:, :, head_idx], [-1])
            ], axis=1)
            picked = tf.gather_nd(tf.nn.log_softmax(logits, axis=-1), indices)
            picked = tf.reshape(picked, [b, ml])
            per_slot = picked if per_slot is None else per_slot + picked
        return tf.reduce_sum(per_slot, axis=1)

    def sample_tokens_from_input(self, dummy_input, temperature=1.0):
        """Sample one architecture per row of ``dummy_input``.

        Args:
            dummy_input: Tensor whose leading dimension is the number of
                candidates to sample.
            temperature: Divisor applied to the actor logits before sampling.

        Returns:
            ``(tokens, logp)`` where ``tokens`` is int32 ``[batch, slots, 3]``
            (presence, type, units) and ``logp`` is float32 ``[batch]``.
        """
        head_logits = self._actor_logits(dummy_input, temperature)
        bs = tf.shape(dummy_input)[0]

        samples = []
        for logits in head_logits:  # order: presence, block type, block units
            num_choices = logits.shape[-1]
            # tf.random.categorical takes unnormalised log-probabilities, so the
            # logits are used directly instead of log(softmax(logits)).
            flat_logits = tf.reshape(logits, [-1, num_choices])
            draws = tf.random.categorical(flat_logits, 1, dtype=tf.int32)
            samples.append(tf.reshape(draws, [bs, self.max_new_layers]))

        tokens = tf.stack(samples, axis=2)
        self._last_tokens = tokens
        logp = self._log_prob(head_logits, tokens)
        return tokens, logp

    def sample_tokens(self, batch_size=1, temperature=1.0):
        """Sample ``batch_size`` architectures using a random-normal dummy input."""
        dummy_input = tf.random.normal((batch_size, self.max_new_layers, 16), mean=0.0, stddev=1.5)
        return self.sample_tokens_from_input(dummy_input, temperature=temperature)

    def update_policy(self, dummy_input, rewards, temperature=1.0, tokens=None):
        """Apply one actor-critic gradient step.

        Args:
            dummy_input: The input that was used when the tokens were sampled.
            rewards: Tensor ``[batch]`` with one reward per sampled candidate.
            temperature: Temperature used when the tokens were sampled.
            tokens: Optional ``[batch, slots, 3]`` tokens the rewards belong to.
                When omitted, the tokens from the latest sampling call are used
                if their batch size matches ``dummy_input``; otherwise fresh
                tokens are sampled.

        The loss is ``-mean(logp * advantage) + 0.5 * mean((reward - value)^2)``
        with ``advantage = stop_gradient(reward - value)``.
        """
        if tokens is None:
            cached = self._last_tokens
            if cached is not None and cached.shape[0] == dummy_input.shape[0]:
                # Score the actions that actually earned `rewards`. Re-sampling
                # here would pair each reward with an unrelated architecture.
                tokens = cached
            else:
                tokens, _ = self.sample_tokens_from_input(dummy_input, temperature=temperature)

        rewards = tf.cast(rewards, tf.float32)
        with tf.GradientTape() as tape:
            logp = self._log_prob(self._actor_logits(dummy_input, temperature), tokens)
            # Cast critic output to float32 and compute advantage.
            value_pred = tf.cast(tf.squeeze(self.critic(dummy_input), axis=1), tf.float32)
            # Keep advantage at shape [batch] so it multiplies logp element-wise;
            # a [batch, 1] advantage would broadcast against logp to
            # [batch, batch] and mix every sample's advantage with every logp.
            advantage = tf.stop_gradient(rewards - value_pred)

            actor_loss = -tf.reduce_mean(logp * advantage)
            critic_loss = tf.reduce_mean(tf.square(rewards - value_pred))
            total_loss = actor_loss + 0.5 * critic_loss

        vars_to_train = self.actor.trainable_variables + self.critic.trainable_variables
        grads = tape.gradient(total_loss, vars_to_train)
        self.optimizer.apply_gradients(zip(grads, vars_to_train))

# Single controller shared by every RL iteration of the search loop below.
controller_policy = OptimizedControllerPolicy(max_new_layers, lstm_units=64, lr=1e-3)
# Empty (0, 3) token array.
# NOTE(review): not referenced anywhere else in the visible code; the search
# loop keeps its own `prev_tokens` - confirm whether this is still needed.
prev_candidate_tokens = np.empty((0, 3), dtype=np.int32)

# =============================================================================
# 3. Decode Full Candidate to Build a Subnetwork (Optimized for GPU)
# =============================================================================
def decode_incremental_architecture(old_tokens, new_tokens):
    """Append the useful prefix of ``new_tokens`` to ``old_tokens``.

    ``new_tokens`` rows are ``(presence, type, units)``. Rows are kept up to and
    including the last row whose presence flag equals 1; inactive rows before
    that point are kept as well. If no row is present, nothing is appended.

    Returns:
        A new array: ``old_tokens`` followed by the kept rows of ``new_tokens``.
    """
    present_rows = np.flatnonzero(new_tokens[:, 0] == 1)
    if present_rows.size:
        keep = present_rows[-1] + 1
    else:
        keep = 0
    return np.concatenate((old_tokens, new_tokens[:keep]), axis=0)

def decode_full_candidate(candidate_tokens, dropout_rate=0.3):
    """Build an (uncompiled) Keras classifier from a token array.

    Each row of ``candidate_tokens`` is ``(presence, type, units)``:
      * presence == 0 -> the row is skipped;
      * type == 0     -> Conv-BN-ReLU-Dropout block;
      * otherwise     -> two-conv residual block with a 1x1 projection on the
                         shortcut when the channel count differs;
      * units + 16    -> number of convolution filters.
    Every block is followed by 2x2 max pooling while the spatial height is
    known and at least 2. The network starts with random flip/translation
    layers and ends with a float32 Dense layer producing ``num_classes`` logits.

    Args:
        candidate_tokens: Iterable of token rows as described above.
        dropout_rate: Rate used by every Dropout layer.

    Returns:
        A ``tf.keras.Model`` created under ``strategy.scope()``.
    """
    def conv3x3(tensor, filters):
        # 3x3 'same' convolution with L2(1e-4) weight decay.
        layer = tf.keras.layers.Conv2D(
            filters, kernel_size=3, padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(1e-4))
        return layer(tensor)

    def relu_dropout(tensor):
        tensor = tf.keras.layers.Activation('relu')(tensor)
        return tf.keras.layers.Dropout(dropout_rate)(tensor)

    def maybe_pool(tensor):
        # Halve the resolution only while there is something left to pool.
        height = tensor.shape[1]
        if height is not None and height >= 2:
            tensor = tf.keras.layers.MaxPooling2D(pool_size=2)(tensor)
        return tensor

    with strategy.scope():
        inputs = tf.keras.Input(shape=img_shape)
        x = tf.keras.layers.RandomFlip("horizontal")(inputs)
        x = tf.keras.layers.RandomTranslation(0.1, 0.1)(x)

        for tok in candidate_tokens:
            if tok[0] == 0:
                continue  # slot marked as absent
            block_type = tok[1]
            block_size = int(tok[2]) + 16

            if block_type == 0:
                x = conv3x3(x, block_size)
                x = tf.keras.layers.BatchNormalization()(x)
                x = relu_dropout(x)
                x = maybe_pool(x)
            else:
                # Simple residual block
                shortcut = x
                x = conv3x3(x, block_size)
                x = tf.keras.layers.BatchNormalization()(x)
                x = relu_dropout(x)

                x = conv3x3(x, block_size)
                x = tf.keras.layers.BatchNormalization()(x)

                if shortcut.shape[-1] != block_size:
                    # Project the shortcut so channel counts match for Add().
                    shortcut = tf.keras.layers.Conv2D(
                        block_size, kernel_size=1, padding='same',
                        kernel_regularizer=tf.keras.regularizers.l2(1e-4))(shortcut)

                x = tf.keras.layers.Add()([x, shortcut])
                x = relu_dropout(x)
                x = maybe_pool(x)

        if len(x.shape) > 2:
            x = tf.keras.layers.Flatten()(x)

        # Logits are kept in float32 regardless of the global dtype policy.
        outputs = tf.keras.layers.Dense(num_classes, dtype='float32')(x)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

# =============================================================================
# 4. Adaptive Candidate Training Duration (Epoch decider)
# =============================================================================
def ideal_epochs(num_layers, base_epochs=200):
    """Return the epoch budget for a candidate: ``base_epochs`` plus 10 per layer."""
    per_layer_bonus = 10
    return base_epochs + per_layer_bonus * num_layers

def lr_scheduler(epoch, lr):
    """Return the incoming learning rate multiplied by 0.95.

    ``epoch`` is part of the scheduler callback signature but is not used.
    """
    decay_factor = 0.95
    return lr * decay_factor

def train_candidate_model(model, epochs):
    """Compile and fit a candidate, then report its metrics on the training set.

    Training uses Adam(1e-3), sparse categorical cross-entropy on logits, the
    ``lr_scheduler`` callback and early stopping on ``val_loss`` (patience 10,
    min_delta 0.001, best weights restored).

    Args:
        model: Uncompiled Keras model producing class logits.
        epochs: Upper bound on the number of epochs.

    Returns:
        ``(loss, accuracy)`` from ``model.evaluate`` on ``train_ds``.
    """
    with strategy.scope():
        optimizer = tf.keras.optimizers.Adam(1e-3)
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    print("Training candidate model for up to", epochs, "epochs with early stopping...")
    lr_decay = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        min_delta=0.001,
        restore_best_weights=True,
    )
    model.fit(
        train_ds,
        epochs=epochs,
        verbose=1,
        validation_data=val_ds,
        callbacks=[lr_decay, early_stop],
    )
    train_loss, train_acc = model.evaluate(train_ds, verbose=1)
    return train_loss, train_acc

def evaluate_candidate_model(model):
    """Evaluate a compiled model on ``val_ds`` and return ``(loss, accuracy)``."""
    val_loss, val_accuracy = model.evaluate(val_ds, verbose=1)
    return (val_loss, val_accuracy)

# =============================================================================
# 5. Mixture Loss (Utility function)
# =============================================================================
def mixture_loss(mixture_logits, ensemble_preds, y_true):
    """Cross-entropy of a softmax-weighted blend of ensemble outputs.

    Args:
        mixture_logits: 1-D tensor/variable with one entry per ensemble member;
            a softmax turns it into mixture weights.
        ensemble_preds: List of per-member prediction tensors (treated as logits).
        y_true: Integer class labels.

    Returns:
        Scalar tensor: mean sparse categorical cross-entropy (``from_logits=True``)
        of the weighted sum of ``ensemble_preds``.
    """
    member_weights = tf.unstack(tf.nn.softmax(mixture_logits))
    blended = tf.zeros_like(ensemble_preds[0])
    for weight, member_pred in zip(member_weights, ensemble_preds):
        blended = blended + weight * member_pred
    per_example = tf.keras.losses.sparse_categorical_crossentropy(
        y_true, blended, from_logits=True)
    return tf.reduce_mean(per_example)

# Quick test of mixture loss (optional demonstration)
def test_convex_optimization():
    """Smoke-test ``mixture_loss``: run 10 Adam steps on random dummy predictions.

    Prints the initial loss, the loss at each step and the final softmax
    weights. Nothing is returned or asserted.
    """
    print("\n--- Testing Convex Mixture Optimization ---")
    num_members = 3
    num_steps = 10
    dummy_preds = [
        tf.random.uniform((4, num_classes), minval=-1, maxval=1)
        for _ in range(num_members)
    ]
    dummy_y = tf.constant([1, 0, 1, 0], dtype=tf.int32)
    logits_var = tf.Variable(tf.zeros([num_members]), name="mixture_logits_test", trainable=True)
    adam = tf.keras.optimizers.Adam(1e-3)

    starting_loss = mixture_loss(logits_var, dummy_preds, dummy_y)
    print("Initial dummy mixture loss:", starting_loss.numpy())

    for step in range(1, num_steps + 1):
        with tf.GradientTape() as tape:
            loss_val = mixture_loss(logits_var, dummy_preds, dummy_y)
        step_grads = tape.gradient(loss_val, [logits_var])
        adam.apply_gradients(zip(step_grads, [logits_var]))
        print(f"Epoch {step}: dummy loss {loss_val.numpy()}")

    print("Final dummy mixture weights:", tf.nn.softmax(logits_var).numpy())
    print("--- Convex Mixture Optimization Test Completed ---\n")

test_convex_optimization()

# =============================================================================
# 6. Main RL with Extended Candidate Search, Adaptive Training, & Logging
# =============================================================================
num_candidates = 5       # architectures sampled per RL iteration
ensemble = []            # models accepted so far (one appended per iteration)
prev_tokens = np.empty((0, 3), dtype=np.int32)  # tokens of the last accepted candidate
prev_model = None        # last accepted model (re-used by the safety net)
prev_obj = None          # reward (negative loss) of the last accepted candidate
iterations = 10
safety_margin = 0.01
safety_net_count = 0


def _mixture_validation_loss(models, dataset, mix_epochs=10, lr=1e-3):
    """Fit temporary mixture weights for ``models`` and return the final mean loss.

    Member predictions do not depend on the mixture weights, so they are
    computed once per batch and re-used for every optimisation epoch and for
    the final evaluation pass, instead of re-running every model each epoch.

    Args:
        models: List of Keras models forming the trial ensemble.
        dataset: Iterable of ``(x_batch, y_batch)`` used for fitting and scoring.
        mix_epochs: Number of passes over ``dataset`` for weight optimisation.
        lr: Adam learning rate for the mixture logits.

    Returns:
        Mean per-batch ``mixture_loss`` over ``dataset`` after optimisation.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    cached_batches = [
        ([m(x_batch, training=False) for m in models], y_batch)
        for x_batch, y_batch in dataset
    ]
    if not cached_batches:
        raise ValueError("dataset yielded no batches; cannot evaluate the mixture.")
    num_batches = len(cached_batches)

    mixture_logits_temp = tf.Variable(tf.zeros([len(models)]), trainable=True)
    optimizer_temp = tf.keras.optimizers.Adam(lr)

    for epoch in range(mix_epochs):
        total_loss = 0.0
        for preds, y_batch in cached_batches:
            with tf.GradientTape() as tape:
                loss_val = mixture_loss(mixture_logits_temp, preds, y_batch)
            grads = tape.gradient(loss_val, [mixture_logits_temp])
            optimizer_temp.apply_gradients(zip(grads, [mixture_logits_temp]))
            total_loss += loss_val.numpy()
        avg_loss = total_loss / num_batches
        print(f"    Mixture optimization epoch {epoch+1}/{mix_epochs}, loss: {avg_loss:.4f}")

    # Final validation mixture loss with the optimised weights.
    total_loss = 0.0
    for preds, y_batch in cached_batches:
        total_loss += mixture_loss(mixture_logits_temp, preds, y_batch).numpy()
    return total_loss / num_batches


for it in range(iterations):
    print(f"\n--- RL Iteration {it+1}/{iterations} ---")
    # Exponentially decayed sampling temperature.
    current_temperature = initial_temperature * (decay_rate ** it)
    dummy_input = tf.random.normal((num_candidates, max_new_layers, 16), mean=0.0, stddev=1.5)
    tokens_batch, logp_batch = controller_policy.sample_tokens_from_input(dummy_input, temperature=current_temperature)

    rewards = []
    candidate_tokens_list = []
    candidate_models = []

    for j in range(num_candidates):
        # Each candidate extends the previously accepted architecture.
        candidate_tokens = decode_incremental_architecture(prev_tokens, tokens_batch[j].numpy())
        num_layers = candidate_tokens.shape[0]
        # Deeper candidates get less dropout, floored at 0.1.
        dropout_rate = max(0.1, 0.3 - 0.02 * num_layers)
        print(f"\nCandidate {j+1}: layers = {num_layers}, dropout_rate = {dropout_rate:.2f}")

        model_candidate = decode_full_candidate(candidate_tokens, dropout_rate=dropout_rate)
        model_candidate.summary()
        epochs_to_train = ideal_epochs(num_layers)
        tr_loss, tr_acc = train_candidate_model(model_candidate, epochs=epochs_to_train)

        # Evaluate Candidate in Ensemble Context
        if len(ensemble) == 0:
            # No ensemble yet: the objective is the candidate's own validation loss.
            val_loss, val_acc = evaluate_candidate_model(model_candidate)
            print(f"  Standalone Evaluation -> Train loss: {tr_loss:.4f}, acc: {tr_acc:.4f} | "
                  f"Val loss: {val_loss:.4f}, acc: {val_acc:.4f}")
            obj = val_loss
        else:
            # Objective is the mixture loss of (current ensemble + candidate).
            candidate_ensemble = ensemble + [model_candidate]
            print(f"  Evaluating Candidate {j+1} with current ensemble of {len(ensemble)} models...")
            final_val_loss = _mixture_validation_loss(candidate_ensemble, val_ds, mix_epochs=10, lr=1e-3)
            print(f"  Combined Evaluation -> Final validation loss with candidate: {final_val_loss:.4f}")
            obj = final_val_loss

        r = -obj  # lower loss -> higher reward
        print(f"  Objective: {obj:.4f} | Reward: {r:.4f}")
        rewards.append(r)
        candidate_tokens_list.append(candidate_tokens)
        candidate_models.append(model_candidate)

    rewards_tf = tf.cast(tf.stack(rewards), tf.float32)
    controller_policy.update_policy(dummy_input, rewards_tf, temperature=current_temperature)

    best_idx = int(np.argmax(rewards))
    best_r = rewards[best_idx]
    best_tokens_new = candidate_tokens_list[best_idx]
    best_model_new = candidate_models[best_idx]

    print(f"\nBest candidate in iteration {it+1} had reward = {best_r:.4f}")

    # Safety check: prev_obj holds the previous accepted reward, so this fires
    # when the new best reward is worse than the previous one by more than
    # safety_margin. In that case the previous model is appended again.
    if (prev_obj is not None) and (prev_obj > best_r + safety_margin):
        print("=> Safety-net: Reusing previous candidate.")
        safety_net_count += 1
        ensemble.append(prev_model)
    else:
        print("=> Candidate selected for ensemble.")
        ensemble.append(best_model_new)
        prev_obj = best_r
        prev_model = best_model_new
        prev_tokens = best_tokens_new

print(f"\nSafety net was used in {safety_net_count} out of {iterations} iterations.")

print("\nFinal Ensemble Architectures:")
for i, model in enumerate(ensemble):
    print(f"\nCandidate {i+1} Architecture Summary:")
    model.summary()

# =============================================================================
# 7. Optimize Mixture Weights for the Ensemble (Final Stage, Distributed)
# =============================================================================
# Mixture logits and their optimizer are created under the strategy scope so
# they are mirrored across replicas.
with strategy.scope():
    mixture_logits = tf.Variable(tf.zeros([len(ensemble)]), name="mixture_logits", trainable=True)
    optimizer_mix = tf.keras.optimizers.Adam(1e-3)

# Distribute validation dataset for mixture training
dist_val_ds = strategy.experimental_distribute_dataset(val_ds)

@tf.function
def distributed_mixture_train_step(x_batch, y_batch):
    """
    Perform a single gradient update on mixture_logits in a distributed manner.

    Args:
        x_batch: Per-replica inputs from ``dist_val_ds``.
        y_batch: Per-replica integer labels from ``dist_val_ds``.

    Returns:
        The loss summed over replicas. Each replica's loss is already divided by
        the number of replicas, so the sum is the cross-replica mean.
    """
    def step_fn(inputs):
        x, y = inputs
        # Member predictions need no gradient, so they are computed outside the
        # tape; only the mixture weights are differentiated.
        ensemble_preds = [model(x, training=False) for model in ensemble]
        with tf.GradientTape() as tape:
            # mixture_loss averages over the replica-local batch. Gradients and
            # losses are summed across replicas, so scale by 1/num_replicas to
            # avoid inflating both by the replica count on multi-GPU runs.
            loss_val = mixture_loss(mixture_logits, ensemble_preds, y) / strategy.num_replicas_in_sync
        grads = tape.gradient(loss_val, [mixture_logits])
        optimizer_mix.apply_gradients(zip(grads, [mixture_logits]))
        return loss_val

    per_replica_losses = strategy.run(step_fn, args=((x_batch, y_batch),))
    return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)

epochs_mix = 50
for epoch in range(epochs_mix):
    epoch_loss = 0.0
    batches = 0
    for x_batch, y_batch in dist_val_ds:
        loss_val = distributed_mixture_train_step(x_batch, y_batch)
        epoch_loss += loss_val.numpy()
        batches += 1
    print(f"Mixture weight optimization epoch {epoch+1}/{epochs_mix}, loss: {epoch_loss / batches:.4f}")

# Softmax turns the learned logits into weights that sum to 1.
final_weights = tf.nn.softmax(mixture_logits).numpy()
print("Optimized mixture weights:", final_weights)

# =============================================================================
# Define ensemble_predict for evaluation
# =============================================================================
def ensemble_predict(x, ensemble, mixture_weights):
    """Return the weighted sum of every member's output for ``x``.

    Args:
        x: Batch of inputs passed to each member with ``training=False``.
        ensemble: Non-empty list of callables/models.
        mixture_weights: One weight per member, paired with members in order.

    Returns:
        Tensor shaped like a single member's output.
    """
    member_outputs = [member(x, training=False) for member in ensemble]
    combined = tf.zeros_like(member_outputs[0])
    for weight, output in zip(mixture_weights, member_outputs):
        combined = combined + weight * output
    return combined

# =============================================================================
# 8. Evaluate the Ensemble with Optimized Mixture Weights on the Test Set
# =============================================================================
# Loss and accuracy are accumulated per example, not per batch. Averaging
# per-batch means would over-weight the final, smaller batch whenever the test
# set size is not a multiple of batch_size.
# Errors are reported but not re-raised so that the saving and runtime-release
# steps that follow still run.
try:
    test_loss_total = 0.0     # sum of per-example losses
    test_correct_total = 0.0  # number of correctly classified examples
    num_examples = 0
    for x_batch, y_batch in test_ds:
        preds = ensemble_predict(x_batch, ensemble, final_weights)
        per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(y_batch, preds, from_logits=True)
        hits = tf.equal(tf.argmax(preds, axis=-1, output_type=tf.int32), y_batch)
        test_loss_total += tf.reduce_sum(per_example_loss).numpy()
        test_correct_total += tf.reduce_sum(tf.cast(hits, tf.float32)).numpy()
        num_examples += int(y_batch.shape[0])
    if num_examples == 0:
        raise ValueError("test_ds yielded no examples")
    print(f"\nEnsemble Test Loss: {test_loss_total / num_examples:.4f}, "
          f"Test Accuracy: {test_correct_total / num_examples:.4f}")
except Exception as e:
    print("Error calculating test accuracy:", e)

# =============================================================================
# 9. Save the Ensemble for Future Predictions
# =============================================================================
# Write every ensemble member to its own HDF5 file in the working directory.
# Any failure is printed instead of raised so the cell keeps running.
try:
    for position, member in enumerate(ensemble, start=1):
        save_path = f'ensemble_model_candidate_{position}.h5'
        member.save(save_path)
        print(f"Saved candidate {position} to {save_path}")
    print("Ensemble saved successfully.")
except Exception as e:
    print("Error saving ensemble:", e)
# =============================================================================
# 10. Save yourself some money
# =============================================================================
# Colab-only: imported here, at the point of use, rather than with the imports at
# the top of the cell.
from google.colab import runtime
# Release the runtime once everything above has finished.
# NOTE(review): presumably this discards the VM together with the .h5 files
# written to local disk above - confirm they are copied elsewhere first.
runtime.unassign()

Global mixed precision policy: <DTypePolicy "mixed_float16">
Test tensor c dtype (should be float16): <dtype: 'float16'>
Running on GPU with Mixed Precision and XLA enabled.

--- Testing Convex Mixture Optimization ---
Initial dummy mixture loss: 2.5340755
Epoch 1: dummy loss 2.5340754985809326
Epoch 2: dummy loss 2.5336976051330566
Epoch 3: dummy loss 2.5333197116851807
Epoch 4: dummy loss 2.532942295074463
Epoch 5: dummy loss 2.532565116882324
Epoch 6: dummy loss 2.5321884155273438
Epoch 7: dummy loss 2.5318121910095215
Epoch 8: dummy loss 2.531435966491699
Epoch 9: dummy loss 2.5310604572296143
Epoch 10: dummy loss 2.5306851863861084
Final dummy mixture weights: [0.33554775 0.32890514 0.33554715]
--- Convex Mixture Optimization Test Completed ---


--- RL Iteration 1/10 ---

Candidate 1: layers = 2, dropout_rate = 0.26


Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - accuracy: 0.3488 - loss: 4.3133 - val_accuracy: 0.5106 - val_loss: 1.9481 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.5103 - loss: 1.4344 - val_accuracy: 0.4582 - val_loss: 1.4948 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5502 - loss: 1.3110 - val_accuracy: 0.4214 - val_loss: 1.7159 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5724 - loss: 1.2569 - val_accuracy: 0.5558 - val_loss: 1.2694 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5824 - loss: 1.2144 - val_accuracy: 0.5666 - val_loss: 1.2805

Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 18ms/step - accuracy: 0.3008 - loss: 3.7875 - val_accuracy: 0.2318 - val_loss: 2.2774 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 35ms/step - accuracy: 0.4726 - loss: 1.5025 - val_accuracy: 0.5068 - val_loss: 1.4143 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5392 - loss: 1.3200 - val_accuracy: 0.5714 - val_loss: 1.2448 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5868 - loss: 1.1843 - val_accuracy: 0.5974 - val_loss: 1.1700 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.6152 - loss: 1.1007 - val_accuracy: 0.6038 - val_loss: 1.16

Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.2931 - loss: 5.0203 - val_accuracy: 0.2106 - val_loss: 2.3539 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 42ms/step - accuracy: 0.4668 - loss: 1.5475 - val_accuracy: 0.5596 - val_loss: 1.2829 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5459 - loss: 1.3050 - val_accuracy: 0.5672 - val_loss: 1.2433 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5955 - loss: 1.1607 - val_accuracy: 0.6472 - val_loss: 1.0526 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.6247 - loss: 1.0818 - val_accuracy: 0.6604 - val_loss: 1.02

Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 17ms/step - accuracy: 0.3441 - loss: 5.3717 - val_accuracy: 0.4304 - val_loss: 1.9595 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 29ms/step - accuracy: 0.5059 - loss: 1.4414 - val_accuracy: 0.4790 - val_loss: 1.5192 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5444 - loss: 1.3254 - val_accuracy: 0.5010 - val_loss: 1.4486 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5641 - loss: 1.2677 - val_accuracy: 0.5328 - val_loss: 1.3869 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5847 - loss: 1.2204 - val_accuracy: 0.5216 - val_loss: 1.44

Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 17ms/step - accuracy: 0.3564 - loss: 1.9484 - val_accuracy: 0.1058 - val_loss: 2.9342 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 32ms/step - accuracy: 0.5147 - loss: 1.3722 - val_accuracy: 0.5316 - val_loss: 1.3513 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5740 - loss: 1.2186 - val_accuracy: 0.6030 - val_loss: 1.1513 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.6079 - loss: 1.1264 - val_accuracy: 0.6322 - val_loss: 1.0950 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.6304 - loss: 1.0641 - val_accuracy: 0.6652 - val_loss: 1.00

Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 17ms/step - accuracy: 0.2914 - loss: 4.5692 - val_accuracy: 0.2476 - val_loss: 2.3643 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - accuracy: 0.4705 - loss: 1.5323 - val_accuracy: 0.5404 - val_loss: 1.3483 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5329 - loss: 1.3349 - val_accuracy: 0.5590 - val_loss: 1.2790 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5878 - loss: 1.1857 - val_accuracy: 0.5550 - val_loss: 1.2676 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.6151 - loss: 1.1124 - val_accuracy: 0.5954 - val_loss: 1.19

Training candidate model for up to 240 epochs with early stopping...
Epoch 1/240
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - accuracy: 0.3490 - loss: 1.9755 - val_accuracy: 0.1998 - val_loss: 2.6234 - learning_rate: 9.5000e-04
Epoch 2/240
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 40ms/step - accuracy: 0.5231 - loss: 1.3556 - val_accuracy: 0.5732 - val_loss: 1.2713 - learning_rate: 9.0250e-04
Epoch 3/240
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.5985 - loss: 1.1612 - val_accuracy: 0.6236 - val_loss: 1.1341 - learning_rate: 8.5737e-04
Epoch 4/240
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.6406 - loss: 1.0483 - val_accuracy: 0.6492 - val_loss: 1.0694 - learning_rate: 8.1451e-04
Epoch 5/240
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.6681 - loss: 0.9787 - val_accuracy: 0.6190 - val_loss: 1.13

Training candidate model for up to 220 epochs with early stopping...
Epoch 1/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 17ms/step - accuracy: 0.2879 - loss: 4.7813 - val_accuracy: 0.1562 - val_loss: 2.7480 - learning_rate: 9.5000e-04
Epoch 2/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - accuracy: 0.4668 - loss: 1.5341 - val_accuracy: 0.5288 - val_loss: 1.3359 - learning_rate: 9.0250e-04
Epoch 3/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5355 - loss: 1.3229 - val_accuracy: 0.5972 - val_loss: 1.2171 - learning_rate: 8.5737e-04
Epoch 4/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5844 - loss: 1.1867 - val_accuracy: 0.6248 - val_loss: 1.1318 - learning_rate: 8.1451e-04
Epoch 5/220
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.6207 - loss: 1.0986 - val_accuracy: 0.6320 - val_loss: 1.11

Training candidate model for up to 250 epochs with early stopping...
Epoch 1/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 24ms/step - accuracy: 0.3162 - loss: 3.0297 - val_accuracy: 0.1052 - val_loss: 2.8596 - learning_rate: 9.5000e-04
Epoch 2/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 44ms/step - accuracy: 0.5098 - loss: 1.4272 - val_accuracy: 0.6126 - val_loss: 1.1709 - learning_rate: 9.0250e-04
Epoch 3/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.6001 - loss: 1.1766 - val_accuracy: 0.6186 - val_loss: 1.1736 - learning_rate: 8.5737e-04
Epoch 4/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.6410 - loss: 1.0633 - val_accuracy: 0.7128 - val_loss: 0.9246 - learning_rate: 8.1451e-04
Epoch 5/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.6733 - loss: 0.9686 - val_accuracy: 0.7114 - val_loss: 0.91

Training candidate model for up to 250 epochs with early stopping...
Epoch 1/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.3190 - loss: 2.7814 - val_accuracy: 0.1280 - val_loss: 3.1287 - learning_rate: 9.5000e-04
Epoch 2/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 44ms/step - accuracy: 0.5176 - loss: 1.3955 - val_accuracy: 0.5518 - val_loss: 1.3281 - learning_rate: 9.0250e-04
Epoch 3/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.6003 - loss: 1.1683 - val_accuracy: 0.6436 - val_loss: 1.0698 - learning_rate: 8.5737e-04
Epoch 4/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.6443 - loss: 1.0493 - val_accuracy: 0.6654 - val_loss: 1.0082 - learning_rate: 8.1451e-04
Epoch 5/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.6718 - loss: 0.9674 - val_accuracy: 0.6574 - val_loss: 1.01

Training candidate model for up to 270 epochs with early stopping...
Epoch 1/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 26ms/step - accuracy: 0.3817 - loss: 1.9280 - val_accuracy: 0.1364 - val_loss: 3.0743 - learning_rate: 9.5000e-04
Epoch 2/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 47ms/step - accuracy: 0.5984 - loss: 1.2033 - val_accuracy: 0.5906 - val_loss: 1.2432 - learning_rate: 9.0250e-04
Epoch 3/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6737 - loss: 0.9994 - val_accuracy: 0.6344 - val_loss: 1.1146 - learning_rate: 8.5737e-04
Epoch 4/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.7092 - loss: 0.9001 - val_accuracy: 0.6846 - val_loss: 0.9865 - learning_rate: 8.1451e-04
Epoch 5/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.7405 - loss: 0.8201 - val_accuracy: 0.7278 - val_loss: 0.86

Training candidate model for up to 280 epochs with early stopping...
Epoch 1/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 29ms/step - accuracy: 0.4052 - loss: 1.8102 - val_accuracy: 0.1444 - val_loss: 3.0885 - learning_rate: 9.5000e-04
Epoch 2/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 56ms/step - accuracy: 0.6377 - loss: 1.1203 - val_accuracy: 0.6972 - val_loss: 1.0135 - learning_rate: 9.0250e-04
Epoch 3/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.7163 - loss: 0.9214 - val_accuracy: 0.6072 - val_loss: 1.2915 - learning_rate: 8.5737e-04
Epoch 4/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.7557 - loss: 0.8205 - val_accuracy: 0.7192 - val_loss: 0.9527 - learning_rate: 8.1451e-04
Epoch 5/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.7811 - loss: 0.7557 - val_accuracy: 0.7164 - val_loss: 0.94

Training candidate model for up to 250 epochs with early stopping...
Epoch 1/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 20ms/step - accuracy: 0.2885 - loss: 3.7467 - val_accuracy: 0.1326 - val_loss: 2.9182 - learning_rate: 9.5000e-04
Epoch 2/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 34ms/step - accuracy: 0.4928 - loss: 1.4520 - val_accuracy: 0.4794 - val_loss: 1.4966 - learning_rate: 9.0250e-04
Epoch 3/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5729 - loss: 1.2365 - val_accuracy: 0.5438 - val_loss: 1.2953 - learning_rate: 8.5737e-04
Epoch 4/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.6279 - loss: 1.0951 - val_accuracy: 0.6024 - val_loss: 1.1766 - learning_rate: 8.1451e-04
Epoch 5/250
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.6576 - loss: 1.0077 - val_accuracy: 0.6922 - val_loss: 0.93

Training candidate model for up to 270 epochs with early stopping...
Epoch 1/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 25ms/step - accuracy: 0.3941 - loss: 1.7717 - val_accuracy: 0.1452 - val_loss: 3.0276 - learning_rate: 9.5000e-04
Epoch 2/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 45ms/step - accuracy: 0.5934 - loss: 1.1733 - val_accuracy: 0.5874 - val_loss: 1.2260 - learning_rate: 9.0250e-04
Epoch 3/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.6749 - loss: 0.9803 - val_accuracy: 0.6086 - val_loss: 1.1708 - learning_rate: 8.5737e-04
Epoch 4/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.7087 - loss: 0.8881 - val_accuracy: 0.6814 - val_loss: 0.9914 - learning_rate: 8.1451e-04
Epoch 5/270
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.7365 - loss: 0.8121 - val_accuracy: 0.6968 - val_loss: 0.92

Training candidate model for up to 280 epochs with early stopping...
Epoch 1/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 27ms/step - accuracy: 0.3650 - loss: 1.9832 - val_accuracy: 0.1070 - val_loss: 3.1427 - learning_rate: 9.5000e-04
Epoch 2/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 51ms/step - accuracy: 0.5564 - loss: 1.3006 - val_accuracy: 0.6274 - val_loss: 1.1426 - learning_rate: 9.0250e-04
Epoch 3/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6381 - loss: 1.0901 - val_accuracy: 0.5040 - val_loss: 1.5413 - learning_rate: 8.5737e-04
Epoch 4/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.6869 - loss: 0.9591 - val_accuracy: 0.6942 - val_loss: 0.9717 - learning_rate: 8.1451e-04
Epoch 5/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.7227 - loss: 0.8660 - val_accuracy: 0.7108 - val_loss: 0.93

Training candidate model for up to 310 epochs with early stopping...
Epoch 1/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 31ms/step - accuracy: 0.3827 - loss: 1.8366 - val_accuracy: 0.0964 - val_loss: 3.8020 - learning_rate: 9.5000e-04
Epoch 2/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 63ms/step - accuracy: 0.6166 - loss: 1.1993 - val_accuracy: 0.6108 - val_loss: 1.3101 - learning_rate: 9.0250e-04
Epoch 3/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7012 - loss: 0.9937 - val_accuracy: 0.6166 - val_loss: 1.3486 - learning_rate: 8.5737e-04
Epoch 4/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7453 - loss: 0.8826 - val_accuracy: 0.7242 - val_loss: 0.9346 - learning_rate: 8.1451e-04
Epoch 5/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7712 - loss: 0.8097 - val_accuracy: 0.7162 - val_loss: 1.02

Training candidate model for up to 310 epochs with early stopping...
Epoch 1/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 34ms/step - accuracy: 0.3290 - loss: 1.9369 - val_accuracy: 0.1634 - val_loss: 2.7751 - learning_rate: 9.5000e-04
Epoch 2/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 73ms/step - accuracy: 0.5849 - loss: 1.2801 - val_accuracy: 0.5506 - val_loss: 1.4247 - learning_rate: 9.0250e-04
Epoch 3/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.6775 - loss: 1.0558 - val_accuracy: 0.7586 - val_loss: 0.8685 - learning_rate: 8.5737e-04
Epoch 4/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.7262 - loss: 0.9332 - val_accuracy: 0.7100 - val_loss: 0.9892 - learning_rate: 8.1451e-04
Epoch 5/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.7619 - loss: 0.8531 - val_accuracy: 0.7376 - val_loss: 0.94

Training candidate model for up to 280 epochs with early stopping...
Epoch 1/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 25ms/step - accuracy: 0.4140 - loss: 1.7843 - val_accuracy: 0.1792 - val_loss: 2.9903 - learning_rate: 9.5000e-04
Epoch 2/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 46ms/step - accuracy: 0.6399 - loss: 1.1172 - val_accuracy: 0.5782 - val_loss: 1.3072 - learning_rate: 9.0250e-04
Epoch 3/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.7148 - loss: 0.9201 - val_accuracy: 0.6690 - val_loss: 1.0469 - learning_rate: 8.5737e-04
Epoch 4/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.7536 - loss: 0.8205 - val_accuracy: 0.6980 - val_loss: 0.9837 - learning_rate: 8.1451e-04
Epoch 5/280
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.7808 - loss: 0.7580 - val_accuracy: 0.7330 - val_loss: 0.90

Training candidate model for up to 300 epochs with early stopping...
Epoch 1/300
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 31ms/step - accuracy: 0.3946 - loss: 1.8164 - val_accuracy: 0.2628 - val_loss: 2.7353 - learning_rate: 9.5000e-04
Epoch 2/300
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 65ms/step - accuracy: 0.6333 - loss: 1.1853 - val_accuracy: 0.6188 - val_loss: 1.2706 - learning_rate: 9.0250e-04
Epoch 3/300
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.7092 - loss: 0.9837 - val_accuracy: 0.6458 - val_loss: 1.2280 - learning_rate: 8.5737e-04
Epoch 4/300
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.7475 - loss: 0.8817 - val_accuracy: 0.7212 - val_loss: 0.9883 - learning_rate: 8.1451e-04
Epoch 5/300
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.7812 - loss: 0.8014 - val_accuracy: 0.7722 - val_loss: 0.84

Training candidate model for up to 310 epochs with early stopping...
Epoch 1/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 29ms/step - accuracy: 0.4000 - loss: 1.7637 - val_accuracy: 0.2254 - val_loss: 2.8706 - learning_rate: 9.5000e-04
Epoch 2/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 58ms/step - accuracy: 0.6377 - loss: 1.1441 - val_accuracy: 0.6658 - val_loss: 1.1031 - learning_rate: 9.0250e-04
Epoch 3/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7159 - loss: 0.9444 - val_accuracy: 0.7110 - val_loss: 1.0123 - learning_rate: 8.5737e-04
Epoch 4/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7589 - loss: 0.8470 - val_accuracy: 0.7896 - val_loss: 0.7684 - learning_rate: 8.1451e-04
Epoch 5/310
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7896 - loss: 0.7656 - val_accuracy: 0.7916 - val_loss: 0.77

Training candidate model for up to 340 epochs with early stopping...
Epoch 1/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 32ms/step - accuracy: 0.3596 - loss: 1.8623 - val_accuracy: 0.1780 - val_loss: 3.1354 - learning_rate: 9.5000e-04
Epoch 2/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 66ms/step - accuracy: 0.6057 - loss: 1.2267 - val_accuracy: 0.6054 - val_loss: 1.2695 - learning_rate: 9.0250e-04
Epoch 3/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.6927 - loss: 1.0094 - val_accuracy: 0.6784 - val_loss: 1.0637 - learning_rate: 8.5737e-04
Epoch 4/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7386 - loss: 0.8930 - val_accuracy: 0.7446 - val_loss: 0.8816 - learning_rate: 8.1451e-04
Epoch 5/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.7666 - loss: 0.8225 - val_accuracy: 0.6744 - val_loss: 1.12

Training candidate model for up to 340 epochs with early stopping...
Epoch 1/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 40ms/step - accuracy: 0.2469 - loss: 2.2422 - val_accuracy: 0.1216 - val_loss: 2.9056 - learning_rate: 9.5000e-04
Epoch 2/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 92ms/step - accuracy: 0.4915 - loss: 1.5585 - val_accuracy: 0.4050 - val_loss: 1.8243 - learning_rate: 9.0250e-04
Epoch 3/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.6027 - loss: 1.2810 - val_accuracy: 0.6542 - val_loss: 1.1410 - learning_rate: 8.5737e-04
Epoch 4/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 24ms/step - accuracy: 0.6664 - loss: 1.1262 - val_accuracy: 0.4886 - val_loss: 1.6771 - learning_rate: 8.1451e-04
Epoch 5/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.7063 - loss: 1.0302 - val_accuracy: 0.6834 - val_loss: 1.13

Training candidate model for up to 330 epochs with early stopping...
Epoch 1/330
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 36ms/step - accuracy: 0.3194 - loss: 2.0265 - val_accuracy: 0.1428 - val_loss: 3.2531 - learning_rate: 9.5000e-04
Epoch 2/330
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 80ms/step - accuracy: 0.5614 - loss: 1.3595 - val_accuracy: 0.6026 - val_loss: 1.3066 - learning_rate: 9.0250e-04
Epoch 3/330
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6587 - loss: 1.1115 - val_accuracy: 0.6286 - val_loss: 1.2222 - learning_rate: 8.5737e-04
Epoch 4/330
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7097 - loss: 0.9806 - val_accuracy: 0.7074 - val_loss: 1.0404 - learning_rate: 8.1451e-04
Epoch 5/330
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7442 - loss: 0.8987 - val_accuracy: 0.7566 - val_loss: 0.86

Training candidate model for up to 340 epochs with early stopping...
Epoch 1/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 35ms/step - accuracy: 0.3363 - loss: 1.9527 - val_accuracy: 0.1490 - val_loss: 3.4363 - learning_rate: 9.5000e-04
Epoch 2/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 74ms/step - accuracy: 0.5845 - loss: 1.2885 - val_accuracy: 0.5334 - val_loss: 1.5988 - learning_rate: 9.0250e-04
Epoch 3/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6840 - loss: 1.0485 - val_accuracy: 0.6310 - val_loss: 1.2266 - learning_rate: 8.5737e-04
Epoch 4/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7306 - loss: 0.9266 - val_accuracy: 0.6918 - val_loss: 1.0532 - learning_rate: 8.1451e-04
Epoch 5/340
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7569 - loss: 0.8552 - val_accuracy: 0.6326 - val_loss: 1.25

Training candidate model for up to 320 epochs with early stopping...
Epoch 1/320
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 32ms/step - accuracy: 0.3526 - loss: 1.8911 - val_accuracy: 0.1554 - val_loss: 3.0781 - learning_rate: 9.5000e-04
Epoch 2/320
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 64ms/step - accuracy: 0.6016 - loss: 1.2361 - val_accuracy: 0.6120 - val_loss: 1.2494 - learning_rate: 9.0250e-04
Epoch 3/320
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.6949 - loss: 1.0084 - val_accuracy: 0.6866 - val_loss: 1.0494 - learning_rate: 8.5737e-04
Epoch 4/320
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7371 - loss: 0.8989 - val_accuracy: 0.7176 - val_loss: 0.9770 - learning_rate: 8.1451e-04
Epoch 5/320
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7708 - loss: 0.8262 - val_accuracy: 0.7052 - val_loss: 1.06

Training candidate model for up to 360 epochs with early stopping...
Epoch 1/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 33ms/step - accuracy: 0.3177 - loss: 2.0342 - val_accuracy: 0.1186 - val_loss: 2.9956 - learning_rate: 9.5000e-04
Epoch 2/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 72ms/step - accuracy: 0.5560 - loss: 1.3684 - val_accuracy: 0.6064 - val_loss: 1.2452 - learning_rate: 9.0250e-04
Epoch 3/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6612 - loss: 1.1142 - val_accuracy: 0.6408 - val_loss: 1.1989 - learning_rate: 8.5737e-04
Epoch 4/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7129 - loss: 0.9847 - val_accuracy: 0.7202 - val_loss: 0.9711 - learning_rate: 8.1451e-04
Epoch 5/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7449 - loss: 0.9039 - val_accuracy: 0.6578 - val_loss: 1.17

Training candidate model for up to 360 epochs with early stopping...
Epoch 1/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 33ms/step - accuracy: 0.3384 - loss: 1.9325 - val_accuracy: 0.1024 - val_loss: 3.4823 - learning_rate: 9.5000e-04
Epoch 2/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 69ms/step - accuracy: 0.5872 - loss: 1.2846 - val_accuracy: 0.6390 - val_loss: 1.1607 - learning_rate: 9.0250e-04
Epoch 3/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6815 - loss: 1.0484 - val_accuracy: 0.6586 - val_loss: 1.1350 - learning_rate: 8.5737e-04
Epoch 4/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7214 - loss: 0.9415 - val_accuracy: 0.7370 - val_loss: 0.9153 - learning_rate: 8.1451e-04
Epoch 5/360
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7565 - loss: 0.8594 - val_accuracy: 0.7466 - val_loss: 0.90

Training candidate model for up to 370 epochs with early stopping...
Epoch 1/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 36ms/step - accuracy: 0.3106 - loss: 2.0192 - val_accuracy: 0.0962 - val_loss: 3.9399 - learning_rate: 9.5000e-04
Epoch 2/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 81ms/step - accuracy: 0.5463 - loss: 1.3922 - val_accuracy: 0.5440 - val_loss: 1.4407 - learning_rate: 9.0250e-04
Epoch 3/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6475 - loss: 1.1456 - val_accuracy: 0.6120 - val_loss: 1.2416 - learning_rate: 8.5737e-04
Epoch 4/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7029 - loss: 1.0073 - val_accuracy: 0.7124 - val_loss: 0.9881 - learning_rate: 8.1451e-04
Epoch 5/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.7404 - loss: 0.9221 - val_accuracy: 0.6734 - val_loss: 1.17

Training candidate model for up to 350 epochs with early stopping...
Epoch 1/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 36ms/step - accuracy: 0.3193 - loss: 2.0311 - val_accuracy: 0.1374 - val_loss: 3.0542 - learning_rate: 9.5000e-04
Epoch 2/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 79ms/step - accuracy: 0.5623 - loss: 1.3574 - val_accuracy: 0.6306 - val_loss: 1.2174 - learning_rate: 9.0250e-04
Epoch 3/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6615 - loss: 1.1128 - val_accuracy: 0.6460 - val_loss: 1.1642 - learning_rate: 8.5737e-04
Epoch 4/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7187 - loss: 0.9770 - val_accuracy: 0.5974 - val_loss: 1.3756 - learning_rate: 8.1451e-04
Epoch 5/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7521 - loss: 0.8880 - val_accuracy: 0.6944 - val_loss: 1.05

Training candidate model for up to 350 epochs with early stopping...
Epoch 1/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 32ms/step - accuracy: 0.3329 - loss: 1.9368 - val_accuracy: 0.1854 - val_loss: 2.9481 - learning_rate: 9.5000e-04
Epoch 2/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 68ms/step - accuracy: 0.5756 - loss: 1.2970 - val_accuracy: 0.5510 - val_loss: 1.5234 - learning_rate: 9.0250e-04
Epoch 3/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.6741 - loss: 1.0622 - val_accuracy: 0.6830 - val_loss: 1.0982 - learning_rate: 8.5737e-04
Epoch 4/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7250 - loss: 0.9353 - val_accuracy: 0.6884 - val_loss: 1.0902 - learning_rate: 8.1451e-04
Epoch 5/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7542 - loss: 0.8630 - val_accuracy: 0.7416 - val_loss: 0.89

Training candidate model for up to 350 epochs with early stopping...
Epoch 1/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 30ms/step - accuracy: 0.3343 - loss: 1.9447 - val_accuracy: 0.1220 - val_loss: 3.2089 - learning_rate: 9.5000e-04
Epoch 2/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 64ms/step - accuracy: 0.5809 - loss: 1.2921 - val_accuracy: 0.5988 - val_loss: 1.2884 - learning_rate: 9.0250e-04
Epoch 3/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.6814 - loss: 1.0457 - val_accuracy: 0.6796 - val_loss: 1.0705 - learning_rate: 8.5737e-04
Epoch 4/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7329 - loss: 0.9193 - val_accuracy: 0.7154 - val_loss: 0.9788 - learning_rate: 8.1451e-04
Epoch 5/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7599 - loss: 0.8539 - val_accuracy: 0.6546 - val_loss: 1.25

Training candidate model for up to 380 epochs with early stopping...
Epoch 1/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 48ms/step - accuracy: 0.2187 - loss: 2.3233 - val_accuracy: 0.0970 - val_loss: 3.0518 - learning_rate: 9.5000e-04
Epoch 2/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 97ms/step - accuracy: 0.4471 - loss: 1.6490 - val_accuracy: 0.2938 - val_loss: 2.6469 - learning_rate: 9.0250e-04
Epoch 3/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.5584 - loss: 1.3881 - val_accuracy: 0.4722 - val_loss: 1.6848 - learning_rate: 8.5737e-04
Epoch 4/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.6231 - loss: 1.2219 - val_accuracy: 0.5796 - val_loss: 1.3783 - learning_rate: 8.1451e-04
Epoch 5/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.6680 - loss: 1.1101 - val_accuracy: 0.6484 - val_loss: 1.16

Training candidate model for up to 380 epochs with early stopping...
Epoch 1/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 36ms/step - accuracy: 0.2891 - loss: 2.0668 - val_accuracy: 0.1062 - val_loss: 3.4912 - learning_rate: 9.5000e-04
Epoch 2/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 80ms/step - accuracy: 0.5336 - loss: 1.4168 - val_accuracy: 0.4138 - val_loss: 2.0340 - learning_rate: 9.0250e-04
Epoch 3/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6364 - loss: 1.1594 - val_accuracy: 0.6408 - val_loss: 1.1710 - learning_rate: 8.5737e-04
Epoch 4/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7005 - loss: 1.0194 - val_accuracy: 0.6418 - val_loss: 1.2464 - learning_rate: 8.1451e-04
Epoch 5/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7309 - loss: 0.9313 - val_accuracy: 0.7406 - val_loss: 0.92

Training candidate model for up to 380 epochs with early stopping...
Epoch 1/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 39ms/step - accuracy: 0.2725 - loss: 2.1365 - val_accuracy: 0.1190 - val_loss: 3.3368 - learning_rate: 9.5000e-04
Epoch 2/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 88ms/step - accuracy: 0.5127 - loss: 1.4845 - val_accuracy: 0.3360 - val_loss: 2.3454 - learning_rate: 9.0250e-04
Epoch 3/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6111 - loss: 1.2439 - val_accuracy: 0.6066 - val_loss: 1.2878 - learning_rate: 8.5737e-04
Epoch 4/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6744 - loss: 1.0863 - val_accuracy: 0.6352 - val_loss: 1.2081 - learning_rate: 8.1451e-04
Epoch 5/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.7125 - loss: 0.9975 - val_accuracy: 0.6842 - val_loss: 1.08

Training candidate model for up to 350 epochs with early stopping...
Epoch 1/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 31ms/step - accuracy: 0.3392 - loss: 1.9214 - val_accuracy: 0.0962 - val_loss: 3.8406 - learning_rate: 9.5000e-04
Epoch 2/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 64ms/step - accuracy: 0.5846 - loss: 1.2763 - val_accuracy: 0.5606 - val_loss: 1.4349 - learning_rate: 9.0250e-04
Epoch 3/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.6794 - loss: 1.0521 - val_accuracy: 0.6416 - val_loss: 1.1964 - learning_rate: 8.5737e-04
Epoch 4/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7279 - loss: 0.9224 - val_accuracy: 0.6928 - val_loss: 1.0755 - learning_rate: 8.1451e-04
Epoch 5/350
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7588 - loss: 0.8535 - val_accuracy: 0.7460 - val_loss: 0.88

Training candidate model for up to 370 epochs with early stopping...
Epoch 1/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 36ms/step - accuracy: 0.2721 - loss: 2.1371 - val_accuracy: 0.0986 - val_loss: 3.7274 - learning_rate: 9.5000e-04
Epoch 2/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 81ms/step - accuracy: 0.5011 - loss: 1.5053 - val_accuracy: 0.5166 - val_loss: 1.4817 - learning_rate: 9.0250e-04
Epoch 3/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6197 - loss: 1.2192 - val_accuracy: 0.5610 - val_loss: 1.4115 - learning_rate: 8.5737e-04
Epoch 4/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6834 - loss: 1.0724 - val_accuracy: 0.6772 - val_loss: 1.0902 - learning_rate: 8.1451e-04
Epoch 5/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7256 - loss: 0.9664 - val_accuracy: 0.6430 - val_loss: 1.24

Training candidate model for up to 370 epochs with early stopping...
Epoch 1/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 34ms/step - accuracy: 0.3204 - loss: 1.9906 - val_accuracy: 0.1232 - val_loss: 3.6484 - learning_rate: 9.5000e-04
Epoch 2/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 75ms/step - accuracy: 0.5549 - loss: 1.3528 - val_accuracy: 0.4736 - val_loss: 1.6199 - learning_rate: 9.0250e-04
Epoch 3/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6558 - loss: 1.1060 - val_accuracy: 0.6412 - val_loss: 1.1475 - learning_rate: 8.5737e-04
Epoch 4/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7022 - loss: 0.9889 - val_accuracy: 0.6706 - val_loss: 1.0631 - learning_rate: 8.1451e-04
Epoch 5/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7453 - loss: 0.8937 - val_accuracy: 0.7004 - val_loss: 1.02

Training candidate model for up to 380 epochs with early stopping...
Epoch 1/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 34ms/step - accuracy: 0.3237 - loss: 1.9905 - val_accuracy: 0.1622 - val_loss: 2.8908 - learning_rate: 9.5000e-04
Epoch 2/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 71ms/step - accuracy: 0.5617 - loss: 1.3402 - val_accuracy: 0.5812 - val_loss: 1.2638 - learning_rate: 9.0250e-04
Epoch 3/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.6614 - loss: 1.0973 - val_accuracy: 0.6804 - val_loss: 1.0904 - learning_rate: 8.5737e-04
Epoch 4/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7171 - loss: 0.9672 - val_accuracy: 0.7410 - val_loss: 0.8832 - learning_rate: 8.1451e-04
Epoch 5/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.7457 - loss: 0.8921 - val_accuracy: 0.7412 - val_loss: 0.89

Training candidate model for up to 370 epochs with early stopping...
Epoch 1/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 37ms/step - accuracy: 0.3049 - loss: 2.0669 - val_accuracy: 0.1074 - val_loss: 3.3778 - learning_rate: 9.5000e-04
Epoch 2/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 81ms/step - accuracy: 0.5414 - loss: 1.4049 - val_accuracy: 0.5770 - val_loss: 1.3402 - learning_rate: 9.0250e-04
Epoch 3/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6448 - loss: 1.1508 - val_accuracy: 0.5380 - val_loss: 1.5609 - learning_rate: 8.5737e-04
Epoch 4/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6997 - loss: 1.0216 - val_accuracy: 0.5684 - val_loss: 1.5699 - learning_rate: 8.1451e-04
Epoch 5/370
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7358 - loss: 0.9297 - val_accuracy: 0.7558 - val_loss: 0.86

Training candidate model for up to 380 epochs with early stopping...
Epoch 1/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 47ms/step - accuracy: 0.2209 - loss: 2.3241 - val_accuracy: 0.1054 - val_loss: 3.2250 - learning_rate: 9.5000e-04
Epoch 2/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 99ms/step - accuracy: 0.4517 - loss: 1.6712 - val_accuracy: 0.5092 - val_loss: 1.5652 - learning_rate: 9.0250e-04
Epoch 3/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.5660 - loss: 1.3969 - val_accuracy: 0.5560 - val_loss: 1.4573 - learning_rate: 8.5737e-04
Epoch 4/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.6407 - loss: 1.2168 - val_accuracy: 0.5828 - val_loss: 1.4403 - learning_rate: 8.1451e-04
Epoch 5/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 24ms/step - accuracy: 0.6820 - loss: 1.1072 - val_accuracy: 0.6962 - val_loss: 1.08

Training candidate model for up to 410 epochs with early stopping...
Epoch 1/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 45ms/step - accuracy: 0.2746 - loss: 2.1338 - val_accuracy: 0.0962 - val_loss: 3.4238 - learning_rate: 9.5000e-04
Epoch 2/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 88ms/step - accuracy: 0.5218 - loss: 1.4607 - val_accuracy: 0.4466 - val_loss: 1.6718 - learning_rate: 9.0250e-04
Epoch 3/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6342 - loss: 1.1925 - val_accuracy: 0.6046 - val_loss: 1.2992 - learning_rate: 8.5737e-04
Epoch 4/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6878 - loss: 1.0599 - val_accuracy: 0.6450 - val_loss: 1.1847 - learning_rate: 8.1451e-04
Epoch 5/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.7247 - loss: 0.9650 - val_accuracy: 0.6926 - val_loss: 1.03

Training candidate model for up to 400 epochs with early stopping...
Epoch 1/400
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 37ms/step - accuracy: 0.2695 - loss: 2.1460 - val_accuracy: 0.1236 - val_loss: 3.0926 - learning_rate: 9.5000e-04
Epoch 2/400
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 85ms/step - accuracy: 0.5175 - loss: 1.4746 - val_accuracy: 0.5258 - val_loss: 1.4509 - learning_rate: 9.0250e-04
Epoch 3/400
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6257 - loss: 1.2153 - val_accuracy: 0.6206 - val_loss: 1.2968 - learning_rate: 8.5737e-04
Epoch 4/400
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6812 - loss: 1.0717 - val_accuracy: 0.6920 - val_loss: 1.0621 - learning_rate: 8.1451e-04
Epoch 5/400
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.7204 - loss: 0.9796 - val_accuracy: 0.7370 - val_loss: 0.92

Training candidate model for up to 380 epochs with early stopping...
Epoch 1/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 32ms/step - accuracy: 0.3231 - loss: 1.9857 - val_accuracy: 0.1128 - val_loss: 3.0232 - learning_rate: 9.5000e-04
Epoch 2/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 68ms/step - accuracy: 0.5560 - loss: 1.3612 - val_accuracy: 0.6150 - val_loss: 1.2297 - learning_rate: 9.0250e-04
Epoch 3/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6491 - loss: 1.1251 - val_accuracy: 0.6300 - val_loss: 1.2173 - learning_rate: 8.5737e-04
Epoch 4/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7045 - loss: 0.9902 - val_accuracy: 0.6604 - val_loss: 1.1392 - learning_rate: 8.1451e-04
Epoch 5/380
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7407 - loss: 0.9033 - val_accuracy: 0.7488 - val_loss: 0.89

Training candidate model for up to 410 epochs with early stopping...
Epoch 1/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 37ms/step - accuracy: 0.3021 - loss: 2.0407 - val_accuracy: 0.1070 - val_loss: 3.4493 - learning_rate: 9.5000e-04
Epoch 2/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 80ms/step - accuracy: 0.5477 - loss: 1.3887 - val_accuracy: 0.5836 - val_loss: 1.2940 - learning_rate: 9.0250e-04
Epoch 3/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6470 - loss: 1.1384 - val_accuracy: 0.6112 - val_loss: 1.2232 - learning_rate: 8.5737e-04
Epoch 4/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.7043 - loss: 1.0038 - val_accuracy: 0.7364 - val_loss: 0.9126 - learning_rate: 8.1451e-04
Epoch 5/410
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7328 - loss: 0.9214 - val_accuracy: 0.7040 - val_loss: 1.04

Training candidate model for up to 390 epochs with early stopping...
Epoch 1/390
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 35ms/step - accuracy: 0.2883 - loss: 2.0747 - val_accuracy: 0.1360 - val_loss: 3.0777 - learning_rate: 9.5000e-04
Epoch 2/390
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 78ms/step - accuracy: 0.5330 - loss: 1.4180 - val_accuracy: 0.4790 - val_loss: 1.6324 - learning_rate: 9.0250e-04
Epoch 3/390
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.6396 - loss: 1.1643 - val_accuracy: 0.6616 - val_loss: 1.1115 - learning_rate: 8.5737e-04
Epoch 4/390
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6973 - loss: 1.0215 - val_accuracy: 0.7122 - val_loss: 0.9674 - learning_rate: 8.1451e-04
Epoch 5/390
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.7336 - loss: 0.9323 - val_accuracy: 0.7364 - val_loss: 0.93

Training candidate model for up to 440 epochs with early stopping...
Epoch 1/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 50ms/step - accuracy: 0.2091 - loss: 2.3279 - val_accuracy: 0.1018 - val_loss: 2.7732 - learning_rate: 9.5000e-04
Epoch 2/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 105ms/step - accuracy: 0.4298 - loss: 1.6856 - val_accuracy: 0.4934 - val_loss: 1.5962 - learning_rate: 9.0250e-04
Epoch 3/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.5481 - loss: 1.4161 - val_accuracy: 0.5484 - val_loss: 1.4655 - learning_rate: 8.5737e-04
Epoch 4/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.6209 - loss: 1.2493 - val_accuracy: 0.6548 - val_loss: 1.1350 - learning_rate: 8.1451e-04
Epoch 5/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.6655 - loss: 1.1375 - val_accuracy: 0.5910 - val_loss: 1.3

Training candidate model for up to 440 epochs with early stopping...
Epoch 1/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 37ms/step - accuracy: 0.2353 - loss: 2.2182 - val_accuracy: 0.0990 - val_loss: 3.2375 - learning_rate: 9.5000e-04
Epoch 2/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 85ms/step - accuracy: 0.4800 - loss: 1.5502 - val_accuracy: 0.4286 - val_loss: 1.8947 - learning_rate: 9.0250e-04
Epoch 3/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.5906 - loss: 1.2879 - val_accuracy: 0.5862 - val_loss: 1.3742 - learning_rate: 8.5737e-04
Epoch 4/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6559 - loss: 1.1354 - val_accuracy: 0.6340 - val_loss: 1.2114 - learning_rate: 8.1451e-04
Epoch 5/440
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.6956 - loss: 1.0360 - val_accuracy: 0.7052 - val_loss: 1.00