# The Sequential model

**Author:** [fchollet](https://twitter.com/fchollet)<br>
**Date created:** 2020/04/12<br>
**Last modified:** 2023/06/25<br>
**Description:** Complete guide to the Sequential model that does not work. The outcome is because the test images were combined with the training images, causing false positives.

## Setup

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint

model = None

In [16]:

class SimpleCIFAR10DataGenerator(Sequence):
    def __init__(self, images, labels, batch_size=32, num_classes=10, shuffle=True):
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.indices = np.arange(len(self.images))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.images) / self.batch_size))

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_images = self.images[batch_indices]
        batch_labels = self.labels[batch_indices]

        batch_labels_onehot = to_categorical(batch_labels, num_classes=self.num_classes)
        return batch_images, batch_labels_onehot

def create_large_model(num_classes=10):
    rgb_input = layers.Input(shape=(32, 32, 3), name="rgb_input")

    def alpha_output_shape(input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], 1)
    alpha = layers.Lambda(lambda x: tf.ones_like(x[..., :1]),
                          output_shape=alpha_output_shape, name="alpha_creation")(rgb_input)
    rgba = layers.Concatenate(axis=-1, name="rgba_concat")([rgb_input, alpha])

    def pair_output_shape(input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], 2)
    def triplet_output_shape(input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], 3)
    def quad_output_shape(input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], 4)

    # For pairs that do not reorder channels, direct indexing might still fail, so let's use tf.gather everywhere for consistency:
    # rg: [0,1]
    rg = layers.Lambda(lambda x: tf.gather(x, [0,1], axis=-1), output_shape=pair_output_shape, name="rg_extract")(rgba)
    rb = layers.Lambda(lambda x: tf.gather(x, [0,2], axis=-1), output_shape=pair_output_shape, name="rb_extract")(rgba)
    gb = layers.Lambda(lambda x: tf.gather(x, [1,2], axis=-1), output_shape=pair_output_shape, name="gb_extract")(rgba)
    ra = layers.Lambda(lambda x: tf.gather(x, [0,3], axis=-1), output_shape=pair_output_shape, name="ra_extract")(rgba)
    ga = layers.Lambda(lambda x: tf.gather(x, [1,3], axis=-1), output_shape=pair_output_shape, name="ga_extract")(rgba)
    ba = layers.Lambda(lambda x: tf.gather(x, [2,3], axis=-1), output_shape=pair_output_shape, name="ba_extract")(rgba)
    rba= layers.Lambda(lambda x: tf.gather(x, [0,2,3], axis=-1), output_shape=triplet_output_shape, name="rba_extract")(rgba)
    rga= layers.Lambda(lambda x: tf.gather(x, [0,1,3], axis=-1), output_shape=triplet_output_shape, name="rga_extract")(rgba)
    bga= layers.Lambda(lambda x: tf.gather(x, [2,1,3], axis=-1), output_shape=triplet_output_shape, name="bga_extract")(rgba)
    rgb= layers.Lambda(lambda x: tf.gather(x, [0,1,2,3], axis=-1), output_shape=quad_output_shape, name="rgba_extract")(rgba)

    # Flatten all
    rg_flat = layers.Flatten(name="rg_flat")(rg)
    rb_flat = layers.Flatten(name="rb_flat")(rb)
    gb_flat = layers.Flatten(name="gb_flat")(gb)
    ra_flat = layers.Flatten(name="ra_flat")(ra)
    ga_flat = layers.Flatten(name="ga_flat")(ga)
    ba_flat = layers.Flatten(name="ba_flat")(ba)
    rba_flat= layers.Flatten(name="rba_flat")(rba)
    rga_flat= layers.Flatten(name="rga_flat")(rga)
    bga_flat= layers.Flatten(name="bga_flat")(bga)
    rgba_flat= layers.Flatten(name="rgba_flat")(rgb)
    rgba_flat= layers.LayerNormalization(name="rgba_norm")(rgba_flat)

    def process_subset(name, input_tensor):
        x = layers.Dense(256, activation='relu', name=f"{name}_dense_1")(input_tensor)
        x = layers.Dense(128, activation='relu', name=f"{name}_dense_2")(x)
        return x

    rg_proc = process_subset("rg", rg_flat)
    rb_proc = process_subset("rb", rb_flat)
    gb_proc = process_subset("gb", gb_flat)
    ra_proc = process_subset("ra", ra_flat)
    ga_proc = process_subset("ga", ga_flat)
    ba_proc = process_subset("ba", ba_flat)
    rba_proc= process_subset("rba", rba_flat)
    rga_proc= process_subset("rga", rga_flat)
    bga_proc= process_subset("bga", bga_flat)
    rgba_proc= process_subset("rgba", rgba_flat)

    concatenated = layers.Concatenate(name="color_concatenation")([
        rg_proc, rb_proc, gb_proc, ra_proc, ga_proc, ba_proc, rba_proc, rga_proc, bga_proc, rgba_flat
    ])

    x = layers.Dense(512, activation='relu', name="final_dense_1")(concatenated)
    x = layers.Dense(256, activation='relu', name="final_dense_2")(x)
    output = layers.Dense(num_classes, activation='softmax', name="output")(x)

    model = models.Model(inputs=rgb_input, outputs=output)
    return model


In [17]:
model = None

In [None]:
# load the model
model = models.load_model("best_model.keras")

In [18]:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
images = np.concatenate([train_images, test_images], axis=0)
labels = np.concatenate([train_labels, test_labels], axis=0)


In [19]:
batch_size = 128


train_generator = SimpleCIFAR10DataGenerator(
    images=images,
    labels=labels,
    batch_size=batch_size,
    num_classes=10,
    shuffle=True
)

val_generator = SimpleCIFAR10DataGenerator(
    images=test_images,
    labels=test_labels,
    batch_size=batch_size,
    num_classes=10,
    shuffle=False
)

if model is None:
  print("creating large model")
  model = create_large_model(num_classes=10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.summary()

# ModelCheckpoint callback: save the best model based on validation loss
checkpoint_callback = ModelCheckpoint(
    filepath='best_model.keras',    # file to save the best model
    monitor='val_loss',          # metric to monitor
    save_best_only=True,         # only save when val_loss improves
    save_weights_only=False,     # save the entire model, not just weights
    mode='min',                  # looking for minimum val_loss
    verbose=1
)
model.summary()
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=500,
    batch_size=batch_size,
    callbacks=[checkpoint_callback],

    verbose=1
)

#main()


creating large model


Epoch 1/500
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2238 - loss: 56.3663
Epoch 1: val_loss improved from inf to 1.72048, saving model to best_model.keras
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - accuracy: 0.2240 - loss: 56.2779 - val_accuracy: 0.4005 - val_loss: 1.7205
Epoch 2/500
[1m462/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.3881 - loss: 1.7580
Epoch 2: val_loss improved from 1.72048 to 1.61159, saving model to best_model.keras
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.3883 - loss: 1.7572 - val_accuracy: 0.4252 - val_loss: 1.6116
Epoch 3/500
[1m461/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.4273 - loss: 1.6239
Epoch 3: val_loss improved from 1.61159 to 1.54598, saving model to best_model.keras
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - acc

<keras.src.callbacks.history.History at 0x7b0fec3d7b80>

In [26]:
# lets save our model
model.save("color_model.keras")

## When to use a Sequential model

A `Sequential` model is appropriate for **a plain stack of layers**
where each layer has **exactly one input tensor and one output tensor**.

Schematically, the following `Sequential` model:

In [2]:
# Define Sequential model with 3 layers
model = keras.Sequential(
    [
        layers.Dense(2, activation="relu", name="layer1"),
        layers.Dense(3, activation="relu", name="layer2"),
        layers.Dense(4, name="layer3"),
    ]
)
# Call model on a test input
x = ops.ones((3, 3))
y = model(x)

is equivalent to this function:

In [3]:
# Create 3 layers
layer1 = layers.Dense(2, activation="relu", name="layer1")
layer2 = layers.Dense(3, activation="relu", name="layer2")
layer3 = layers.Dense(4, name="layer3")

# Call layers on a test input
x = ops.ones((3, 3))
y = layer3(layer2(layer1(x)))

A Sequential model is **not appropriate** when:

- Your model has multiple inputs or multiple outputs
- Any of your layers has multiple inputs or multiple outputs
- You need to do layer sharing
- You want non-linear topology (e.g. a residual connection, a multi-branch
model)

## Creating a Sequential model

You can create a Sequential model by passing a list of layers to the Sequential
constructor:

In [4]:
model = keras.Sequential(
    [
        layers.Dense(2, activation="relu"),
        layers.Dense(3, activation="relu"),
        layers.Dense(4),
    ]
)

Its layers are accessible via the `layers` attribute:

In [5]:
model.layers

[<Dense name=dense, built=False>,
 <Dense name=dense_1, built=False>,
 <Dense name=dense_2, built=False>]

You can also create a Sequential model incrementally via the `add()` method:

In [6]:
model = keras.Sequential()
model.add(layers.Dense(2, activation="relu"))
model.add(layers.Dense(3, activation="relu"))
model.add(layers.Dense(4))

Note that there's also a corresponding `pop()` method to remove layers:
a Sequential model behaves very much like a list of layers.

In [7]:
model.pop()
print(len(model.layers))  # 2

2


Also note that the Sequential constructor accepts a `name` argument, just like
any layer or model in Keras. This is useful to annotate TensorBoard graphs
with semantically meaningful names.

In [8]:
model = keras.Sequential(name="my_sequential")
model.add(layers.Dense(2, activation="relu", name="layer1"))
model.add(layers.Dense(3, activation="relu", name="layer2"))
model.add(layers.Dense(4, name="layer3"))

## Specifying the input shape in advance

Generally, all layers in Keras need to know the shape of their inputs
in order to be able to create their weights. So when you create a layer like
this, initially, it has no weights:

In [9]:
layer = layers.Dense(3)
layer.weights  # Empty

[]

It creates its weights the first time it is called on an input, since the shape
of the weights depends on the shape of the inputs:

In [10]:
# Call layer on a test input
x = ops.ones((1, 4))
y = layer(x)
layer.weights  # Now it has weights, of shape (4, 3) and (3,)

[<KerasVariable shape=(4, 3), dtype=float32, path=dense_6/kernel>,
 <KerasVariable shape=(3,), dtype=float32, path=dense_6/bias>]

Naturally, this also applies to Sequential models. When you instantiate a
Sequential model without an input shape, it isn't "built": it has no weights
(and calling
`model.weights` results in an error stating just this). The weights are created
when the model first sees some input data:

In [11]:
model = keras.Sequential(
    [
        layers.Dense(2, activation="relu"),
        layers.Dense(3, activation="relu"),
        layers.Dense(4),
    ]
)  # No weights at this stage!

# At this point, you can't do this:
# model.weights

# You also can't do this:
# model.summary()

# Call the model on a test input
x = ops.ones((1, 4))
y = model(x)
print("Number of weights after calling the model:", len(model.weights))  # 6

Number of weights after calling the model: 6


Once a model is "built", you can call its `summary()` method to display its
contents:

In [12]:
model.summary()

However, it can be very useful when building a Sequential model incrementally
to be able to display the summary of the model so far, including the current
output shape. In this case, you should start your model by passing an `Input`
object to your model, so that it knows its input shape from the start:

In [14]:
model = keras.Sequential()
model.add(keras.Input(shape=(4,)))
model.add(layers.Dense(2, activation="relu"))

model.summary()

Note that the `Input` object is not displayed as part of `model.layers`, since
it isn't a layer:

In [15]:
model.layers

[<Dense name=dense_11, built=True>]

Models built with a predefined input shape like this always have weights (even
before seeing any data) and always have a defined output shape.

In general, it's a recommended best practice to always specify the input shape
of a Sequential model in advance if you know what it is.

## A common debugging workflow: `add()` + `summary()`

When building a new Sequential architecture, it's useful to incrementally stack
layers with `add()` and frequently print model summaries. For instance, this
enables you to monitor how a stack of `Conv2D` and `MaxPooling2D` layers is
downsampling image feature maps:

In [1]:
train_images = None
train_captions = None
test_images = None
test_captions = None
train_image_rg = None
train_image_rb = None
train_image_gb = None
train_image_ra = None
train_image_ga = None
train_image_ba = None
train_image_rba = None
train_image_rga = None
train_image_bga = None
classes = [
  "airplane", #0
  "automobile", #1
  "bird", #2
  "cat", #3
  "deer", #4
  "dog", #5
  "frog", #6
  "horse", #7
  "ship", #8
  "truck", #9
  ]

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

def prepare_dataset(image_width, image_height, num_classes=10):
    # Load CIFAR-10 dataset
    (train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()

    # Normalize pixel values to [0, 1]
    train_images = train_images.astype('float32') / 255.0
    test_images = test_images.astype('float32') / 255.0

    # Resize images to (image_width, image_height)
    train_resized = tf.image.resize(train_images, [image_width, image_height]).numpy()
    test_resized = tf.image.resize(test_images, [image_width, image_height]).numpy()

    # Add Alpha channel (set to 1.0 for full opacity)
    def add_alpha_channel(images):
        alpha_channel = np.ones((images.shape[0], image_width, image_height, 1), dtype=np.float32)
        return np.concatenate([images, alpha_channel], axis=-1)

    train_rgba = add_alpha_channel(train_resized)
    test_rgba = add_alpha_channel(test_resized)

    # Create color combination inputs
    # Pair Inputs: (2 channels)
    train_rg = train_rgba[..., [0, 1]]  # Red + Green
    test_rg = test_rgba[..., [0, 1]]

    train_rb = train_rgba[..., [0, 2]]  # Red + Blue
    test_rb = test_rgba[..., [0, 2]]

    train_gb = train_rgba[..., [1, 2]]  # Green + Blue
    test_gb = test_rgba[..., [1, 2]]

    train_ra = train_rgba[..., [0, 3]]  # Red + Alpha
    test_ra = test_rgba[..., [0, 3]]

    train_ga = train_rgba[..., [1, 3]]  # Green + Alpha
    test_ga = test_rgba[..., [1, 3]]

    train_ba = train_rgba[..., [2, 3]]  # Blue + Alpha
    test_ba = test_rgba[..., [2, 3]]

    # Triplet Inputs: (3 channels)
    train_rba = train_rgba[..., [0, 2, 3]]  # Red + Blue + Alpha
    test_rba = test_rgba[..., [0, 2, 3]]

    train_rga = train_rgba[..., [0, 1, 3]]  # Red + Green + Alpha
    test_rga = test_rgba[..., [0, 1, 3]]

    train_bga = train_rgba[..., [2, 1, 3]]  # Blue + Green + Alpha
    test_bga = test_rgba[..., [2, 1, 3]]

    # One-hot encode labels
    train_labels = to_categorical(train_labels, num_classes)
    test_labels = to_categorical(test_labels, num_classes)

    # Organize inputs into dictionaries
    train_inputs = {
        "rgba_input": train_rgba,
        "rg_input": train_rg,
        "rb_input": train_rb,
        "gb_input": train_gb,
        "ra_input": train_ra,
        "ga_input": train_ga,
        "ba_input": train_ba,
        "rba_input": train_rba,
        "rga_input": train_rga,
        "bga_input": train_bga
    }

    test_inputs = {
        "rgba_input": test_rgba,
        "rg_input": test_rg,
        "rb_input": test_rb,
        "gb_input": test_gb,
        "ra_input": test_ra,
        "ga_input": test_ga,
        "ba_input": test_ba,
        "rba_input": test_rba,
        "rga_input": test_rga,
        "bga_input": test_bga
    }

    return train_inputs, train_labels, test_inputs, test_labels


In [3]:
def verify_data_shapes(train_inputs, test_inputs):
    print("Training Inputs Shapes:")
    for key, value in train_inputs.items():
        print(f"{key}: {value.shape}")

    print("\nTesting Inputs Shapes:")
    for key, value in test_inputs.items():
        print(f"{key}: {value.shape}")

In [4]:
def create_parent_model(image_width, image_height, num_classes=10):
    rgba_input = keras.Input(shape=(image_width, image_height, 4), name="rgba_input")

    rg_input = keras.Input(shape=(image_width, image_height, 2), name="rg_input")
    rb_input = keras.Input(shape=(image_width, image_height, 2), name="rb_input")
    gb_input = keras.Input(shape=(image_width, image_height, 2), name="gb_input")

    ra_input = keras.Input(shape=(image_width, image_height, 2), name="ra_input")
    ga_input = keras.Input(shape=(image_width, image_height, 2), name="ga_input")
    ba_input = keras.Input(shape=(image_width, image_height, 2), name="ba_input")

    rga_input = keras.Input(shape=(image_width, image_height, 3), name="rga_input")
    bga_input = keras.Input(shape=(image_width, image_height, 3), name="bga_input")
    rba_input = keras.Input(shape=(image_width, image_height, 3), name="rba_input")
    # Flatten all inputs before passing to Dense layers
    rgba_flat = layers.Flatten()(rgba_input)
    rg_flat = layers.Flatten()(rg_input)
    rb_flat = layers.Flatten()(rb_input)
    gb_flat = layers.Flatten()(gb_input)
    ra_flat = layers.Flatten()(ra_input)
    ga_flat = layers.Flatten()(ga_input)
    ba_flat = layers.Flatten()(ba_input)
    rga_flat = layers.Flatten()(rga_input)
    bga_flat = layers.Flatten()(bga_input)
    rba_flat = layers.Flatten()(rba_input)

    # rgba

    rgba = layers.Dense(1024, activation="relu", name="rgba_dense_1")(rgba_flat)
    rgba = layers.Dense(1024, activation="relu", name="rgba_dense_2")(rgba)

    # red + green
    rg = layers.Dense(1024, activation="relu", name="rg_dense_1")(rg_flat)
    rg = layers.Dense(1024, activation="relu", name="rg_dense_2")(rg)
    # green + blue
    gb = layers.Dense(1024, activation="relu", name="gb_dense_1")(gb_flat)
    gb = layers.Dense(1024, activation="relu", name="gb_dense_2")(gb)
    # red + blue
    rb = layers.Dense(1024, activation="relu", name="rb_dense_1")(rb_flat)
    rb = layers.Dense(1024, activation="relu", name="rb_dense_2")(rb)
    # red + alpha
    ra = layers.Dense(1024, activation="relu", name="ra_dense_1")(ra_flat)
    ra = layers.Dense(1024, activation="relu", name="ra_dense_2")(ra)
    # green + alpha
    ga = layers.Dense(1024, activation="relu", name="ga_dense_1")(ga_flat)
    ga = layers.Dense(1024, activation="relu", name="ga_dense_2")(ga)
    # blue + alpha
    ba = layers.Dense(1024, activation="relu", name="ba_dense_1")(ba_flat)
    ba = layers.Dense(1024, activation="relu", name="ba_dense_2")(ba)
    # red + green + alpha
    rga = layers.Dense(1024, activation="relu", name="rga_dense_1")(rga_flat)
    rga = layers.Dense(1024, activation="relu", name="rga_dense_2")(rga)
    # blue + green + alpha
    bga = layers.Dense(1024, activation="relu", name="bga_dense_1")(bga_flat)
    bga = layers.Dense(1024, activation="relu", name="bga_dense_2")(bga)
    # red + blue + alpha
    rba = layers.Dense(1024, activation="relu", name="rba_dense_1")(rba_flat)
    rba = layers.Dense(1024, activation="relu", name="rba_dense_2")(rba)

    # Concatenate all processed inputs
    color_layers = layers.concatenate([rgba, rg, gb, rb, ra, ga, ba, rga, bga, rba], name="color_concatenation")
    #color_layers = layers.concatenate([rgba, rg, gb, rb, ra, ga, ba], name="color_concatenation")
    color_layers = layers.Flatten()(color_layers)

    # Further Dense layers
    dense_color_1 = layers.Dense(1024, activation="relu", name="dense_color_1")(color_layers)
    dense_color_2 = layers.Dense(512, activation="relu", name="dense_color_2")(dense_color_1)
    dense_color_3 = layers.Dense(256, activation="relu", name="dense_color_3")(dense_color_2)
    dense_color_4 = layers.Dense(128, activation="relu", name="dense_color_4")(dense_color_3)
    dense_color_5 = layers.Dense(64, activation="relu", name="dense_color_5")(dense_color_4)
    dense_color_6 = layers.Dense(32, activation="relu", name="dense_color_6")(dense_color_5)

    # Output layer
    output = layers.Dense(num_classes, activation='softmax', name="output")(dense_color_6)

    # Define the model
    model = keras.Model(
        inputs=[
            rgba_input,
            rg_input, rb_input, gb_input, ra_input,
            ga_input, ba_input,
            rba_input, rga_input, bga_input
        ],
        outputs=[dense_color_6],
        name="color_model"
    )
    model.name = "color_model"
    return model

out_model = create_parent_model(32, 32, 10)
out_model.summary()

In [6]:
from tensorflow.keras.utils import Sequence, to_categorical

class CIFAR10DataGenerator(Sequence):
    def __init__(self, images, labels, image_width, image_height, batch_size=32, num_classes=10, shuffle=True):
        self.images = images
        self.labels = labels
        self.image_width = image_width
        self.image_height = image_height
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.indices = np.arange(len(self.images))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.images) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_images = self.images[batch_indices]
        batch_labels = self.labels[batch_indices]

        # Preprocess the batch
        inputs = self.preprocess_batch(batch_images)

        # One-hot encode labels
        batch_labels = to_categorical(batch_labels, num_classes=self.num_classes)

        return inputs, batch_labels

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def preprocess_batch(self, batch_images):
        # Normalize pixel values to [0, 1]
        batch_images = batch_images.astype('float32') / 255.0

        # Resize images to (32, 32) if not already
        resized = tf.image.resize(batch_images, [self.image_width, self.image_height]).numpy()

        # Add Alpha channel (set to 1.0 for full opacity)
        alpha_channel = np.ones((resized.shape[0], self.image_width, self.image_height, 1), dtype=np.float32)
        rgba = np.concatenate([resized, alpha_channel], axis=-1)  # Shape: (batch_size, 32, 32, 4)

        # Create pair inputs (2 channels)
        rg = rgba[..., [0, 1]]  # Red + Green
        rb = rgba[..., [0, 2]]  # Red + Blue
        gb = rgba[..., [1, 2]]  # Green + Blue
        ra = rgba[..., [0, 3]]  # Red + Alpha
        ga = rgba[..., [1, 3]]  # Green + Alpha
        ba = rgba[..., [2, 3]]  # Blue + Alpha

        # Create triplet inputs (3 channels)
        rba = rgba[..., [0, 2, 3]]  # Red + Blue + Alpha
        rga = rgba[..., [0, 1, 3]]  # Red + Green + Alpha
        bga = rgba[..., [2, 1, 3]]  # Blue + Green + Alpha

        # Organize inputs into a dictionary
        inputs = {
            "rgba_input": rgba,
            "rg_input": rg,
            "rb_input": rb,
            "gb_input": gb,
            "ra_input": ra,
            "ga_input": ga,
            "ba_input": ba,
            "rba_input": rba,
            "rga_input": rga,
            "bga_input": bga
        }

        # Verification: Print shapes (optional, remove after verification)
        # print("Batch Shapes:")
        # for key, value in inputs.items():
        #     print(f"{key}: {value.shape}")

        return inputs

In [5]:
import datetime

# Define summary writers
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = f'logs_custom/train/{current_time}'
val_log_dir = f'logs_custom/val/{current_time}'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
val_summary_writer = tf.summary.create_file_writer(val_log_dir)

for epoch in range(epochs):
    # Reset metrics
    train_loss.reset_states()
    train_accuracy.reset_states()
    val_loss.reset_states()
    val_accuracy.reset_states()

    # Training
    for batch, (inputs, labels) in enumerate(train_generator):
        train_step(inputs, labels)

        # Optional: Print progress every 100 batches
        if batch % 100 == 0:
            print(f"Epoch {epoch+1}, Batch {batch}, Loss: {train_loss.result():.4f}, Accuracy: {train_accuracy.result():.4f}")

    # Validation
    for val_inputs, val_labels in validation_generator:
        val_step(val_inputs, val_labels)

    # Log metrics to TensorBoard
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    with val_summary_writer.as_default():
        tf.summary.scalar('loss', val_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', val_accuracy.result(), step=epoch)

    # Print epoch metrics
    template = (
        "Epoch {}/{}\n"
        "Train Loss: {:.4f}, Train Accuracy: {:.4f}\n"
        "Val Loss: {:.4f}, Val Accuracy: {:.4f}"
    )
    print(template.format(
        epoch + 1,
        epochs,
        train_loss.result(),
        train_accuracy.result(),
        val_loss.result(),
        val_accuracy.result()
    ))

    # Early Stopping and Checkpointing
    if val_loss.result() < best_val_loss:
        best_val_loss = val_loss.result()
        wait = 0
        # Save the best model
        model.save('best_color_model.h5')
        print("Model checkpoint saved.")
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping triggered.")
            break


Training Inputs Shapes:
rgba_input: (50000, 32, 32, 4)
rg_input: (50000, 32, 32, 2)
rb_input: (50000, 32, 32, 2)
gb_input: (50000, 32, 32, 2)
ra_input: (50000, 32, 32, 2)
ga_input: (50000, 32, 32, 2)
ba_input: (50000, 32, 32, 2)
rba_input: (50000, 32, 32, 3)
rga_input: (50000, 32, 32, 3)
bga_input: (50000, 32, 32, 3)

Testing Inputs Shapes:
rgba_input: (10000, 32, 32, 4)
rg_input: (10000, 32, 32, 2)
rb_input: (10000, 32, 32, 2)
gb_input: (10000, 32, 32, 2)
ra_input: (10000, 32, 32, 2)
ga_input: (10000, 32, 32, 2)
ba_input: (10000, 32, 32, 2)
rba_input: (10000, 32, 32, 3)
rga_input: (10000, 32, 32, 3)
bga_input: (10000, 32, 32, 3)


Epoch 1/100


ValueError: Exception encountered when calling Functional.call().

[1mInput 0 of layer "ba_dense_1" is incompatible with the layer: expected axis -1 of input shape to have value 2048, but received input with shape (None, 3072)[0m

Arguments received by Functional.call():
  • inputs={'rgba_input': 'tf.Tensor(shape=(None, 32, 32, 4), dtype=float32)', 'rg_input': 'tf.Tensor(shape=(None, 32, 32, 2), dtype=float32)', 'rb_input': 'tf.Tensor(shape=(None, 32, 32, 2), dtype=float32)', 'gb_input': 'tf.Tensor(shape=(None, 32, 32, 2), dtype=float32)', 'ra_input': 'tf.Tensor(shape=(None, 32, 32, 2), dtype=float32)', 'ga_input': 'tf.Tensor(shape=(None, 32, 32, 2), dtype=float32)', 'ba_input': 'tf.Tensor(shape=(None, 32, 32, 2), dtype=float32)', 'rba_input': 'tf.Tensor(shape=(None, 32, 32, 3), dtype=float32)', 'rga_input': 'tf.Tensor(shape=(None, 32, 32, 3), dtype=float32)', 'bga_input': 'tf.Tensor(shape=(None, 32, 32, 3), dtype=float32)'}
  • training=True
  • mask={'rgba_input': 'None', 'rg_input': 'None', 'rb_input': 'None', 'gb_input': 'None', 'ra_input': 'None', 'ga_input': 'None', 'ba_input': 'None', 'rba_input': 'None', 'rga_input': 'None', 'bga_input': 'None'}

Very practical, right?


## What to do once you have a model

Once your model architecture is ready, you will want to:

- Train your model, evaluate it, and run inference. See our
[guide to training & evaluation with the built-in loops](
    /guides/training_with_built_in_methods/)
- Save your model to disk and restore it. See our
[guide to serialization & saving](/guides/serialization_and_saving/).

## Feature extraction with a Sequential model

Once a Sequential model has been built, it behaves like a
[Functional API model](/guides/functional_api/).
This means that every layer has an `input`
and `output` attribute. These attributes can be used to do neat things, like
quickly creating a model that extracts the outputs of all intermediate layers in a
Sequential model:

In [21]:
initial_model = keras.Sequential(
    [
        keras.Input(shape=(250, 250, 3)),
        layers.Conv2D(32, 5, strides=2, activation="relu"),
        layers.Conv2D(32, 3, activation="relu"),
        layers.Conv2D(32, 3, activation="relu"),
    ]
)
feature_extractor = keras.Model(
    inputs=initial_model.inputs,
    outputs=[layer.output for layer in initial_model.layers],
)

# Call feature extractor on test input.
x = ops.ones((1, 250, 250, 3))
features = feature_extractor(x)

Here's a similar example that only extract features from one layer:

In [22]:
initial_model = keras.Sequential(
    [
        keras.Input(shape=(250, 250, 3)),
        layers.Conv2D(32, 5, strides=2, activation="relu"),
        layers.Conv2D(32, 3, activation="relu", name="my_intermediate_layer"),
        layers.Conv2D(32, 3, activation="relu"),
    ]
)
feature_extractor = keras.Model(
    inputs=initial_model.inputs,
    outputs=initial_model.get_layer(name="my_intermediate_layer").output,
)
# Call feature extractor on test input.
x = ops.ones((1, 250, 250, 3))
features = feature_extractor(x)

## Transfer learning with a Sequential model

Transfer learning consists of freezing the bottom layers in a model and only training
the top layers. If you aren't familiar with it, make sure to read our [guide
to transfer learning](/guides/transfer_learning/).

Here are two common transfer learning blueprint involving Sequential models.

First, let's say that you have a Sequential model, and you want to freeze all
layers except the last one. In this case, you would simply iterate over
`model.layers` and set `layer.trainable = False` on each layer, except the
last one. Like this:

```python
model = keras.Sequential([
    keras.Input(shape=(784)),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10),
])

# Presumably you would want to first load pre-trained weights.
model.load_weights(...)

# Freeze all layers except the last one.
for layer in model.layers[:-1]:
  layer.trainable = False

# Recompile and train (this will only update the weights of the last layer).
model.compile(...)
model.fit(...)
```

Another common blueprint is to use a Sequential model to stack a pre-trained
model and some freshly initialized classification layers. Like this:

```python
# Load a convolutional base with pre-trained weights
base_model = keras.applications.Xception(
    weights='imagenet',
    include_top=False,
    pooling='avg')

# Freeze the base model
base_model.trainable = False

# Use a Sequential model to add a trainable classifier on top
model = keras.Sequential([
    base_model,
    layers.Dense(1000),
])

# Compile & train
model.compile(...)
model.fit(...)
```

If you do transfer learning, you will probably find yourself frequently using
these two patterns.

That's about all you need to know about Sequential models!

To find out more about building models in Keras, see:

- [Guide to the Functional API](/guides/functional_api/)
- [Guide to making new Layers & Models via subclassing](/guides/making_new_layers_and_models_via_subclassing/)