In [1]:
import os

import tensorflow as tf
import visualkeras
from PIL import ImageFont
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers

In [2]:
# Font used for the visualkeras legends. "arial.ttf" is not installed on every
# system and ImageFont.truetype raises OSError when the file cannot be opened,
# so fall back to Pillow's built-in default font instead of aborting the run.
try:
    font = ImageFont.truetype("arial.ttf", 16)
except OSError:
    font = ImageFont.load_default()

# --- Constants for Model 1 ---
LEARNING_RATE      = 5e-4   # NOTE(review): not referenced by any cell visible here
IMG_HEIGHT         = 256    # default input height for build_attention_cnn
IMG_WIDTH          = 256    # default input width for build_attention_cnn

def build_attention_cnn(img_size=(IMG_HEIGHT, IMG_WIDTH), block_filters=(32, 64, 128)):
    """Build the single-input CNN with a spatial-attention gate and a dense head.

    The network is: a stack of convolutional blocks, a spatial-attention map
    multiplied onto the backbone features, then a flatten + dense regression
    head that ends in a single ReLU-activated unit named ``age_output``.

    Args:
        img_size: ``(height, width)`` of the input image; one channel is
            appended, so the input layer has shape ``(height, width, 1)``.
        block_filters: Filter count for each backbone block, in order. One
            block (two Conv-BN-ReLU stages + 2x2 max-pool) is created per
            entry and named ``block1``, ``block2``, ... The default is the
            reduced three-block stack used for compact visualisations; pass
            e.g. ``(32, 64, 128, 256, 256)`` for a deeper backbone.

    Returns:
        An uncompiled ``keras`` functional model named ``AttentionCNN``.
    """
    inp = layers.Input(shape=(*img_size, 1), name='input_image')

    # --- CNN Backbone ---
    def cnn_block(x, filters, prefix):
        """Apply two Conv-BN-ReLU stages ('a' then 'b') and a 2x2 max-pool."""
        for tag in ('a', 'b'):
            x = layers.Conv2D(filters, 3, padding='same',
                              kernel_regularizer=regularizers.l2(1e-4),
                              name=f'{prefix}_conv_{tag}')(x)
            x = layers.BatchNormalization(name=f'{prefix}_bn_{tag}')(x)
            x = layers.Activation('relu', name=f'{prefix}_relu_{tag}')(x)
        return layers.MaxPooling2D(2, 2, name=f'{prefix}_pool')(x)

    x = inp
    for i, f in enumerate(block_filters, start=1):
        x = cnn_block(x, f, prefix=f'block{i}')

    # --- Spatial Attention ---
    # Collapse the channel axis two ways (mean and max); keepdims=True leaves a
    # single-channel map each, so the two can be concatenated channel-wise.
    avg_pool = layers.Lambda(lambda t: tf.reduce_mean(t, axis=-1, keepdims=True),
                             name='att_avg_pool')(x)
    max_pool = layers.Lambda(lambda t: tf.reduce_max(t, axis=-1, keepdims=True),
                             name='att_max_pool')(x)

    concat   = layers.Concatenate(name='att_concat')([avg_pool, max_pool])
    att_mid  = layers.Conv2D(32, 5, padding='same', activation='relu',
                              name='att_inter_conv')(concat)
    # Sigmoid keeps the single-channel map in (0, 1); Multiply broadcasts it
    # over every channel of the backbone features.
    att_map  = layers.Conv2D(1, 7, padding='same', activation='sigmoid',
                              use_bias=False, name='att_final_conv')(att_mid)
    x = layers.Multiply(name='apply_attention')([x, att_map])

    # --- Regression Head ---
    x = layers.Flatten(name='flatten')(x)
    for units, drop, name in [(512, .4, 'fc1'), (256, .4, 'fc2')]: # Reduced for brevity
        x = layers.Dense(units, activation='relu',
                         kernel_regularizer=regularizers.l2(1e-4),
                         name=f'{name}_dense')(x)
        x = layers.BatchNormalization(name=f'{name}_bn')(x)
        x = layers.Dropout(drop, name=f'{name}_dropout')(x)

    lin_out = layers.Dense(1, name='age_linear',
                           kernel_regularizer=regularizers.l2(1e-4))(x)
    # Final ReLU clamps the prediction to be non-negative.
    out     = layers.Activation('relu', name='age_output')(lin_out)

    return models.Model(inputs=inp, outputs=out, name='AttentionCNN')

attention_cnn_model = build_attention_cnn()

# layered_view writes the rendered image to `to_file`; create the target
# directory first so the save does not fail when 'pngs/' is missing.
os.makedirs('pngs', exist_ok=True)

visualkeras.layered_view(
    attention_cnn_model,
    legend=True,
    font=font, spacing = 80,
    to_file='pngs/attention_cnn_model.png'
).show() # .show() will display the plot directly if in an interactive environment






In [3]:
# Input geometry for the dual-input CNN: a square, single-channel image plus a
# one-element gender feature vector.
SIZE = 256
INPUT_SHAPE = (SIZE, SIZE) + (1,)
GENDER_FEATURE_SHAPE = (1,)

def build_dual_cnn(input_shape, gender_shape):
    """Build the two-input CNN: image branch + gender feature -> one linear output.

    Args:
        input_shape: Shape passed to the ``image_input`` layer (batch axis excluded).
        gender_shape: Shape passed to the ``gender_input`` layer (batch axis excluded).

    Returns:
        An uncompiled ``keras.Model`` named ``Dual_Input_CNN`` taking
        ``[image_input, gender_input]`` and producing ``bone_age_output``.
    """
    # inputs
    image_input = keras.Input(shape=input_shape, name="image_input")
    gender_input = keras.Input(shape=gender_shape, name="gender_input")

    # Three identical stages (Conv -> BN -> ReLU -> 2x2 max-pool) that differ
    # only in filter count; the stage index is baked into each layer name.
    feature_map = image_input
    for stage, n_filters in enumerate((32, 64, 128), start=1):
        feature_map = layers.Conv2D(n_filters, (3, 3), padding='same',
                                    name=f'conv{stage}a')(feature_map)
        feature_map = layers.BatchNormalization(name=f'bn{stage}a')(feature_map)
        feature_map = layers.Activation('relu', name=f'relu{stage}a')(feature_map)
        feature_map = layers.MaxPooling2D((2, 2), name=f'pool{stage}')(feature_map)

    # feature extraction: collapse the spatial axes to one vector per image
    image_features = layers.GlobalAveragePooling2D(name='global_avg_pool')(feature_map)

    # fusion with gender
    fused = layers.concatenate([image_features, gender_input],
                               name='concatenate_features')

    # regression head
    head = layers.Dense(128, name='dense_head1')(fused)
    head = layers.BatchNormalization(name='bn_head1')(head)
    head = layers.Activation('relu', name='relu_head1')(head)
    head = layers.Dropout(0.4, name='dropout_head')(head)

    bone_age_output = layers.Dense(1, activation='linear', name='bone_age_output')(head)

    return keras.Model(
        inputs=[image_input, gender_input],
        outputs=bone_age_output,
        name="Dual_Input_CNN")

# Build the model
dual_cnn_model = build_dual_cnn(INPUT_SHAPE, GENDER_FEATURE_SHAPE)

# layered_view writes the rendered image to `to_file`; create the target
# directory first so the save does not fail when 'pngs/' is missing.
os.makedirs('pngs', exist_ok=True)

# Generate the visualization
visualkeras.layered_view(
    dual_cnn_model,
    legend=True,
    font=font, spacing = 80,
    to_file='pngs/dual_cnn_model.png'
).show()

print("Saved 'dual_cnn_model.png'")




Saved 'dual_cnn_model.png'


In [4]:
# --- Patch-grid geometry for the Bi-RNN model ---
# Tunable values first; everything below them is derived.
SIZE = 320          # presumably the image side length in pixels -- confirm
CHANNELS = 1
PATCH_SIZE = 48     # side length of one square patch
OVERLAP = 0.25      # fraction of a patch shared with its neighbour

# Flattened length of one patch.
PATCH_DIM = PATCH_SIZE * PATCH_SIZE * CHANNELS
# Step between consecutive patch origins, truncated to an int.
STRIDE = int(PATCH_SIZE * (1 - OVERLAP))
# Number of whole patches that fit along one side at this stride.
num_patches_per_side = 1 + (SIZE - PATCH_SIZE) // STRIDE
N_PATCHES = num_patches_per_side * num_patches_per_side

INPUT_SHAPE = (N_PATCHES, PATCH_DIM)
GENDER_FEATURE_SHAPE = (1,)

class _AddPositionEmbedding(layers.Layer):
    """Add a learned per-position embedding to a ``(batch, positions, dim)`` input.

    The embedding table lives inside this layer, so it is part of the model
    graph: its weights are tracked, trained and saved with the model.
    """

    def __init__(self, num_positions, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_positions = num_positions
        self.embed_dim = embed_dim
        self.pos_embedding = layers.Embedding(input_dim=num_positions,
                                              output_dim=embed_dim,
                                              name="pos_embedding")

    def call(self, inputs):
        # (num_positions, embed_dim) broadcasts over the leading batch axis.
        positions = tf.range(self.num_positions, dtype=tf.int32)
        return inputs + self.pos_embedding(positions)

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = super().get_config()
        config.update({"num_positions": self.num_positions,
                       "embed_dim": self.embed_dim})
        return config


def build_bid_rnn(input_shape, gender_shape,
                 patch_size=PATCH_SIZE, channels=CHANNELS,
                 patch_embed_dim=112, rnn_units=(112, 112, 56)):
    """Build the patch-sequence model: per-patch CNN encoder -> Bi-GRU stack -> dense head.

    Args:
        input_shape: ``(num_patches, patch_dim)`` of the flattened-patch input.
            ``patch_dim`` must equal ``patch_size * patch_size * channels``.
        gender_shape: Shape passed to the ``gender_input`` layer.
        patch_size: Side length each flattened patch is reshaped back to.
        channels: Channel count each flattened patch is reshaped back to.
        patch_embed_dim: Width of the per-patch embedding and of the
            positional embedding added to it.
        rnn_units: Units of each bidirectional GRU, in order. All but the last
            return full sequences; the last returns only its final state.

    Returns:
        An uncompiled ``keras.Model`` named ``Patch_BiRNN_Model`` taking
        ``[image_input, gender_input]`` and producing ``bone_age_output``.

    Raises:
        ValueError: If ``rnn_units`` is empty or ``input_shape`` does not match
            ``patch_size`` / ``channels``.
    """
    num_patches, patch_dim = input_shape
    expected_dim = patch_size * patch_size * channels
    if patch_dim != expected_dim:
        raise ValueError(
            f"input_shape[1]={patch_dim} does not match "
            f"patch_size**2 * channels = {expected_dim}")
    if len(rnn_units) == 0:
        # Without a final GRU the features stay 3-D and cannot be concatenated
        # with the 2-D gender input.
        raise ValueError("rnn_units must contain at least one entry")

    # 1) Inputs & reshape
    image_input  = keras.Input(shape=input_shape,  name="image_input")
    gender_input = keras.Input(shape=gender_shape, name="gender_input")
    x = layers.Reshape((num_patches, patch_size, patch_size, channels),
                       name="reshape_patches")(image_input)

    # 2) Smaller Conv2D patch encoder, applied to every patch independently
    patch_encoder = keras.Sequential([
        layers.Conv2D(32, kernel_size=4, padding="same", activation="relu"),
        layers.Flatten(),
        layers.Dense(patch_embed_dim, activation="relu"),
    ], name="patch_encoder")
    x = layers.TimeDistributed(patch_encoder, name="patch_embedding")(x)

    # 3) Positional embeddings. Done inside a layer rather than by calling an
    #    Embedding on an eager tf.range tensor: the latter runs outside the
    #    functional graph, so the table would be frozen in as a constant and
    #    never appear in the model's weights.
    x = _AddPositionEmbedding(num_patches, patch_embed_dim,
                              name="add_positional")(x)

    # 4) Reduced Bi-GRU stack
    for i, units in enumerate(rnn_units, start=1):
        return_seq = (i < len(rnn_units))
        x = layers.Bidirectional(
                layers.GRU(units, return_sequences=return_seq),
                name=f"bi_gru{i}"
            )(x)
        x = layers.BatchNormalization(name=f"bn_gru{i}")(x)
    image_features = x

    # 5) Slimmer dense head
    x = layers.concatenate([image_features, gender_input],
                           name='concatenate_features')
    x = layers.Dense(128, name='dense_head1')(x)
    x = layers.BatchNormalization(name='bn_head1')(x)
    x = layers.Activation('relu', name='relu_head1')(x)
    x = layers.Dropout(0.4, name='dropout_head1')(x)

    bone_age_output = layers.Dense(1, activation='linear', name='bone_age_output')(x)

    model = keras.Model(inputs=[image_input, gender_input],
                        outputs=bone_age_output,
                        name="Patch_BiRNN_Model")
    return model

bid_rnn_model = build_bid_rnn(
    input_shape=(N_PATCHES, PATCH_DIM),
    gender_shape=GENDER_FEATURE_SHAPE)

# layered_view writes the rendered image to `to_file`; create the target
# directory first so the save does not fail when 'pngs/' is missing.
os.makedirs('pngs', exist_ok=True)

visualkeras.layered_view(
    bid_rnn_model,
    legend=True,
    font=font,
    to_file='pngs/bid_rnn_model.png',
    spacing=80
).show()