In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
import matplotlib.pyplot as plt
import glob # Can be useful but we use os

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
SIZE = 320
BATCH_SIZE = 32
CHANNELS = 1
PATCH_SIZE = 48
PATCH_DIM = (PATCH_SIZE ** 2) * CHANNELS
GENDER_FEATURE_SHAPE = (1,)
EPOCHS = 35
LEARNING_RATE = 1e-3

OVERLAP = 0.25
STRIDE = int(PATCH_SIZE * (1 - OVERLAP))
num_patches_per_side = (SIZE - PATCH_SIZE) // STRIDE + 1
N_PATCHES = num_patches_per_side ** 2

INPUT_SHAPE = (N_PATCHES, PATCH_DIM)



In [6]:
base_dir = '.'

train_csv_path = os.path.join(base_dir, 'Train', 'train_labels.csv')
val_csv_path = os.path.join(base_dir, 'Val', 'val_labels.csv')
test_csv_path = os.path.join(base_dir, 'Test', 'test_labels.csv')

train_image_dir = os.path.join(base_dir, 'Train', 'train_samples_pp')
val_image_dir = os.path.join(base_dir, 'Val', 'val_samples_pp')
test_image_dir = os.path.join(base_dir, 'Test', 'test_samples_pp')

checkpoint_filepath = 'Models/bid_RNN.keras'

def load_labels(csv_path):
    df = pd.read_csv(csv_path, index_col='id')
    df = df[['boneage', 'male']].rename(columns={'male': 'gender'})
    df['gender'] = df['gender'].astype(np.float32)
    df['boneage'] = df['boneage'].astype(np.float32)
    return df

In [7]:
def create_dataframe(image_dir, labels_df):
    data = []

    for filename in os.listdir(image_dir):
        file_id = int(filename.split('.')[0])
        if file_id in labels_df.index:
            boneage = labels_df.loc[file_id, 'boneage']
            gender = labels_df.loc[file_id, 'gender']
            full_path = os.path.join(image_dir, filename)
            data.append({'file_path': full_path, 'boneage': boneage, 'gender': gender})

    return pd.DataFrame(data)


def preprocess_image(image, labels):
    image = tf.image.resize(image, [SIZE, SIZE])
    if image.shape[-1] is None:
        image = tf.reshape(image, [SIZE, SIZE, CHANNELS])
    elif image.shape[-1] != CHANNELS:
        image = tf.image.rgb_to_grayscale(image)

    image = tf.cast(image, tf.float32) / 255.0

    patches = tf.image.extract_patches(
        images=tf.expand_dims(image, 0),
        sizes=[1, PATCH_SIZE, PATCH_SIZE, 1],
        strides=[1, STRIDE,    STRIDE,    1],
        rates=[1, 1, 1, 1],
        padding='VALID',
    )
    patches = tf.reshape(patches, (1, N_PATCHES, PATCH_DIM))
    patches = tf.squeeze(patches, axis=0)

    #- new
    boneage, gender = labels
    boneage = (boneage - mean_age) / std_age
    return patches, (boneage, gender)
    #-


def image_label_generator(file_paths, boneage_labels, gender_labels):
    for path, boneage, gender in zip(file_paths, boneage_labels, gender_labels):
        img_bytes = tf.io.read_file(path)
        image = tf.io.decode_image(img_bytes, channels=CHANNELS, expand_animations=False)
        image.set_shape([None, None, CHANNELS])
        yield image, (boneage, gender)


def create_tf_dataset(dataframe, shuffle, repeat_flag, batch_size_local=BATCH_SIZE):
    dataset = tf.data.Dataset.from_generator(
        image_label_generator,
        args=[
            dataframe['file_path'].values,
            dataframe['boneage'].values,
            dataframe['gender'].values],
        output_signature=(
            tf.TensorSpec(shape=(None, None, CHANNELS), dtype=tf.uint8),
            (tf.TensorSpec(shape=(), dtype=tf.float32), tf.TensorSpec(shape=(), dtype=tf.float32))))

    dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.map(lambda img, labels: ((img, labels[1]), labels[0]),
                          num_parallel_calls=tf.data.AUTOTUNE)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(dataframe), reshuffle_each_iteration=True)

    if repeat_flag:
        dataset = dataset.repeat()

    dataset = dataset.batch(batch_size_local)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

In [8]:
base_dir = 'drive/MyDrive/MLHD'

train_csv_path = os.path.join(base_dir, 'Train', 'train_labels.csv')
val_csv_path = os.path.join(base_dir, 'Val', 'val_labels.csv')
test_csv_path = os.path.join(base_dir, 'Test', 'test_labels.csv')

train_image_dir = os.path.join(base_dir, 'Train', 'train_samples_pp')
val_image_dir = os.path.join(base_dir, 'Val', 'val_samples_pp')
test_image_dir = os.path.join(base_dir, 'Test', 'test_samples_pp')


train_labels_df = load_labels(train_csv_path)
val_labels_df = load_labels(val_csv_path)
test_labels_df = load_labels(test_csv_path)

#- exclusive for this
mean_age = train_labels_df['boneage'].mean()
std_age  = train_labels_df['boneage'].std()
#-

training_dataframe = create_dataframe(train_image_dir, train_labels_df)
validation_dataframe = create_dataframe(val_image_dir, val_labels_df)
test_dataframe = create_dataframe(test_image_dir, test_labels_df)

train_dataset = create_tf_dataset(training_dataframe, shuffle=True, repeat_flag=True)
validation_dataset = create_tf_dataset(validation_dataframe, shuffle=False, repeat_flag=False)
test_dataset_eval = create_tf_dataset(test_dataframe, shuffle=False, repeat_flag=False)

In [9]:
def bid_RNN(input_shape, gender_shape,
                       patch_size=PATCH_SIZE, channels=CHANNELS,
                       patch_embed_dim=112, rnn_units=[112, 112, 56]):
    # 1) Inputs & reshape
    image_input  = keras.Input(shape=input_shape,  name="image_input")
    gender_input = keras.Input(shape=gender_shape, name="gender_input")
    num_patches, patch_dim = input_shape
    x = layers.Reshape((num_patches, patch_size, patch_size, channels),
                       name="reshape_patches")(image_input)

    # 2) Smaller Conv2D patch encoder
    patch_encoder = keras.Sequential([
        layers.Conv2D(32, kernel_size=4, padding="same", activation="relu"),
        layers.Flatten(),
        layers.Dense(patch_embed_dim, activation="relu"),
    ], name="patch_encoder")
    x = layers.TimeDistributed(patch_encoder, name="patch_embedding")(x)

    # 3) Positional embeddings
    pos_indices = tf.range(num_patches, dtype=tf.int32)
    pos_emb_layer = layers.Embedding(input_dim=num_patches,
                                     output_dim=patch_embed_dim,
                                     name="pos_embedding")
    pos_emb = tf.expand_dims(pos_emb_layer(pos_indices), axis=0)
    x = layers.Add(name="add_positional")([x, pos_emb])

    # 4) Reduced Bi‑GRU stack
    for i, units in enumerate(rnn_units, start=1):
        return_seq = (i < len(rnn_units))
        x = layers.Bidirectional(
                layers.GRU(units,
                           return_sequences=return_seq),
                name=f"bi_gru{i}"
            )(x)
        x = layers.BatchNormalization(name=f"bn_gru{i}")(x)
    image_features = x  # shape (batch, 2*rnn_units[-1])

    # 5) Slimmer dense head
    x = layers.concatenate([image_features, gender_input],
                           name='concatenate_features')
    x = layers.Dense(128, name='dense_head1')(x)
    x = layers.BatchNormalization(name='bn_head1')(x)
    x = layers.Activation('relu', name='relu_head1')(x)
    x = layers.Dropout(0.4, name='dropout_head1')(x)

    x = layers.Dense(64, name='dense_head2')(x)
    x = layers.BatchNormalization(name='bn_head2')(x)
    x = layers.Activation('relu', name='relu_head2')(x)
    x = layers.Dropout(0.4, name='dropout_head2')(x)

    bone_age_output = layers.Dense(1, activation='linear',
                                   name='bone_age_output')(x)

    model = keras.Model(inputs=[image_input, gender_input],
                        outputs=bone_age_output,
                        name="light_bone_age_predictor")
    model.compile(optimizer=keras.optimizers.Adam(1e-3),
                  loss='mse', metrics=['mae'])
    return model


model = bid_RNN(
    input_shape=(N_PATCHES, PATCH_DIM),
    gender_shape=(1,),
    patch_size=PATCH_SIZE,
    channels=1)

model.summary()

In [10]:
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_mae', # save the best mae
    mode='min',
    save_best_only=True)

early_stopping_callback = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min',
    restore_best_weights=True)
# restore best weights because it tends to overfit
# monitor loss because it's the actual improvement metric meanwhile mae can be a face value metric

reduce_lr_callback = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    mode='min',
    min_lr=1e-6)
# trial and error came to a best hyperparam of 3 epochs

callback_list = [model_checkpoint_callback, early_stopping_callback, reduce_lr_callback]

steps = len(training_dataframe) // BATCH_SIZE
val_steps = len(validation_dataframe) // BATCH_SIZE

In [11]:
import numpy as np
from sklearn.model_selection import KFold

# how many folds
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# to store histories
all_hist = []

for fold, (train_idx, val_idx) in enumerate(kf.split(training_dataframe), 1):
    print(f"\n>>> Fold {fold}/{n_splits}")

    # split df
    df_train = training_dataframe.iloc[train_idx]
    df_val = training_dataframe.iloc[val_idx]

    # build datasets
    train_ds = create_tf_dataset(df_train, shuffle=True, repeat_flag=True)
    val_ds = create_tf_dataset(df_val, shuffle=False, repeat_flag=False)

    steps = len(df_train) // BATCH_SIZE
    val_steps = len(df_val) // BATCH_SIZE

    # fresh model
    model = bid_RNN(INPUT_SHAPE, GENDER_FEATURE_SHAPE)

    # fit
    history = model.fit(
        train_ds,
        epochs=EPOCHS,
        steps_per_epoch=steps,
        validation_data=val_ds,
        validation_steps=val_steps,
        callbacks=callback_list,
        verbose=1)

    # save history
    all_hist.append(history.history)



>>> Fold 1/5
Epoch 1/35
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 213ms/step - loss: 1.6491 - mae: 1.0216 - val_loss: 1.3235 - val_mae: 0.9715 - learning_rate: 0.0010
Epoch 2/35
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 206ms/step - loss: 1.1328 - mae: 0.8565 - val_loss: 1.0057 - val_mae: 0.8241 - learning_rate: 0.0010
Epoch 3/35
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 203ms/step - loss: 1.0394 - mae: 0.8315 - val_loss: 0.9920 - val_mae: 0.7961 - learning_rate: 0.0010
Epoch 4/35
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 199ms/step - loss: 0.9972 - mae: 0.8111 - val_loss: 1.0106 - val_mae: 0.8243 - learning_rate: 0.0010
Epoch 5/35
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 197ms/step - loss: 0.9522 - mae: 0.7911 - val_loss: 1.0062 - val_mae: 0.7932 - learning_rate: 0.0010
Epoch 6/35
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 190ms/step -

In [12]:

# after CV, average your metric, e.g.:
val_maes = np.array([h["val_mae"][-1] for h in all_hist])
print("Per-fold final val MAE:", val_maes)
print("CV mean val MAE:    ", val_maes.mean())
print("CV std  val MAE:    ", val_maes.std())

Per-fold final val MAE: [0.59787017 0.60279232 0.82312024 0.60234529 0.6855033 ]
CV mean val MAE:     0.6623262643814087
CV std  val MAE:     0.08681989672719513


Roughly: