In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# import functions from Gender CNN notebook
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks, regularizers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob  # Can be useful but we use os
import time

MODEL_SAVE_PATH = 'Models/attention_model.keras'
LEARNING_RATE      = 5e-4
IMG_HEIGHT         = 300
IMG_WIDTH          = 300
BATCH_SIZE         = 32
EPOCHS             = 50
AUTOTUNE      = tf.data.AUTOTUNE
CHANNELS = 1
SIZE = 300
PATIENCE_ES = 10
PATIENCE_RLR = 5
LR_FACTOR = 0.5
MIN_LR = 1e-6
base_dir = '/content/drive/MyDrive/MLHD'

# Data Paths
train_csv_path = os.path.join(base_dir, 'Train', 'train_labels.csv')
val_csv_path = os.path.join(base_dir, 'Val', 'val_labels.csv')
test_csv_path = os.path.join(base_dir, 'Test', 'test_labels.csv')

train_image_dir = os.path.join(base_dir, 'Train', 'train_samples_pp')
val_image_dir = os.path.join(base_dir, 'Val', 'val_samples_pp')
test_image_dir = os.path.join(base_dir, 'Test', 'test_samples_pp')



def load_labels(csv_path):
    df = pd.read_csv(csv_path, index_col='id')

    df = df[['boneage', 'male']].rename(columns={'male': 'gender'})
    df['gender'] = df['gender'].astype(np.float32)
    df['boneage'] = df['boneage'].astype(np.float32)
    return df


def create_dataframe(image_dir, labels_df):
    data = []

    for filename in os.listdir(image_dir):
        file_id = int(filename.split('.')[0])
        if file_id in labels_df.index:
            boneage = labels_df.loc[file_id, 'boneage']
            gender = labels_df.loc[file_id, 'gender']
            full_path = os.path.join(image_dir, filename)
            data.append({'file_path': full_path, 'boneage': boneage, 'gender': gender})

    return pd.DataFrame(data)


def preprocess_image(path, boneage):
    # Read + decode to [H,W,CHANNELS]
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=CHANNELS, expand_animations=False)
    img = tf.image.resize(img, [SIZE, SIZE])
    img = tf.cast(img, tf.float32) / 255.0
    return img, boneage


def image_label_generator(file_paths, boneage_labels, gender_labels):
    """tuple of (boneage, gender) labels."""
    for path, boneage, gender in zip(file_paths, boneage_labels, gender_labels):
        try:
            img_bytes = tf.io.read_file(path)

            image = tf.io.decode_image(img_bytes, channels=CHANNELS, expand_animations=False)

            # allow dynamic height/width initially
            image.set_shape([None, None, CHANNELS])
            yield image, (boneage, gender)  # Yield image and label tuple
        except tf.errors.InvalidArgumentError as e:
            print(f"Warning: Skipping file {path}. Error decoding image: {e}")
        except Exception as e:
            print(f"Warning: Skipping file {path}. Unexpected error: {e}")


def create_tf_dataset(df, shuffle: bool, repeat: bool):
    """
    Returns a tf.data.Dataset yielding (image, boneage) pairs,
    batched, shuffled/prefetched as specified.
    """
    paths    = df['file_path'].values
    boneages = df['boneage'].values

    ds = tf.data.Dataset.from_tensor_slices((paths, boneages))
    ds = ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)

    if shuffle:
        ds = ds.shuffle(buffer_size=len(df), reshuffle_each_iteration=True)
    if repeat:
        ds = ds.repeat()

    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(AUTOTUNE)
    return ds


# labels
train_labels_df = load_labels(train_csv_path)
val_labels_df = load_labels(val_csv_path)
test_labels_df = load_labels(test_csv_path)

# df
training_dataframe = create_dataframe(train_image_dir, train_labels_df)
validation_dataframe = create_dataframe(val_image_dir, val_labels_df)
test_dataframe = create_dataframe(test_image_dir, test_labels_df)


train_dataset = create_tf_dataset(training_dataframe, shuffle=True,  repeat=True)
val_dataset   = create_tf_dataset(validation_dataframe,   shuffle=False, repeat=True)
test_dataset  = create_tf_dataset(test_dataframe,          shuffle=False, repeat=False)


In [3]:

def build_model(img_size=(IMG_HEIGHT, IMG_WIDTH)):
    inp = layers.Input(shape=(*img_size, 1), name='input_image')

    # --- CNN Backbone ---
    def cnn_block(x, filters, prefix):
        x = layers.Conv2D(filters, 3, padding='same',
                          kernel_regularizer=regularizers.l2(1e-4),
                          name=f'{prefix}_conv_a')(x)
        x = layers.BatchNormalization(name=f'{prefix}_bn_a')(x)
        x = layers.Activation('relu', name=f'{prefix}_relu_a')(x)
        x = layers.Conv2D(filters, 3, padding='same',
                          kernel_regularizer=regularizers.l2(1e-4),
                          name=f'{prefix}_conv_b')(x)
        x = layers.BatchNormalization(name=f'{prefix}_bn_b')(x)
        x = layers.Activation('relu', name=f'{prefix}_relu_b')(x)
        return layers.MaxPooling2D(2, 2, name=f'{prefix}_pool')(x)

    x = inp
    for i, f in enumerate([32, 64, 128, 256, 256], start=1):
        x = cnn_block(x, f, prefix=f'block{i}')

    # --- Spatial Attention ---
    # CORRECTED: Wrap tf functions in Lambda layers
    avg_pool = layers.Lambda(lambda t: tf.reduce_mean(t, axis=-1, keepdims=True),
                             name='att_avg_pool')(x)
    max_pool = layers.Lambda(lambda t: tf.reduce_max(t, axis=-1, keepdims=True),
                             name='att_max_pool')(x)

    concat   = layers.Concatenate(name='att_concat')([avg_pool, max_pool])
    att_mid  = layers.Conv2D(32, 5, padding='same', activation='relu',
                              name='att_inter_conv')(concat)
    att_map  = layers.Conv2D(1, 7, padding='same', activation='sigmoid',
                              use_bias=False, name='att_final_conv')(att_mid)
    x = layers.Multiply(name='apply_attention')([x, att_map])

    # --- Regression Head ---
    x = layers.Flatten(name='flatten')(x)
    for units, drop, name in [(512, .4, 'fc1'), (256, .4, 'fc2'), (128, .3, 'fc3')]:
        x = layers.Dense(units, activation='relu',
                         kernel_regularizer=regularizers.l2(1e-4),
                         name=f'{name}_dense')(x)
        x = layers.BatchNormalization(name=f'{name}_bn')(x)
        x = layers.Dropout(drop, name=f'{name}_dropout')(x)

    lin_out = layers.Dense(1, name='age_linear',
                           kernel_regularizer=regularizers.l2(1e-4))(x)
    out     = layers.Activation('relu', name='age_output')(lin_out)

    model = models.Model(inputs=inp, outputs=out, name='AgePredModel')

    # --- COMPILE ---
    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(optimizer=optimizer,
                  loss='mae',
                  metrics=['mae'])
    return model

# --- INSTANTIATE MODEL ---
model = build_model()
model.summary()
# --- CALLBACKS ---
callbacks = [
    ModelCheckpoint(
        filepath=MODEL_SAVE_PATH,
        monitor='val_mae',
        save_best_only=True,
        mode='min',
        verbose=1
    ),
    EarlyStopping(
        monitor='val_mae',
        patience=PATIENCE_ES,
        mode='min',
        verbose=1,
        restore_best_weights=True
    ),
    ReduceLROnPlateau(
        monitor='val_mae',
        factor=LR_FACTOR,
        patience=PATIENCE_RLR,
        min_lr=MIN_LR,
        mode='min',
        verbose=1
    )
]



In [4]:
import numpy as np
from sklearn.model_selection import KFold

# Number of CV folds
N_SPLITS = 5
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Will hold the dicts of loss/metric curves per fold
all_hist = []

for fold, (train_idx, val_idx) in enumerate(kf.split(training_dataframe), start=1):
    print(f"\n>>> Fold {fold}/{N_SPLITS}")

    # 1) Split the dataframe
    df_train = training_dataframe.iloc[train_idx]
    df_val   = training_dataframe.iloc[val_idx]

    # 2) Build datasets
    train_ds = create_tf_dataset(df_train, shuffle=True,  repeat=True)
    val_ds   = create_tf_dataset(df_val,   shuffle=False, repeat=False)

    # 3) Compute steps
    steps_per_epoch = len(df_train) // BATCH_SIZE
    validation_steps = len(df_val) // BATCH_SIZE

    # 4) Fresh model for this fold
    model = build_model()  # returns a compiled tf.keras.Model

    # 5) (Re-)create callbacks if you want per-fold saving/early stopping
    callbacks = [
        ModelCheckpoint(
            filepath=f'model_fold{fold}.keras',
            monitor='val_mae',
            save_best_only=True,
            mode='min',
            verbose=1
        ),
        EarlyStopping(
            monitor='val_mae',
            patience=PATIENCE_ES,
            mode='min',
            verbose=1,
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_mae',
            factor=LR_FACTOR,
            patience=PATIENCE_RLR,
            min_lr=MIN_LR,
            mode='min',
            verbose=1
        )
    ]

    # 6) Train
    history = model.fit(
        train_ds,
        epochs=EPOCHS,
        steps_per_epoch=steps_per_epoch,
        validation_data=val_ds,
        validation_steps=validation_steps,
        callbacks=callbacks,
        verbose=1
    )

    # 7) Store history and (optionally) final evaluation
    all_hist.append(history.history)

# After loop: all_hist is a list of dicts,
# where each dict maps 'loss','mae','val_loss','val_mae' → list over epochs.



>>> Fold 1/5
Epoch 1/50
[1m300/301[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 43ms/step - loss: 118.5160 - mae: 118.2283
Epoch 1: val_mae improved from inf to 107.17170, saving model to model_fold1.keras
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 63ms/step - loss: 118.4850 - mae: 118.1973 - val_loss: 107.4567 - val_mae: 107.1717 - learning_rate: 5.0000e-04
Epoch 2/50
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 98.6505 - mae: 98.3610
Epoch 2: val_mae improved from 107.17170 to 96.44464, saving model to model_fold1.keras
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 56ms/step - loss: 98.6304 - mae: 98.3408 - val_loss: 96.7676 - val_mae: 96.4446 - learning_rate: 5.0000e-04
Epoch 3/50
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 74.4978 - mae: 74.1593
Epoch 3: val_mae improved from 96.44464 to 55.88615, saving model to model_fold1.keras
[1m301/301[0m 

In [5]:

# after CV, average your metric, e.g.:
val_maes = np.array([h["val_mae"][-1] for h in all_hist])
print("Per-fold final val MAE:", val_maes)
print("CV mean val MAE:    ", val_maes.mean())
print("CV std  val MAE:    ", val_maes.std())

Per-fold final val MAE: [17.58379745 24.67361832 20.71093178 32.45626068 16.05643463]
CV mean val MAE:     22.296208572387695
CV std  val MAE:     5.874239284129035
