In [None]:
!pip install tf-keras keras-cv-attention-models keras-tuner -q

import os
import sys

os.environ["TF_USE_LEGACY_KERAS"] = "1"

import tensorflow as tf
import tf_keras
import numpy as np
import pandas as pd
import keras_tuner

tf.keras = tf_keras
sys.modules["tensorflow.keras"] = tf_keras

from tf_keras import layers, Model
from tf_keras.optimizers import AdamW
from keras_cv_attention_models import maxvit
from google.colab import drive
from sklearn.model_selection import train_test_split
from tf_keras import mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

import gc

drive.mount('/content/drive')

print("TensorFlow Version:", tf.__version__)
print("Keras Version (should be ~2.15 via tf_keras):", tf_keras.__version__)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.0/190.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.6/802.6 kB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
TensorFlow Version: 2.19.0
Keras Version (should be ~2.15 via tf_keras): 2.19.0


In [None]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 16
CLASSES = ["acne", "pigmentation", "wrinkles"]
DATA_ROOT = "/content/drive/MyDrive/skincareapp/acne clean pigmentation wrinkles"

# Define the model path here so it is available globally
FINAL_MODEL_PATH = os.path.join(DATA_ROOT, "maxvit_tiny_skin_model_FINAL_TUNED.keras")

In [None]:
df = pd.read_csv(os.path.join(DATA_ROOT, "labels.csv"))
df["filename"] = df["filename"].apply(lambda x: os.path.join(DATA_ROOT, x))

# Stratified split to keep class balance
train_val_df, test_df = train_test_split(df, test_size=0.15, random_state=42, stratify=df[CLASSES])
train_df, val_df = train_test_split(train_val_df, test_size=0.15, random_state=42, stratify=train_val_df[CLASSES])

pos_counts = train_df[CLASSES].sum().values
total_train_samples = len(train_df)

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Test samples: {len(test_df)}")

Training samples: 3656
Validation samples: 646
Test samples: 760


In [None]:
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.3),
    layers.RandomZoom(0.3),
    layers.RandomContrast(0.2),
], name="data_augmentation")

def parse_function(filename, labels):
    image_string = tf.io.read_file(filename)
    image_decoded = tf.io.decode_jpeg(image_string, channels=3)
    # Convert to float32 in [0, 1] range
    image = tf.image.convert_image_dtype(image_decoded, tf.float32)
    image_resized = tf.image.resize(image, IMG_SIZE)
    return image_resized, labels

def create_dataset(df, batch_size, augment=False, cache_file=None):
    dataset = tf.data.Dataset.from_tensor_slices(
        (df["filename"].values, df[CLASSES].values.astype(np.float32))
    )
    dataset = dataset.map(parse_function, num_parallel_calls=tf.data.AUTOTUNE)

    if augment:
        dataset = dataset.map(lambda x, y: (data_augmentation(x, training=True), y),
                              num_parallel_calls=tf.data.AUTOTUNE)

    if cache_file:
        dataset = dataset.cache(cache_file)
    else:
        dataset = dataset.cache()

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

# Define cache paths
train_cache_file = os.path.join(DATA_ROOT, 'maxvit_train_cache')
val_cache_file = os.path.join(DATA_ROOT, 'maxvit_val_cache')

# Create datasets
train_ds = create_dataset(train_df, BATCH_SIZE, augment=True, cache_file=train_cache_file)
val_ds = create_dataset(val_df, BATCH_SIZE, augment=False, cache_file=val_cache_file)
test_ds = create_dataset(test_df, BATCH_SIZE, augment=False)

print("Data pipelines created.")

Data pipelines created.


In [None]:
def create_weighted_bce_loss(pos_counts, total_samples, smooth=0.05):
    pos = tf.constant(pos_counts, dtype=tf.float32)
    neg = total_samples - pos
    w_pos = neg / tf.maximum(pos, 1.0)
    w_neg = tf.ones_like(pos)

    def weighted_bce(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        y_true = y_true * (1.0 - smooth) + 0.5 * smooth
        bce = tf.keras.backend.binary_crossentropy(y_true, y_pred)
        weights = y_true * w_pos + (1.0 - y_true) * w_neg
        return tf.reduce_mean(bce * weights)

    return weighted_bce

loss_fn = create_weighted_bce_loss(pos_counts, total_train_samples)
custom_objects = {"weighted_bce": loss_fn}

In [None]:
def build_hyper_model(hp):
    hp_dropout = hp.Float('dropout', 0.2, 0.5, step=0.1)

    # This bridges the gap between your float32 dataset and the float16 model
    inputs = layers.Input(shape=IMG_SIZE + (3,), dtype='float32')

    # Initialize MaxViT Tiny
    # Note: We must also force the base model to use float32 inputs if needed,
    # but usually connecting it to the float32 input tensor is enough.
    base_model = maxvit.MaxViT_Tiny(
        input_shape=IMG_SIZE + (3,),
        pretrained="imagenet",
        num_classes=0
    )

    # The base_model will automatically cast the float32 input to float16
    x = base_model(inputs)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(hp_dropout)(x)

    # Ensure output is float32 for stable loss calculation
    outputs = layers.Dense(len(CLASSES), activation="sigmoid", dtype='float32')(x)

    model = Model(inputs, outputs, name="maxvit_tiny_hyper_model")
    return model

In [None]:
class CustomTuner(keras_tuner.RandomSearch):
    def __init__(self, loss_function, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_function = loss_function

    def run_trial(self, trial, train_ds, val_ds, **kwargs):
        tf.keras.backend.clear_session()
        gc.collect()
        hp = trial.hyperparameters
        model = self.hypermodel.build(hp)

        all_metrics = [
            tf.keras.metrics.BinaryAccuracy(name="acc", threshold=0.5),
            tf.keras.metrics.AUC(name="auc", multi_label=True),
            tf.keras.metrics.Precision(name="precision", thresholds=0.5),
            tf.keras.metrics.Recall(name="recall", thresholds=0.5),
        ]

        # STAGE 1: Train Head Only
        print(f"\n[Trial {trial.trial_id}] Stage 1: Training head...")
        head_lr = hp.Float('head_lr', 1e-4, 1e-3, sampling='log')

        # Freeze the MaxViT base layers.
        # In this library, the base is often the whole model except our new head.
        # We can freeze layers by index since we know we added layers at the end.
        # Freezing everything except the last 3 layers (GAP, Dropout, Dense)
        for layer in model.layers[:-3]:
             layer.trainable = False

        model.compile(
            optimizer=AdamW(learning_rate=head_lr, weight_decay=1e-4),
            loss=self.loss_function,
            metrics=all_metrics
        )

        model.fit(train_ds, validation_data=val_ds, epochs=8, verbose=1)

        # STAGE 2: Fine-Tuning
        print(f"\n[Trial {trial.trial_id}] Stage 2: Fine-tuning...")
        finetune_lr = hp.Float('finetune_lr', 1e-6, 5e-5, sampling='log')

        # Unfreeze all
        for layer in model.layers:
            layer.trainable = True

        model.compile(
            optimizer=AdamW(learning_rate=finetune_lr, weight_decay=1e-4),
            loss=self.loss_function,
            metrics=all_metrics
        )

        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=3, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", factor=0.2, patience=2)
        ]

        model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=40,
            callbacks=callbacks,
            initial_epoch=8,
            verbose=1
        )

        eval_results = model.evaluate(val_ds, return_dict=True, verbose=0)
        return {f"val_{k}": v for k, v in eval_results.items()}

In [None]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best HPs found:", best_hps.values)

final_model = build_hyper_model(best_hps)
final_model.summary()

final_callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=7, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", factor=0.2, patience=3, min_lr=1e-6),
    tf.keras.callbacks.ModelCheckpoint(FINAL_MODEL_PATH, monitor="val_auc", mode="max", save_best_only=True)
]

print("\n--- Final Training Stage 1 ---")
for layer in final_model.layers[:-3]:
     layer.trainable = False

final_model.compile(
    optimizer=AdamW(learning_rate=best_hps.get('head_lr'), weight_decay=1e-4),
    loss=loss_fn,
    metrics=[tf.keras.metrics.BinaryAccuracy(name="acc"), tf.keras.metrics.AUC(name="auc", multi_label=True), tf.keras.metrics.Precision(name="precision"), tf.keras.metrics.Recall(name="recall")]
)
history_head = final_model.fit(train_ds, validation_data=val_ds, epochs=10, verbose=1)

print("\n--- Final Training Stage 2 ---")
for layer in final_model.layers:
    layer.trainable = True

final_model.compile(
    optimizer=AdamW(learning_rate=best_hps.get('finetune_lr'), weight_decay=1e-4),
    loss=loss_fn,
    metrics=[tf.keras.metrics.BinaryAccuracy(name="acc"), tf.keras.metrics.AUC(name="auc", multi_label=True), tf.keras.metrics.Precision(name="precision"), tf.keras.metrics.Recall(name="recall")]
)
history_fine_tune = final_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=70,
    callbacks=final_callbacks,
    initial_epoch=len(history_head.history['loss']),
    verbose=1
)

Best HPs found: {'dropout': 0.30000000000000004, 'head_lr': 0.0007015772037865249, 'finetune_lr': 2.937665256971339e-05}
>>>> Load pretrained from: /root/.keras/models/maxvit_tiny_224_imagenet.h5
Model: "maxvit_tiny_hyper_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 maxvit_tiny (Functional)    (None, 7, 7, 512)         30187464  
                                                                 
 global_average_pooling2d_1  (None, 512)               0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 3)       

In [11]:
print(f"\nLoading best saved final model from: {FINAL_MODEL_PATH}")
loaded_best_model = tf.keras.models.load_model(FINAL_MODEL_PATH, custom_objects=custom_objects)

print("\nEvaluating the final tuned MaxViT-Tiny model on the test set...")
test_results = loaded_best_model.evaluate(test_ds, return_dict=True)

print("\nFinal MaxViT-Tiny Test Set Results")
for metric, value in test_results.items():
    print(f"{metric}: {value:.4f}")

precision = test_results.get('precision', 0.0)
recall = test_results.get('recall', 0.0)
if precision + recall > 0:
    f1 = 2 * (precision * recall) / (precision + recall)
    print(f"F1 Score: {f1:.4f}")
else:
    print("F1 Score: 0.0")


Loading best saved final model from: /content/drive/MyDrive/acne clean pigmentation wrinkles/maxvit_tiny_skin_model_FINAL_TUNED.keras

Evaluating the final tuned MaxViT-Tiny model on the test set...

Final MaxViT-Tiny Test Set Results
loss: 0.2564
acc: 0.9904
auc: 0.9996
precision: 0.9628
recall: 0.9888
F1 Score: 0.9756


In [None]:
import gc
import tensorflow as tf
# Ensure we use the legacy keras for compatibility
import tf_keras as keras
from tf_keras import layers, Model
from tf_keras.optimizers import AdamW

# --- FIX 1: REDUCE BATCH SIZE TO PREVENT CRASHES ---
# Fine-tuning MaxViT is memory intensive. We must lower batch size to 16.
BATCH_SIZE = 16

print(f"Re-creating datasets with Batch Size: {BATCH_SIZE}...")
# We assume create_dataset, train_df, val_df are already defined from previous cells
train_ds = create_dataset(train_df, BATCH_SIZE, augment=True, cache_file=os.path.join(DATA_ROOT, 'maxvit_train_cache_v2'))
val_ds = create_dataset(val_df, BATCH_SIZE, augment=False, cache_file=os.path.join(DATA_ROOT, 'maxvit_val_cache_v2'))

# --- FIX 2: MODEL INPUT TYPE FIX ---
def build_hyper_model(hp):
    hp_dropout = hp.Float('dropout', 0.2, 0.5, step=0.1)

    # EXPLICITLY set dtype='float32'.
    # This tells the model: "Expect standard 32-bit images from the dataset"
    # The mixed_precision policy will automatically cast them to float16 *after* this layer.
    inputs = layers.Input(shape=IMG_SIZE + (3,), dtype='float32')

    base_model = maxvit.MaxViT_Tiny(
        input_shape=IMG_SIZE + (3,),
        pretrained="imagenet",
        num_classes=0
    )

    x = base_model(inputs)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(hp_dropout)(x)

    # Output layer stays float32 for stability
    outputs = layers.Dense(len(CLASSES), activation="sigmoid", dtype='float32')(x)

    model = Model(inputs, outputs, name="maxvit_tiny_hyper_model")
    return model

# --- FIX 3: MEMORY CLEANING TUNER ---
class CustomTuner(keras_tuner.RandomSearch):
    def __init__(self, loss_function, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_function = loss_function

    def run_trial(self, trial, train_ds, val_ds, **kwargs):
        # FORCE CLEAN GPU MEMORY BEFORE EACH TRIAL
        tf.keras.backend.clear_session()
        gc.collect()

        hp = trial.hyperparameters
        model = self.hypermodel.build(hp)

        # Standard metrics
        metrics = [
            tf.keras.metrics.BinaryAccuracy(name="acc"),
            tf.keras.metrics.AUC(name="auc", multi_label=True),
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall")
        ]

        # STAGE 1: Train Head
        print(f"\n[Trial {trial.trial_id}] Stage 1: Training head...")
        # Freeze backbone
        for layer in model.layers[:-3]:
             layer.trainable = False

        model.compile(
            optimizer=AdamW(learning_rate=hp.Float('head_lr', 1e-4, 1e-3, sampling='log'), weight_decay=1e-4),
            loss=self.loss_function,
            metrics=metrics
        )
        model.fit(train_ds, validation_data=val_ds, epochs=8, verbose=1)

        # STAGE 2: Fine-Tuning
        print(f"\n[Trial {trial.trial_id}] Stage 2: Fine-tuning...")
        # Unfreeze all
        for layer in model.layers:
            layer.trainable = True

        model.compile(
            optimizer=AdamW(learning_rate=hp.Float('finetune_lr', 1e-6, 5e-5, sampling='log'), weight_decay=1e-4),
            loss=self.loss_function,
            metrics=metrics
        )

        # Callbacks for fine-tuning
        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=3, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", factor=0.2, patience=2)
        ]

        model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=40,
            initial_epoch=8,
            callbacks=callbacks,
            verbose=1
        )

        return {f"val_{k}": v for k, v in model.evaluate(val_ds, return_dict=True, verbose=0).items()}

# --- RUN THE TUNER ---
print("Starting Tuner with Fixes...")
tuner = CustomTuner(
    loss_function=loss_fn,
    hypermodel=build_hyper_model,
    objective=keras_tuner.Objective("val_auc", direction="max"),
    max_trials=4,
    executions_per_trial=1,
    directory=os.path.join(DATA_ROOT, 'keras_tuner_v2'), # New directory to avoid conflicts
    project_name='maxvit_tiny_tuning_fixed',
    overwrite=False
)

tuner.search(train_ds=train_ds, val_ds=val_ds)

Re-creating datasets with Batch Size: 16...
Starting Tuner with Fixes...
Reloading Tuner from /content/drive/MyDrive/skincareapp/acne clean pigmentation wrinkles/keras_tuner_v2/maxvit_tiny_tuning_fixed/tuner0.json

Search: Running Trial #2

Value             |Best Value So Far |Hyperparameter
0.4               |0.3               |dropout
0.00067958        |0.0001            |head_lr
7.0373e-06        |1e-06             |finetune_lr

Downloading data from https://github.com/leondgarse/keras_cv_attention_models/releases/download/maxvit/maxvit_tiny_224_imagenet.h5
>>>> Load pretrained from: /root/.keras/models/maxvit_tiny_224_imagenet.h5

[Trial 1] Stage 1: Training head...
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8

[Trial 1] Stage 2: Fine-tuning...
Epoch 9/40