In [2]:
# ==============================================================================
# CELL 1: Setup and Imports
# ==============================================================================
import tensorflow as tf
import pandas as pd
import numpy as np
import os, random
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, Model

# Sidenote: Mount Google Drive to access your dataset and save models.
drive.mount('/content/drive')

print("TensorFlow Version:", tf.__version__)


Mounted at /content/drive
TensorFlow Version: 2.19.0


In [9]:
# ==============================================================================
# CELL 2: Configuration and Seed
# ==============================================================================
# Sidenote: A function to set random seeds for reproducibility.
def set_seeds(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

set_seeds()

# --- Configuration ---
IMG_SIZE = (224, 224)  # Sidenote: All images will be resized to this dimension.
BATCH_SIZE = 32        # Sidenote: Number of images the model processes at one time.
CLASSES = ["acne", "pigmentation", "wrinkles"]  # Sidenote: Target labels for multi-label classification.
DATA_ROOT = "/content/drive/MyDrive/acne clean pigmentation wrinkles/"  # (paths kept the same)

# --- Load CSV and prepare file paths ---
df = pd.read_csv(os.path.join(DATA_ROOT, "labels.csv"))
df["filename"] = df["filename"].apply(lambda x: os.path.join(DATA_ROOT, x))


In [10]:
# ==============================================================================
# CELL 3: Data Splitting (Train, Validation, Test)
# ==============================================================================
# Sidenote: Stratified multi-label split.
train_val_df, test_df = train_test_split(
    df,
    test_size=0.15,
    random_state=42,
    stratify=df[CLASSES]
)

train_df, val_df = train_test_split(
    train_val_df,
    test_size=0.15,
    random_state=42,
    stratify=train_val_df[CLASSES]
)

# --- Class counts for weighted loss ---
pos_counts = train_df[CLASSES].sum().values.astype("float32")
neg_counts = (len(train_df) - pos_counts).astype("float32")

print("Train size:", len(train_df), "| Val size:", len(val_df), "| Test size:", len(test_df))
print("Pos counts:", pos_counts, "| Neg counts:", neg_counts)


Train size: 3656 | Val size: 646 | Test size: 760
Pos counts: [1015.  386.  738.] | Neg counts: [2641. 3270. 2918.]


In [11]:
# ==============================================================================
# CELL 4: Create tf.data Pipelines
# ==============================================================================
def parse_function(filename, labels):
    image_string = tf.io.read_file(filename)
    image_decoded = tf.io.decode_jpeg(image_string, channels=3)
    image = tf.image.convert_image_dtype(image_decoded, tf.float32)
    image_resized = tf.image.resize(image, IMG_SIZE)
    return image_resized, labels

def create_dataset(df, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(
        (df["filename"].values, df[CLASSES].values.astype(np.float32))
    )
    dataset = dataset.map(parse_function, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.cache()
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

train_ds = create_dataset(train_df, BATCH_SIZE)
val_ds   = create_dataset(val_df,   BATCH_SIZE)
test_ds  = create_dataset(test_df,  BATCH_SIZE)


In [12]:
# ==============================================================================
# CELL 5: MobileViT Model Implementation (TensorFlow/Keras)
# ==============================================================================
# This is a compact MobileViT-style implementation:
# 1) Local convs
# 2) Unfold (non-overlapping patches) -> Transformer (global) -> Fold back
# 3) Fuse local + global features

class MobileViTBlock(layers.Layer):
    def __init__(
        self,
        dim,                 # transformer channel dimension
        patch_size=2,        # non-overlapping unfolding patch size over feature map
        depth=2,             # number of transformer blocks
        num_heads=4,
        mlp_ratio=2.0,
        drop_rate=0.0,
        attn_drop=0.0,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.dim = dim
        self.patch_size = patch_size
        self.depth = depth
        self.num_heads = num_heads
        self.mlp_ratio = mlp_ratio
        self.drop_rate = drop_rate
        self.attn_drop = attn_drop

        # Local representation (lightweight convs)
        self.local_conv1 = layers.Conv2D(dim, 3, padding="same", activation="swish")
        self.local_conv2 = layers.Conv2D(dim, 1, padding="same", activation="swish")

        # Transformer parts (stacked)
        self.norms1 = [layers.LayerNormalization(epsilon=1e-6) for _ in range(depth)]
        self.attns  = [layers.MultiHeadAttention(num_heads=num_heads, key_dim=dim, dropout=attn_drop)
                       for _ in range(depth)]
        self.dropouts = [layers.Dropout(drop_rate) for _ in range(depth)]
        self.norms2 = [layers.LayerNormalization(epsilon=1e-6) for _ in range(depth)]
        self.mlps   = []
        for _ in range(depth):
            self.mlps.append(tf.keras.Sequential([
                layers.Dense(int(dim * mlp_ratio), activation="swish"),
                layers.Dropout(drop_rate),
                layers.Dense(dim),
                layers.Dropout(drop_rate)
            ]))

        # Projection to/from patch-emb dims
        self.proj_in  = layers.Dense(dim)
        self.proj_out = None  # initialized in build() since it depends on C and patch_size

        # Fusion after folding back
        self.fuse_conv = layers.Conv2D(dim, 1, padding="same", activation="swish")

    def build(self, input_shape):
        # input: (B, H, W, C)
        _, H, W, C = input_shape
        p = self.patch_size
        # the unfolded patch feature dim is p*p*C
        self.patch_feat_dim = p * p * C
        self.proj_out = layers.Dense(self.patch_feat_dim)
        super().build(input_shape)

    def _unfold(self, x):
        # x: (B, H, W, C)
        p = self.patch_size
        patches = tf.image.extract_patches(
            images=x,
            sizes=[1, p, p, 1],
            strides=[1, p, p, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # patches: (B, H//p, W//p, p*p*C)
        B = tf.shape(x)[0]
        Ph = tf.shape(patches)[1]
        Pw = tf.shape(patches)[2]
        D  = tf.shape(patches)[3]  # p*p*C
        seq = tf.reshape(patches, (B, Ph * Pw, D))  # (B, N, Pdim)
        return seq, (Ph, Pw)

    def _fold(self, seq, grid_hw, x_hw_c):
        # seq: (B, N, Pdim), where Pdim = p*p*C
        # grid_hw: (Ph, Pw)
        # x_hw_c: (H, W, C) for the original feature shape
        B = tf.shape(seq)[0]
        Ph, Pw = grid_hw
        H, W, C = x_hw_c
        p = self.patch_size
        # back to (B, Ph, Pw, p*p*C)
        patches = tf.reshape(seq, (B, Ph, Pw, p * p * C))
        # fold back to (B, H, W, C)
        x = tf.reshape(patches, (B, Ph, Pw, p, p, C))
        x = tf.transpose(x, [0, 1, 3, 2, 4, 5])  # (B, Ph, p, Pw, p, C)
        x = tf.reshape(x, (B, H, W, C))
        return x

    def call(self, x, training=False):
        # x: (B, H, W, C)
        H = tf.shape(x)[1]
        W = tf.shape(x)[2]
        C = tf.shape(x)[3]
        p = self.patch_size

        # 1) Local representation
        y_local = self.local_conv1(x)
        y_local = self.local_conv2(y_local)

        # 2) Global representation via unfold -> transformer -> fold
        seq, (Ph, Pw) = self._unfold(y_local)                       # (B, N, p*p*C)
        seq = self.proj_in(seq)                                     # (B, N, dim)
        for ln1, attn, drop, ln2, mlp in zip(self.norms1, self.attns, self.dropouts, self.norms2, self.mlps):
            # Transformer block
            z = ln1(seq)
            z = attn(z, z, training=training)
            z = drop(z, training=training)
            seq = seq + z
            z = ln2(seq)
            z = mlp(z, training=training)
            seq = seq + z

        seq = self.proj_out(seq)                                    # (B, N, p*p*C)
        y_global = self._fold(seq, (Ph, Pw), (H, W, C))             # (B, H, W, C)

        # 3) Fusion (concat + 1x1)
        y = tf.concat([x, y_local, y_global], axis=-1)
        y = self.fuse_conv(y)
        return y

def build_mobilevit_backbone(input_tensor, variant="s"):
    # A tiny MobileViT-style backbone with downsampling stems + three MVIT blocks.
    # You can tweak channels/patch_sizes/depths for XXS/XS/S variants.
    if variant == "xxs":
        dims = [48, 64, 80]
        depths = [2, 2, 2]
        heads = [2, 2, 4]
        patches = [2, 2, 2]
    elif variant == "xs":
        dims = [64, 80, 96]
        depths = [2, 2, 2]
        heads = [4, 4, 4]
        patches = [2, 2, 2]
    else:  # "s"
        dims = [96, 128, 160]
        depths = [2, 2, 3]
        heads = [4, 4, 5]
        patches = [2, 2, 2]

    x = input_tensor
    # Stem
    x = layers.Conv2D(32, 3, strides=2, padding="same", activation="swish")(x)  # /2
    x = layers.Conv2D(48, 3, strides=2, padding="same", activation="swish")(x)  # /4

    # Stage 1
    x = MobileViTBlock(dim=dims[0], patch_size=patches[0], depth=depths[0], num_heads=heads[0])(x)
    x = layers.Conv2D(dims[0], 3, strides=2, padding="same", activation="swish")(x)  # /8

    # Stage 2
    x = MobileViTBlock(dim=dims[1], patch_size=patches[1], depth=depths[1], num_heads=heads[1])(x)
    x = layers.Conv2D(dims[1], 3, strides=2, padding="same", activation="swish")(x)  # /16

    # Stage 3
    x = MobileViTBlock(dim=dims[2], patch_size=patches[2], depth=depths[2], num_heads=heads[2])(x)

    return x  # feature map

def build_mobilevit(input_shape, num_classes, variant="s"):
    inputs = layers.Input(shape=input_shape)
    feats = build_mobilevit_backbone(inputs, variant=variant)
    x = layers.GlobalAveragePooling2D()(feats)
    outputs = layers.Dense(num_classes, activation="sigmoid")(x)  # multi-label
    model = Model(inputs, outputs, name=f"mobilevit_{variant}")
    return model


In [13]:
# ==============================================================================
# CELL 6: Compile and Train the MobileViT Model
# ==============================================================================
tf.keras.backend.clear_session()
mobilevit_model = build_mobilevit(input_shape=IMG_SIZE + (3,), num_classes=len(CLASSES), variant="s")

# --- Custom Weighted Binary Cross-Entropy Loss ---
# Uses per-class weights derived from pos/neg counts calculated in Cell 3.
pos_counts_tf = tf.constant(pos_counts, dtype=tf.float32)
neg_counts_tf = tf.constant(neg_counts, dtype=tf.float32)
pos_weight = neg_counts_tf / (pos_counts_tf + 1e-6)      # higher weight for rare positives
neg_weight = tf.ones_like(pos_weight)                     # keep negatives weight = 1

def weighted_bce(y_true, y_pred, smooth=0.05, eps=1e-7):
    # Label smoothing for stability on small datasets
    y_true = y_true * (1.0 - smooth) + 0.5 * smooth
    y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)

    # Broadcast weights: (batch, classes)
    w_pos = pos_weight[tf.newaxis, :]
    w_neg = neg_weight[tf.newaxis, :]

    loss_pos = - w_pos * y_true * tf.math.log(y_pred)
    loss_neg = - w_neg * (1.0 - y_true) * tf.math.log(1.0 - y_pred)
    loss = loss_pos + loss_neg
    return tf.reduce_mean(loss)

mobilevit_model.compile(
    optimizer=tf.keras.optimizers.AdamW(1e-4, weight_decay=1e-5),
    loss=weighted_bce,
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name="acc", threshold=0.5),
        tf.keras.metrics.AUC(name="auc", multi_label=True),
        tf.keras.metrics.Precision(name="precision"),
        tf.keras.metrics.Recall(name="recall"),
    ],
)

MOBILEVIT_MODEL_PATH = os.path.join(DATA_ROOT, "mobilevit_skin_model.keras")
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=7, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", factor=0.5, patience=3, min_lr=1e-6),
    tf.keras.callbacks.ModelCheckpoint(MOBILEVIT_MODEL_PATH, monitor="val_auc", mode="max", save_best_only=True),
]

print("\n💪 Starting MobileViT model training...")
history_mobilevit = mobilevit_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=50,                 # EarlyStopping will likely stop earlier
    callbacks=callbacks,
    verbose=1
)



💪 Starting MobileViT model training...
Epoch 1/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1943s[0m 16s/step - acc: 0.5261 - auc: 0.5979 - loss: 1.1376 - precision: 0.2345 - recall: 0.6295 - val_acc: 0.7477 - val_auc: 0.8491 - val_loss: 0.9406 - val_precision: 0.4259 - val_recall: 0.8439 - learning_rate: 1.0000e-04
Epoch 2/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 226ms/step - acc: 0.7468 - auc: 0.8354 - loss: 0.9156 - precision: 0.4197 - recall: 0.7997 - val_acc: 0.7724 - val_auc: 0.8534 - val_loss: 0.9123 - val_precision: 0.4536 - val_recall: 0.8148 - learning_rate: 1.0000e-04
Epoch 3/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 232ms/step - acc: 0.7696 - auc: 0.8488 - loss: 0.8874 - precision: 0.4468 - recall: 0.7934 - val_acc: 0.7879 - val_auc: 0.8774 - val_loss: 0.8535 - val_precision: 0.4747 - val_recall: 0.8175 - learning_rate: 1.0000e-04
Epoch 4/50
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [14]:
# ==============================================================================
# CELL 7: Evaluate the MobileViT Model on the Test Set
# ==============================================================================
# No custom layers are required to reload this model (only standard Keras layers),
# but we include the custom loss for completeness if you want to load with compile=True.
custom_objects = {
    "MobileViTBlock": MobileViTBlock,
    "weighted_bce": weighted_bce
}

loaded_model = tf.keras.models.load_model(MOBILEVIT_MODEL_PATH, custom_objects=custom_objects, compile=False)
loaded_model.compile(
    optimizer=tf.keras.optimizers.AdamW(1e-4, weight_decay=1e-5),
    loss=weighted_bce,
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name="acc", threshold=0.5),
        tf.keras.metrics.AUC(name="auc", multi_label=True),
        tf.keras.metrics.Precision(name="precision"),
        tf.keras.metrics.Recall(name="recall"),
    ],
)
print("✅ MobileViT model loaded successfully!")

print("\n🔬 Evaluating the final MobileViT on the unseen test set...")
test_results = loaded_model.evaluate(test_ds)

print("\n--- Final Test Set Evaluation Results (MobileViT) ---")
for metric, value in zip(loaded_model.metrics_names, test_results):
    print(f"{metric}: {value:.4f}")


✅ MobileViT model loaded successfully!

🔬 Evaluating the final MobileViT on the unseen test set...
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m317s[0m 13s/step - acc: 0.9497 - auc: 0.9818 - loss: 0.5092 - precision: 0.8216 - recall: 0.9372

--- Final Test Set Evaluation Results (MobileViT) ---
loss: 0.5316
compile_metrics: 0.9447
