# MobileNetV2-style image classifier — training pipeline
- Loads processed index.csv / meta.json
- Builds tf.data pipelines with light augmentation
- Defines a custom MobileNetV2 implementation (random initialization)
- Trains with proper regularization and callbacks
- Evaluates on the test set and saves artifacts

In [2]:
import os, json
from pathlib import Path
from collections import Counter

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
# ----------------------------
# Reproducibility & device setup
# ----------------------------
# Global seed, passed to Keras' set_random_seed helper below and reused for
# dataset shuffling further down.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Deterministic op kernels are a best-effort extra: keep going when the call
# fails (not available on all builds), but say so instead of failing silently,
# because it affects how reproducible the run is.
try:
    tf.config.experimental.enable_op_determinism()
except Exception as exc:
    print(
        f"NOTE: op determinism not enabled ({type(exc).__name__}: {exc}); "
        "results may vary slightly between runs."
    )

In [5]:
# ----------------------------
# Paths & configuration
# ----------------------------
# Raw string: the Windows-style path contains "\.", "\d", "\p", "\F" and "\i",
# none of which are valid escape sequences. A plain string literal only works
# by accident and triggers an invalid-escape warning; the raw form keeps the
# exact same value.
INDEX_CSV = r"..\..\data\processed\FC211042_Heshani\index.csv"
BASE_DIR  = Path(INDEX_CSV).parent   # folder that holds index.csv
META_JSON = BASE_DIR / "meta.json"   # optional metadata file next to the index

IMG_SIZE    = 224   # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE  = 32    # tune based on CPU memory
NUM_EPOCHS  = 20
LABEL_SMOOTH = 0.05  # label smoothing used by the cross-entropy loss
L2_WEIGHT    = 1e-4  # L2 penalty applied to the convolution kernels
ALPHA        = 1.0  # width multiplier (e.g., 0.75 / 1.0 / 1.25)

# Output folder for artifacts, created under the current working directory.
REPORTS_DIR = Path.cwd() / "reports_custom_mobilenetv2"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

  INDEX_CSV = "..\..\data\processed\FC211042_Heshani\index.csv"


In [6]:
import pandas as pd

# Quick look at the raw index: its column names and the first three rows.
df = pd.read_csv(INDEX_CSV)
print("Columns in CSV:", df.columns.tolist())
print(df.head(n=3))


Columns in CSV: ['split', 'label', 'path']
   split  label                                               path
0  train  happy  ..\..\data\processed\FC211042_Heshani\train\ha...
1  train  angry  ..\..\data\processed\FC211042_Heshani\train\an...
2  train  happy  ..\..\data\processed\FC211042_Heshani\train\ha...


In [30]:
# ----------------------------
# Load index & metadata (robust path normalization)
# ----------------------------
import re, json
from pathlib import Path
from collections import Counter

# Load the index from disk
df = pd.read_csv(INDEX_CSV)

# 1) Use "filepath" as the canonical column name; accept "path" as an alias
if "filepath" not in df.columns and "path" in df.columns:
    df = df.rename(columns={"path": "filepath"})

# 2) Canonical split names: trim, lower-case, then fold the aliases into "val"
df["split"] = df["split"].astype(str).str.strip().str.lower()
df["split"] = df["split"].replace({"validation": "val", "valid": "val", "dev": "val"})

# 3) Paths in the index are resolved against the folder containing index.csv
index_dir = Path(INDEX_CSV).parent

def resolve_filepath(p_str: str, base_dir: "str | Path | None" = None) -> str:
    """Turn a possibly messy path from index.csv into an absolute POSIX-style path.

    The string is first normalized (backslashes become "/", repeated slashes
    collapse, "/data/data/" becomes "/data/", "./" segments are dropped).
    The normalized path is then tried against several base folders, and the
    first candidate that exists on disk is returned.

    Args:
        p_str: Path as written in the index; converted with ``str()`` first.
        base_dir: Folder to resolve relative paths against. Defaults to the
            module-level ``index_dir`` (the folder containing index.csv).

    Returns:
        The first existing candidate as a forward-slash string. If nothing
        exists, the path resolved against ``base_dir`` is returned anyway so
        the caller can report it as missing.
    """
    root = Path(index_dir if base_dir is None else base_dir)

    # Normalize separators and collapse repeats
    p_norm = str(p_str).replace("\\", "/")
    p_norm = re.sub(r"/+", "/", p_norm)
    # Fix common duplication like "/data/data/"
    p_norm = p_norm.replace("/data/data/", "/data/")
    # Remove accidental "./" segments. The dot must be escaped with a single
    # backslash inside the raw string; the previous pattern (r"\\.") looked for
    # a literal backslash and therefore never matched.
    p_norm = re.sub(r"(^|/)\.(/|$)", r"\1", p_norm)

    p = Path(p_norm)

    # If already absolute and exists, return as-is
    if p.is_absolute() and p.exists():
        return p.as_posix()

    # Candidate order matters; the first hit wins:
    #   1. relative to the base folder,
    #   2. relative to its three nearest ancestors (repo-root patterns),
    #   3. the same four bases again with any leading "../" segments stripped.
    bases = [root.parent, root.parent.parent, root.parent.parent.parent]
    p_clean = Path(re.sub(r"^(\.\./)+", "", p_norm))
    candidates = [root / p]
    candidates += [base / p for base in bases]
    candidates += [base / p_clean for base in [root, *bases]]

    for cand in candidates:
        resolved = cand.resolve()
        if resolved.exists():
            return resolved.as_posix()

    # Fall back to best guess (so we can log it later)
    return (root / p).resolve().as_posix()

# Replace every index entry with its resolved, forward-slash path
assert "filepath" in df.columns, "index.csv must contain a 'path' or 'filepath' column"
df["filepath"] = df["filepath"].apply(resolve_filepath)

# 4) Existence check: list a handful of missing files, abort at 1% or more
missing = [fp for fp in df["filepath"] if not Path(fp).exists()]
if len(missing) > 0:
    print(f"WARNING: {len(missing)} file(s) not found. First few:")
    for lost in missing[:5]:
        print("  ", lost)

# max(..., 1) keeps the ratio defined for an empty index
missing_ratio = len(missing) / max(len(df), 1)
assert missing_ratio < 0.01, (
    f"Too many missing files ({len(missing)} of {len(df)}). "
    f"Please verify path roots in index.csv."
)

# 5) The rest of the notebook relies on these three columns
assert {"filepath", "label", "split"} <= set(df.columns), "index.csv must contain filepath,label,split"

# ----------------------------
# Label mapping and class weights
# ----------------------------
# Label <-> id mapping: ids are assigned in sorted label order
classes = sorted(df["label"].unique().tolist())
class2id = dict(zip(classes, range(len(classes))))
id2class = dict(enumerate(classes))

# Derive label_id only when the index does not already carry that column
if "label_id" not in df.columns:
    df["label_id"] = df["label"].map(class2id).astype(int)
NUM_CLASSES = len(classes)

# Optional: class weights from meta.json
# Only a dict under the "class_weights" key is accepted; its keys are converted
# to int and its values to float.
# NOTE(review): nothing here checks that those integer ids line up with the
# class2id mapping built from the sorted labels - confirm against the script
# that wrote meta.json.
class_weights = None
if META_JSON.exists():
    try:
        # Context manager so the file handle is closed even if parsing fails.
        with open(META_JSON, "r", encoding="utf-8") as meta_file:
            meta = json.load(meta_file)
        if "class_weights" in meta and isinstance(meta["class_weights"], dict):
            class_weights = {int(k): float(v) for k, v in meta["class_weights"].items()}
    except (OSError, ValueError, TypeError) as exc:
        # Still best-effort (class_weights stays None), but report the problem
        # rather than hiding an unreadable or malformed meta.json.
        print(f"WARNING: ignoring class weights from {META_JSON}: {exc!r}")

# One frame per split, each re-indexed from zero
train_df, val_df, test_df = (
    df[df["split"] == split_name].reset_index(drop=True)
    for split_name in ("train", "val", "test")
)

# No usable weights so far: derive them from the train split as
# total / (NUM_CLASSES * count); a class absent from train counts as 1.
if class_weights is None and len(train_df) > 0:
    counts = Counter(train_df["label_id"].tolist())
    total  = sum(counts.values())
    class_weights = {
        i: total / (NUM_CLASSES * counts.get(i, 1))
        for i in range(NUM_CLASSES)
    }

print(f"Classes: {classes}")
print(f"Splits → train: {len(train_df)}, val: {len(val_df)}, test: {len(test_df)}")
print("Class weights:", class_weights)


Classes: ['angry', 'happy', 'neutral', 'sad', 'surprise']
Splits → train: 21758, val: 2418, test: 6043
Class weights: {0: 1.2101223581757508, 1: 0.6701986754966888, 2: 1.001058201058201, 3: 1.5247372109320252, 4: 0.9739480752014325}


In [None]:
# ----------------------------
# Data pipeline
# ----------------------------
AUTOTUNE = tf.data.AUTOTUNE

def decode_and_resize(path: tf.Tensor) -> tf.Tensor:
    """Load the image at `path` as a 3-channel IMG_SIZE x IMG_SIZE float32 tensor in [0,1]."""
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False: animated files are not expanded into frames
    pixels = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(pixels, (IMG_SIZE, IMG_SIZE), method="bicubic")
    # Clamp to [0, 255] before scaling so the final range is exactly [0, 1]
    bounded = tf.clip_by_value(resized, 0, 255)
    return tf.cast(bounded, tf.float32) / 255.0

@tf.function
def make_example(path, label_id):
    """Map one (path, label_id) pair to (decoded image, int32 label)."""
    label = tf.cast(label_id, tf.int32)
    return decode_and_resize(path), label

# Conservative, CPU-friendly augmentation stack, applied in the order added:
# horizontal flip, rotation, zoom, translation, contrast.
augmenter = keras.Sequential(name="augment")
augmenter.add(layers.RandomFlip("horizontal"))
augmenter.add(layers.RandomRotation(0.05))
augmenter.add(layers.RandomZoom(0.10))
augmenter.add(layers.RandomTranslation(0.05, 0.05))
augmenter.add(layers.RandomContrast(0.10))

def make_ds(frame: pd.DataFrame, shuffle: bool, augment: bool) -> tf.data.Dataset:
    """Build a batched, prefetched dataset of (image, label) pairs from an index frame.

    Args:
        frame: Rows to load; reads the "filepath" and "label_id" columns.
        shuffle: Shuffle with a buffer covering every row, reshuffled each iteration.
        augment: Run each decoded image through `augmenter` in training mode.

    Returns:
        A dataset yielding batches of BATCH_SIZE examples.
    """
    file_paths = frame["filepath"].astype(str).tolist()
    label_ids = frame["label_id"].astype(int).tolist()

    dataset = tf.data.Dataset.from_tensor_slices((file_paths, label_ids))
    if shuffle:
        # Shuffling happens on the (path, label) pairs, before any image is decoded
        dataset = dataset.shuffle(
            buffer_size=len(file_paths),
            seed=SEED,
            reshuffle_each_iteration=True,
        )
    dataset = dataset.map(make_example, num_parallel_calls=AUTOTUNE)

    if augment:
        def _augment(image, label):
            return augmenter(image, training=True), label

        dataset = dataset.map(_augment, num_parallel_calls=AUTOTUNE)

    return dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Rebuild the per-split frames so this cell does not depend on earlier split variables
train_df, val_df, test_df = (
    df[df["split"] == split_name].reset_index(drop=True)
    for split_name in ("train", "val", "test")
)

# Only the training stream is shuffled and augmented
train_ds = make_ds(train_df, shuffle=True, augment=True)
val_ds, test_ds = (
    make_ds(split_frame, shuffle=False, augment=False)
    for split_frame in (val_df, test_df)
)

# Convert to one-hot for categorical cross-entropy
def to_one_hot(ds: tf.data.Dataset, n: int) -> tf.data.Dataset:
    """Return `ds` with each label replaced by a one-hot vector of depth `n`."""
    def _encode(features, label):
        return features, tf.one_hot(label, n)

    return ds.map(_encode, num_parallel_calls=AUTOTUNE)

# One-hot views of all three splits
train_ds_oh, val_ds_oh, test_ds_oh = (
    to_one_hot(split_ds, NUM_CLASSES) for split_ds in (train_ds, val_ds, test_ds)
)

In [32]:
# ----------------------------
# Custom MobileNetV2 components (random initialization)
# ----------------------------
def relu6(x):
    """Apply a ReLU capped at 6.0 to `x` using a fresh `layers.ReLU` instance."""
    capped_relu = layers.ReLU(max_value=6.0)
    return capped_relu(x)

def conv_bn_relu(x, filters, k, s, name):
    """Bias-free k x k convolution (stride `s`, "same" padding, L2-regularized)
    followed by batch normalization and ReLU6.

    Layers are named "<name>_conv" and "<name>_bn".
    """
    conv = layers.Conv2D(
        filters,
        k,
        strides=s,
        padding="same",
        use_bias=False,
        kernel_regularizer=keras.regularizers.l2(L2_WEIGHT),
        name=f"{name}_conv",
    )
    norm = layers.BatchNormalization(name=f"{name}_bn")
    return relu6(norm(conv(x)))

def inverted_res_block(x, t, out_channels, stride, block_id):
    """
    One inverted residual block.

    Stages: optional 1x1 expansion by factor `t` (skipped when t == 1) with
    BN + ReLU6, then a 3x3 depthwise conv at `stride` with BN + ReLU6, then a
    linear 1x1 projection to `out_channels` with BN only.
    The input is added back when stride == 1 and the channel count is unchanged.
    Layer names are prefixed with "block_<block_id>".
    """
    tag = f"block_{block_id}"
    channels_in = int(x.shape[-1])

    def l2_penalty():
        return keras.regularizers.l2(L2_WEIGHT)

    h = x

    # 1x1 expansion (only when the expansion factor is not 1)
    if t != 1:
        h = layers.Conv2D(
            int(channels_in * t), 1, padding="same", use_bias=False,
            kernel_regularizer=l2_penalty(), name=f"{tag}_expand",
        )(h)
        h = layers.BatchNormalization(name=f"{tag}_expand_bn")(h)
        h = relu6(h)

    # 3x3 depthwise convolution; carries the block's stride
    h = layers.DepthwiseConv2D(
        3, strides=stride, padding="same", use_bias=False,
        depthwise_regularizer=l2_penalty(), name=f"{tag}_dw",
    )(h)
    h = layers.BatchNormalization(name=f"{tag}_dw_bn")(h)
    h = relu6(h)

    # 1x1 linear projection (no activation after the batch norm)
    h = layers.Conv2D(
        out_channels, 1, padding="same", use_bias=False,
        kernel_regularizer=l2_penalty(), name=f"{tag}_project",
    )(h)
    h = layers.BatchNormalization(name=f"{tag}_project_bn")(h)

    # Skip connection needs matching spatial size and channel count
    if stride != 1 or channels_in != out_channels:
        return h
    return layers.Add(name=f"{tag}_add")([x, h])

def build_custom_mobilenetv2(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES, alpha=ALPHA):
    """
    Assemble the custom MobileNetV2 classifier.

    Layout: stride-2 3x3 stem, seven stages of inverted residual blocks, a final
    1x1 convolution, then global average pooling, dropout (0.20) and a softmax
    dense layer with `num_classes` units.
    `alpha` scales every stage width (never below 8 channels); the final 1x1
    conv stays at 1280 channels unless alpha > 1.0.
    Returns a `keras.Model` named "CustomMobileNetV2_a<alpha>".
    """
    def scaled(channels):
        # Width multiplier with a floor of 8 channels
        return max(8, int(channels * alpha))

    inputs = keras.Input(shape=input_shape, name="input")

    # Stem halves the resolution (224 -> 112 for the default input size)
    x = conv_bn_relu(inputs, scaled(32), 3, 2, name="stem")

    # Per stage: (expansion t, base channels c, repeats n, stride s of the first block).
    # Spatial sizes in the comments assume the default 224x224 input.
    stage_specs = (
        (1,   16, 1, 1),
        (6,   24, 2, 2),  # 112 -> 56
        (6,   32, 3, 2),  # 56 -> 28
        (6,   64, 4, 2),  # 28 -> 14
        (6,   96, 3, 1),  # stays 14
        (6,  160, 3, 2),  # 14 -> 7
        (6,  320, 1, 1),  # stays 7
    )

    next_block_id = 1
    for expansion, base_channels, repeats, first_stride in stage_specs:
        stage_width = scaled(base_channels)
        for position in range(repeats):
            # Only the first block of a stage may downsample
            block_stride = first_stride if position == 0 else 1
            x = inverted_res_block(
                x,
                t=expansion,
                out_channels=stage_width,
                stride=block_stride,
                block_id=next_block_id,
            )
            next_block_id += 1

    # Final 1x1 conv: 1280 channels, widened only when alpha exceeds 1.0
    last_channels = int(1280 * alpha) if alpha > 1.0 else 1280
    x = conv_bn_relu(x, last_channels, 1, 1, name="last")

    # Classification head
    x = layers.GlobalAveragePooling2D(name="global_avg_pool")(x)
    x = layers.Dropout(0.20, name="pre_head_dropout")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="pred")(x)

    return keras.Model(inputs, outputs, name=f"CustomMobileNetV2_a{alpha}")

# Instantiate the network with the builder's defaults
# (IMG_SIZE x IMG_SIZE x 3 input, NUM_CLASSES outputs, width multiplier ALPHA).
model = build_custom_mobilenetv2()
# Print the layer-by-layer summary using 120-character lines.
model.summary(line_length=120)

# ----------------------------
# Optimization, loss, metrics
# ----------------------------
# make_ds() batches without dropping the final partial batch, so an epoch has
# ceil(len(train_df) / BATCH_SIZE) optimizer steps; floor division would
# undercount by one whenever the split size is not a multiple of BATCH_SIZE.
steps_per_epoch = max(1, -(-len(train_df) // BATCH_SIZE))

# Plain cosine decay (a single decay, no warm restarts) spread over ~90% of the
# scheduled epochs. Derived from NUM_EPOCHS so the schedule follows the config;
# this is 18 epochs for NUM_EPOCHS = 20.
decay_epochs = max(1, (NUM_EPOCHS * 9) // 10)
lr_schedule = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=1e-3,
    decay_steps=steps_per_epoch * decay_epochs,
    alpha=1e-4,  # learning-rate floor, as a fraction of the initial rate
)
optimizer = keras.optimizers.Adam(lr_schedule)

# Labels reach the model one-hot encoded, hence the categorical loss and metrics.
loss      = keras.losses.CategoricalCrossentropy(label_smoothing=LABEL_SMOOTH)
metrics   = [
    keras.metrics.CategoricalAccuracy(name="accuracy"),
    keras.metrics.TopKCategoricalAccuracy(k=3, name="top3"),
]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# ----------------------------
# Callbacks
# ----------------------------
callbacks = []

# Write best_model.keras whenever validation accuracy reaches a new maximum
callbacks.append(
    keras.callbacks.ModelCheckpoint(
        filepath=str(REPORTS_DIR / "best_model.keras"),
        monitor="val_accuracy",
        mode="max",
        save_best_only=True,
        verbose=1,
    )
)

# Stop after 5 epochs without a better validation accuracy and restore the best weights
callbacks.append(
    keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        mode="max",
        patience=5,
        restore_best_weights=True,
        verbose=1,
    )
)

# Log per-epoch metrics to history.csv in the reports folder
callbacks.append(keras.callbacks.CSVLogger(str(REPORTS_DIR / "history.csv")))

In [33]:
# ----------------------------
# Train
# ----------------------------
# Fit on the one-hot training stream and validate on the one-hot validation
# stream after each epoch. NUM_EPOCHS is an upper bound: the EarlyStopping
# callback may end training sooner. `class_weights` is passed as Keras'
# `class_weight` argument; the returned History object is kept in `history`.
history = model.fit(
    train_ds_oh,
    validation_data=val_ds_oh,
    epochs=NUM_EPOCHS,
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/20
[1m392/680[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m13:05[0m 3s/step - accuracy: 0.2317 - loss: 2.2377 - top3: 0.6646

KeyboardInterrupt: 