In [None]:
import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving the whole
# device up front, then report the runtime environment.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except (RuntimeError, ValueError) as err:
        # Best-effort setting: keep going, but surface the reason instead of
        # silently swallowing it (e.g. the GPU was already initialised).
        print(f"Could not enable memory growth on {gpu.name}: {err}")
print("TF version:", tf.__version__)
print("GPUs:", gpus)

TF version: 2.18.0
GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
import numpy as np
import random

# One shared seed for every random number generator used in the notebook.
SEED = 1337
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Dataset root; each sub-directory is treated as one class by list_videos.
ROOT_DIR = r"/kaggle/input/sysu-ceus-fll/SYSU-CEUS-FLL"

# Baseline hyper-parameters (deliberately tiny to keep memory usage low).
CFG = dict(
    binary=True,                    # True = merge HEM+FNH into HHE (2-class), False = 3-class
    split_ratio=(0.7, 0.15, 0.15),
    frames=2,                       # reduced to 2 frames to save as much memory as possible
    img_size=32,                    # reduced to 32x32 to avoid kernel crashes
    batch_size=2,                   # smallest batch size to avoid OOM
    epochs_head=1,                  # reduced to 1 epoch of head training
    epochs_ft=1,                    # reduced to 1 epoch of fine-tuning
    lr_head=5e-3,                   # raised learning rate to converge faster
    lr_ft=1e-3,                     # raised learning rate for fine-tuning
    weight_decay=1e-6,              # reduced weight decay
    shuffle_buffer=4,               # shuffle buffer reduced to the minimum
    num_parallel_calls=1,           # a single thread to avoid memory spikes
    cache_ds=False,                 # dataset caching turned OFF to save RAM
)

In [None]:
from typing import List, Tuple, Dict
from pathlib import Path


def list_videos(root: str) -> List[Tuple[str, str]]:
    """Collect every ``*.avi`` file found one level below ``root``.

    Each immediate sub-directory of ``root`` is treated as a class and its
    name becomes the label of the videos it contains.

    Args:
        root: Path of the dataset root directory.

    Returns:
        A list of ``(video_path, class_name)`` tuples, ordered by class
        directory and then by video path.
    """
    root_path = Path(root)
    items = []
    for cls_dir in sorted(d for d in root_path.iterdir() if d.is_dir()):
        # glob() does not guarantee any particular order; sort so that the
        # seeded shuffle in the split step produces the same partition on
        # every machine and filesystem.
        for vid in sorted(cls_dir.glob("*.avi")):
            items.append((str(vid), cls_dir.name))
    return items

def make_split(items: List[Tuple[str,str]], ratios=(0.7,0.15,0.15), seed=SEED):
    """Shuffle ``items`` with a private seeded RNG and cut them into three parts.

    Args:
        items: Sequence of ``(path, class_name)`` tuples; not modified.
        ratios: Fractions for train and validation (first two entries); the
            test part receives whatever remains.
        seed: Seed for the private ``random.Random`` instance.

    Returns:
        ``(train, val, test)`` lists.
    """
    shuffled = list(items)
    random.Random(seed).shuffle(shuffled)
    total = len(shuffled)
    train_end = int(total * ratios[0])
    val_end = train_end + int(total * ratios[1])
    return shuffled[:train_end], shuffled[train_end:val_end], shuffled[val_end:]

def label_map_from_items(items: List[Tuple[str,str]], binary=False) -> Dict[str,int]:
    """Build a deterministic ``class name -> integer index`` mapping.

    Class names are normalised with the same rules as ``remap_label``:
    any-case "HCC" becomes "HCC", and in binary mode any-case "HEM"/"FNH"
    collapse into "HHE". Every other name is kept unchanged.

    Args:
        items: Sequence of ``(path, class_name)`` tuples.
        binary: When True, merge HEM and FNH into the single class "HHE".

    Returns:
        Dict mapping each class name to its index. In binary mode "HCC" is
        always index 0 and the remaining names follow alphabetically; in
        multi-class mode all names are ordered alphabetically.
    """
    names = set()
    for _, cls in items:
        upper = cls.upper()
        if upper == "HCC":
            names.add("HCC")
        elif binary and upper in ("HEM", "FNH"):
            names.add("HHE")
        else:
            names.add(cls)
    if binary:
        # HCC first; break ties by name so the order never depends on set
        # iteration order (which varies with string hash randomisation).
        ordered = sorted(names, key=lambda n: (n != "HCC", n))
    else:
        ordered = sorted(names)
    return {name: idx for idx, name in enumerate(ordered)}

def remap_label(label: str, binary: bool) -> str:
    """Normalise a raw class name.

    In binary mode any-case "HEM"/"FNH" become "HHE". Any-case "HCC" always
    becomes "HCC". Every other label is returned unchanged.
    """
    upper = label.upper()
    if binary and upper in ("HEM", "FNH"):
        return "HHE"
    if upper == "HCC":
        return "HCC"
    return label

# Discover all clips, derive the label mapping and split them into
# train / validation / test partitions.
all_items = list_videos(ROOT_DIR)
print("Tổng:", len(all_items))

label_map = label_map_from_items(all_items, CFG["binary"])
train_items, val_items, test_items = make_split(all_items, CFG["split_ratio"], SEED)

# Class names listed in index order (position i holds the name of class i).
inv_labels = sorted(label_map, key=label_map.get)
print("Classes:", inv_labels)
print("Train/Val/Test:", len(train_items), len(val_items), len(test_items))


Tổng: 358
Classes: ['HCC', 'HHE']
Train/Val/Test: 250 53 55


In [None]:
import cv2


def uniform_indices(n_frames: int, T: int):
    """Choose ``T`` frame indices spread over a video of ``n_frames`` frames.

    Args:
        n_frames: Number of frames available in the video.
        T: Number of indices to return.

    Returns:
        A list of ``T`` integer indices:
          * ``[]`` when ``T <= 0``;
          * all zeros when the video has no frames (placeholder indices);
          * the middle frame when ``T == 1`` (the evenly-spaced formula would
            divide by ``T - 1 == 0``);
          * evenly spaced indices from the first to the last frame when
            ``n_frames >= T``;
          * the available indices repeated cyclically when ``n_frames < T``.
    """
    if T <= 0:
        return []
    if n_frames <= 0:
        return [0] * T
    if T == 1:
        return [(n_frames - 1) // 2]
    if n_frames >= T:
        last = n_frames - 1
        return [int(round(i * last / (T - 1))) for i in range(T)]
    # Fewer frames than requested: loop over the clip until T indices exist.
    return [i % n_frames for i in range(T)]

def read_avi_as_rgb(path: str) -> np.ndarray:
    """Read every frame of a video as RGB uint8, shape (N, H, W, 3).

    Args:
        path: Path of the video file.

    Returns:
        All decoded frames stacked along axis 0 and converted from BGR to
        RGB. When the file cannot be opened or yields no frame, an empty
        array of shape ``(0, 224, 224, 3)`` is returned instead.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        if cap.isOpened():
            while True:
                ok, frame = cap.read()
                if not ok:
                    break
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Always free the capture handle, including when the file failed to
        # open or a frame conversion raised.
        cap.release()
    if not frames:
        return np.zeros((0,224,224,3), dtype=np.uint8)
    return np.stack(frames, axis=0)

def sample_preprocess_clip(path: str, T: int, img_size: int) -> np.ndarray:
    """Load a video, pick ``T`` frames and resize each to ``img_size`` x ``img_size``.

    An unreadable or empty video is replaced by ``T`` all-zero frames.

    Returns:
        Array of shape ``(T, img_size, img_size, 3)`` holding the resized frames.
    """
    video = read_avi_as_rgb(path)  # (N, H, W, 3)
    n_total = video.shape[0]
    picks = uniform_indices(n_total, T)
    if n_total > 0:
        chosen = video[picks]
    else:
        chosen = np.zeros((T, img_size, img_size, 3), dtype=np.uint8)
    target = (img_size, img_size)
    resized = [cv2.resize(frame, target) for frame in chosen]
    return np.stack(resized, axis=0)


In [None]:
from tensorflow.keras.applications import resnet50, efficientnet, mobilenet_v2

def get_preprocess_fn(arch: str):
    """Return the input-preprocessing callable that matches ``arch``.

    The pretrained backbones use their own Keras ``preprocess_input``; the
    custom tiny CNN uses a plain rescale of [0, 255] to [-1, 1].

    Raises:
        ValueError: If ``arch`` is not one of the supported names.
    """
    if arch == "resnet50_lstm":
        return resnet50.preprocess_input
    if arch == "efficientnet_b0_gru":
        return efficientnet.preprocess_input
    if arch == "mobilenet_v2_gru":
        return mobilenet_v2.preprocess_input
    if arch != "tiny_cnn_gru":
        raise ValueError("Unknown arch")

    def _to_signed_unit(x):
        # Simple normalisation for the custom CNN.
        return (x / 255.0 - 0.5) * 2.0

    return _to_signed_unit

def make_examples(items: List[Tuple[str,str]], binary: bool) -> List[Tuple[str,int]]:
    """Turn ``(path, class_name)`` pairs into ``(path, class_index)`` pairs.

    Class names are normalised with ``remap_label`` and looked up in the
    module-level ``label_map``.
    """
    return [(path, label_map[remap_label(cls, binary)]) for path, cls in items]

def gen_examples(examples: List[Tuple[str,int]], T: int, img_size: int, arch: str):
    """Yield ``(clip, label)`` pairs, decoding one video at a time.

    Each clip is sampled and resized by ``sample_preprocess_clip``, cast to
    float32 and passed through the preprocessing function for ``arch``.
    """
    to_backbone_input = get_preprocess_fn(arch)
    for video_path, label in examples:
        clip = sample_preprocess_clip(video_path, T, img_size).astype(np.float32)  # (T,H,W,3)
        yield to_backbone_input(clip), label

def make_dataset(items, binary, T, img_size, arch, batch_size, shuffle=False, repeat=False, cache=False,
                 shuffle_buffer=None):
    """Build a batched ``tf.data`` pipeline over a list of videos.

    Args:
        items: Sequence of ``(path, class_name)`` tuples.
        binary: Passed to the label remapping (merge HEM/FNH into HHE).
        T: Number of frames per clip.
        img_size: Side length of the square frames.
        arch: Architecture name; selects the input preprocessing.
        batch_size: Number of clips per batch.
        shuffle: Shuffle examples, reshuffling on every iteration.
        repeat: Repeat the batched dataset indefinitely.
        cache: Cache the decoded clips after the first pass.
        shuffle_buffer: Shuffle buffer size; defaults to
            ``CFG["shuffle_buffer"]`` when None.

    Returns:
        ``(dataset, number_of_examples)``.
    """
    examples = make_examples(items, binary)
    output_sig = (
        tf.TensorSpec(shape=(T, img_size, img_size, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32)
    )
    ds = tf.data.Dataset.from_generator(
        lambda: gen_examples(examples, T, img_size, arch),
        output_signature=output_sig
    )
    # Cache BEFORE shuffling: caching a shuffled dataset stores the order of
    # the first epoch and replays it, defeating reshuffle_each_iteration.
    if cache:
        ds = ds.cache()
    if shuffle:
        buffer_size = CFG["shuffle_buffer"] if shuffle_buffer is None else shuffle_buffer
        ds = ds.shuffle(buffer_size, seed=SEED, reshuffle_each_iteration=True)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    if repeat:
        ds = ds.repeat()
    return ds, len(examples)


## ResNet50→LSTM

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers


def build_resnet50_lstm(T, img_size, num_classes, freeze_backbone=True, rnn_units=256, bidir=False, dropout=0.3):
    """Per-frame ResNet50 features followed by an LSTM classifier.

    Args:
        T: Frames per clip.
        img_size: Side length of the square input frames.
        num_classes: Size of the softmax output.
        freeze_backbone: When True the ResNet50 weights are not trainable.
        rnn_units: Number of LSTM units.
        bidir: Wrap the LSTM in a Bidirectional layer.
        dropout: Rate used by the Dropout layer and by the LSTM.

    Returns:
        ``(model, backbone)``.
    """
    clip_in = tf.keras.layers.Input(shape=(T, img_size, img_size, 3))
    base = tf.keras.applications.ResNet50(
        include_top=False,
        weights="imagenet",
        pooling='avg',
        input_shape=(img_size, img_size, 3),
    )
    base.trainable = not freeze_backbone

    # The backbone is applied to every frame independently.
    feats = layers.TimeDistributed(base)(clip_in)
    # Shrink the per-frame features before the recurrent layer.
    feats = layers.Dense(512, activation='relu')(feats)
    feats = layers.Dropout(dropout)(feats)

    rnn = layers.LSTM(rnn_units, dropout=dropout, return_sequences=False)
    if bidir:
        rnn = layers.Bidirectional(rnn)
    summary = rnn(feats)

    probs = layers.Dense(num_classes, activation='softmax')(summary)
    return models.Model(clip_in, probs, name="ResNet50_LSTM"), base

## EfficientNetB0→GRU

In [None]:
from tensorflow.keras import layers

def build_efficientnet_b0_gru(T, img_size, num_classes, freeze_backbone=True, rnn_units=192, bidir=False, dropout=0.3):
    """Per-frame EfficientNetB0 features followed by a GRU classifier.

    Args:
        T: Frames per clip.
        img_size: Side length of the square input frames.
        num_classes: Size of the softmax output.
        freeze_backbone: When True the EfficientNetB0 weights are not trainable.
        rnn_units: Number of GRU units.
        bidir: Wrap the GRU in a Bidirectional layer.
        dropout: Rate used by the Dropout layer and by the GRU.

    Returns:
        ``(model, backbone)``.
    """
    clip_in = tf.keras.layers.Input(shape=(T, img_size, img_size, 3))
    base = tf.keras.applications.EfficientNetB0(
        include_top=False,
        weights="imagenet",
        pooling='avg',
        input_shape=(img_size, img_size, 3),
    )
    base.trainable = not freeze_backbone

    # The backbone is applied to every frame independently.
    feats = layers.TimeDistributed(base)(clip_in)
    # Shrink the per-frame features before the recurrent layer.
    feats = layers.Dense(384, activation='relu')(feats)
    feats = layers.Dropout(dropout)(feats)

    rnn = layers.GRU(rnn_units, dropout=dropout, return_sequences=False)
    if bidir:
        rnn = layers.Bidirectional(rnn)
    summary = rnn(feats)

    probs = layers.Dense(num_classes, activation='softmax')(summary)
    return models.Model(clip_in, probs, name="EffB0_GRU"), base

# Add lightweight MobileNetV2 model for even faster training
def build_mobilenet_v2_gru(T, img_size, num_classes, freeze_backbone=True, rnn_units=128, bidir=False, dropout=0.3):
    """Per-frame MobileNetV2 features followed by a GRU classifier.

    Args:
        T: Frames per clip.
        img_size: Side length of the square input frames.
        num_classes: Size of the softmax output.
        freeze_backbone: When True the MobileNetV2 weights are not trainable.
        rnn_units: Number of GRU units.
        bidir: Wrap the GRU in a Bidirectional layer.
        dropout: Rate used by the Dropout layer and by the GRU.

    Returns:
        ``(model, backbone)``.
    """
    clip_in = tf.keras.layers.Input(shape=(T, img_size, img_size, 3))
    base = tf.keras.applications.MobileNetV2(
        include_top=False,
        weights="imagenet",
        pooling='avg',
        input_shape=(img_size, img_size, 3),
    )
    base.trainable = not freeze_backbone

    # The backbone is applied to every frame independently.
    feats = layers.TimeDistributed(base)(clip_in)
    # Shrink the per-frame features before the recurrent layer.
    feats = layers.Dense(256, activation='relu')(feats)
    feats = layers.Dropout(dropout)(feats)

    rnn = layers.GRU(rnn_units, dropout=dropout, return_sequences=False)
    if bidir:
        rnn = layers.Bidirectional(rnn)
    summary = rnn(feats)

    probs = layers.Dense(num_classes, activation='softmax')(summary)
    return models.Model(clip_in, probs, name="MobileNetV2_GRU"), base

# Add ultra-lightweight custom CNN model for maximum speed
def build_tiny_cnn_gru(T, img_size, num_classes, freeze_backbone=True, rnn_units=64, bidir=False, dropout=0.2):
    """Small from-scratch CNN applied per frame, followed by a GRU classifier.

    Args:
        T: Frames per clip.
        img_size: Side length of the square input frames.
        num_classes: Size of the softmax output.
        freeze_backbone: Accepted only so the signature matches the other
            builders; it is ignored because the CNN is created without
            pretrained weights and is always trainable.
        rnn_units: Number of GRU units.
        bidir: Wrap the GRU in a Bidirectional layer, as the other builders do.
        dropout: Rate used by the Dropout layer and by the GRU.

    Returns:
        ``(model, backbone)`` where ``backbone`` is the per-frame CNN.
    """
    inp = tf.keras.layers.Input(shape=(T, img_size, img_size, 3))

    # Lightweight CNN backbone used instead of a pretrained network.
    base = tf.keras.Sequential([
        layers.Conv2D(16, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(2),
        layers.Conv2D(32, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(2),
        layers.Conv2D(64, 3, padding='same', activation='relu'),
        layers.GlobalAveragePooling2D()
    ])

    x = layers.TimeDistributed(base)(inp)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(dropout)(x)

    rnn = layers.GRU(rnn_units, dropout=dropout, return_sequences=False)
    if bidir:
        rnn = layers.Bidirectional(rnn)
    x = rnn(x)

    out = layers.Dense(num_classes, activation='softmax')(x)
    model = models.Model(inp, out, name="TinyCNN_GRU")
    return model, base

In [None]:
def build_model(arch, T, img_size, num_classes, freeze_backbone=True):
    """Dispatch to the builder that matches ``arch``.

    Returns:
        Whatever the selected builder returns: ``(model, backbone)``.

    Raises:
        ValueError: If ``arch`` is not a supported architecture name.
    """
    if arch == "resnet50_lstm":
        builder = build_resnet50_lstm
    elif arch == "efficientnet_b0_gru":
        builder = build_efficientnet_b0_gru
    elif arch == "mobilenet_v2_gru":
        builder = build_mobilenet_v2_gru
    elif arch == "tiny_cnn_gru":
        builder = build_tiny_cnn_gru
    else:
        raise ValueError("Unknown arch")
    return builder(T, img_size, num_classes, freeze_backbone=freeze_backbone)

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tensorflow.keras import callbacks


def compute_class_weights(items, binary=False):
    """Compute balanced class weights for a set of labelled items.

    The weight of a class is ``total / (n_classes_present * class_count)``,
    so rarer classes receive larger weights.

    Args:
        items: Sequence of ``(path, class_name)`` tuples.
        binary: Passed to ``remap_label`` (merge HEM/FNH into HHE).

    Returns:
        ``(weights, counts)`` where ``weights`` maps class index (taken from
        the module-level ``label_map``) to weight and ``counts`` maps class
        name to number of items. Classes of ``label_map`` that do not occur
        in ``items`` get no weight entry.
    """
    counts = {}
    for _, cls in items:
        name = remap_label(cls, binary)
        counts[name] = counts.get(name, 0) + 1
    total = sum(counts.values())
    # label_map items are (name, index) pairs; walk them in index order.
    ordered = sorted(label_map.items(), key=lambda kv: kv[1])
    weights = {
        idx: total / (len(counts) * counts[name])
        for name, idx in ordered
        if name in counts
    }
    return weights, counts

class ConsoleMetrics(callbacks.Callback):
    """Keras callback that prints validation accuracy and weighted F1 after each epoch."""

    def __init__(self, val_ds, y_val_true, label_names):
        super().__init__()
        self.val_ds, self.y_true, self.label_names = val_ds, y_val_true, label_names

    def on_epoch_end(self, epoch, logs=None):
        # Run the current model over the validation set and score its argmax predictions.
        probs = self.model.predict(self.val_ds, verbose=0)
        preds = np.argmax(probs, axis=1)
        accuracy = accuracy_score(self.y_true, preds)
        _, _, f1, _ = precision_recall_fscore_support(
            self.y_true, preds, average='weighted', zero_division=0
        )
        print(f"  >> [VAL] acc={accuracy:.4f} | f1={f1:.4f}")

In [None]:
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import time
from tensorflow.keras import optimizers


def train_and_evaluate(arch="efficientnet_b0_gru", cfg=CFG, save_best="best_tf_A.h5"):
    """Train one architecture in two phases and evaluate it on the test split.

    Phase 1 trains the head with the backbone frozen; phase 2 marks the
    backbone trainable, recompiles and fine-tunes. The checkpoint with the best
    validation accuracy is reloaded and scored on the test videos.

    Uses the module-level ``train_items``, ``val_items``, ``test_items``,
    ``label_map`` and ``inv_labels``.

    Args:
        arch: Architecture name understood by ``build_model``.
        cfg: Hyper-parameter dict (keys used: binary, frames, img_size,
            batch_size, cache_ds, lr_head, lr_ft, weight_decay, epochs_head,
            epochs_ft).
        save_best: File path of the best-model checkpoint.

    Returns:
        Dict with keys ``acc``, ``prec``, ``rec``, ``f1``, ``cm``,
        ``ms_per_vid``, ``labels`` and ``best_path``.
    """
    def _dataset(items, shuffle, cache):
        # Only the dataset is needed here; the example count is discarded.
        ds, _ = make_dataset(items, cfg["binary"], cfg["frames"], cfg["img_size"], arch,
                             cfg["batch_size"], shuffle=shuffle, cache=cache)
        return ds

    train_ds = _dataset(train_items, shuffle=True,  cache=cfg["cache_ds"])
    val_ds   = _dataset(val_items,   shuffle=False, cache=cfg["cache_ds"])
    test_ds  = _dataset(test_items,  shuffle=False, cache=False)

    num_classes = len(inv_labels)

    # Ground-truth test labels, in the same (unshuffled) order as test_ds.
    y_test_true = np.array([label_map[remap_label(c, cfg["binary"])] for _, c in test_items])

    # Model (phase 1: backbone frozen)
    model, base = build_model(arch, cfg["frames"], cfg["img_size"], num_classes, freeze_backbone=True)

    # Prefer AdamW so that cfg["weight_decay"] is honoured. Look it up at its
    # current location first, then at the older experimental one, and fall
    # back to plain Adam only when neither exists.
    adamw_cls = getattr(optimizers, "AdamW", None)
    if adamw_cls is None:
        adamw_cls = getattr(getattr(optimizers, "experimental", None), "AdamW", None)

    def _make_optimizer(lr):
        if adamw_cls is not None:
            return adamw_cls(learning_rate=lr, weight_decay=cfg["weight_decay"])
        return optimizers.Adam(learning_rate=lr)

    model.compile(optimizer=_make_optimizer(cfg["lr_head"]),
                  loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    # Class weights are applied only in the multi-class setting and are
    # derived from the training split alone, so validation/test statistics do
    # not influence training.
    class_weights = None
    if not cfg["binary"]:
        class_weights, counts = compute_class_weights(train_items, binary=cfg["binary"])
        print("Class counts:", counts)
        print("Class weights:", class_weights)

    # The same checkpoint callback is shared by both phases.
    cb = [
        callbacks.ModelCheckpoint(save_best, monitor="val_accuracy", save_best_only=True, save_weights_only=False, verbose=1),
    ]

    print(f"===> Phase 1: Train head (freeze backbone) | epochs={cfg['epochs_head']}")
    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=cfg["epochs_head"],
        class_weight=class_weights,
        verbose=1,
        callbacks=cb
    )

    # Phase 2: unfreeze the backbone and fine-tune. Recompiling is required
    # for the changed trainable flag to take effect.
    base.trainable = True
    model.compile(optimizer=_make_optimizer(cfg["lr_ft"]),
                  loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    print(f"===> Phase 2: Fine-tune backbone | epochs={cfg['epochs_ft']}")
    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=cfg["epochs_ft"],
        class_weight=class_weights,
        verbose=1,
        callbacks=cb
    )

    best_model = tf.keras.models.load_model(save_best)
    # The measured time covers the whole predict() call over test_ds, which
    # also decodes the test videos through the generator.
    t0 = time.time()
    y_prob = best_model.predict(test_ds, verbose=0)
    infer_time = (time.time() - t0)
    y_pred = np.argmax(y_prob, axis=1)

    acc = accuracy_score(y_test_true, y_pred)
    p,r,f,_ = precision_recall_fscore_support(y_test_true, y_pred, average='weighted', zero_division=0)
    cm = confusion_matrix(y_test_true, y_pred)
    report = classification_report(y_test_true, y_pred, target_names=inv_labels, digits=4, zero_division=0)

    ms_per_vid = 1000.0 * infer_time / max(1, len(test_items))
    print(f"\n===== TEST ({arch}) =====")
    print(f"[TEST] acc={acc:.4f} | prec={p:.4f} | rec={r:.4f} | f1={f:.4f} | infer={ms_per_vid:.1f} ms/vid")
    print("Classification report:\n", report)
    print("Confusion matrix:")
    print(cm)

    return {
        "acc": acc, "prec": p, "rec": r, "f1": f, "cm": cm, "ms_per_vid": ms_per_vid,
        "labels": inv_labels, "best_path": save_best
    }

In [None]:
# Run A2: EfficientNetB0 -> GRU.
# The overrides are written into the shared CFG in place, so any later
# CFG.copy() starts from these values.
CFG.update({
    "binary": True,      # set to False to run the 3-class task
    "frames": 3,
    "img_size": 48,
    "batch_size": 4,
    "epochs_head": 1,
    "epochs_ft": 2,
})

res_A2 = train_and_evaluate(
    arch="efficientnet_b0_gru",
    cfg=CFG,
    save_best="best_A2_efficientnet_b0_gru.h5",
)


===> Phase 1: Train head (freeze backbone) | epochs=3
Epoch 1/3


E0000 00:00:1759485917.680845      36 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EffB0_GRU_1/time_distributed_1_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


In [None]:
# Run A1: ResNet50 -> LSTM, using the current CFG with a batch size of 2.
CFG_A1 = {**CFG, "batch_size": 2}

res_A1 = train_and_evaluate(
    arch="resnet50_lstm",
    cfg=CFG_A1,
    save_best="best_A1_resnet50_lstm.h5",
)

In [None]:
# Run A3: MobileNetV2 -> GRU, using the current CFG with the overrides below.
CFG_FAST = {
    **CFG,
    "frames": 6,        # frames sampled per clip
    "img_size": 96,     # frames resized to 96x96
    "batch_size": 6,    # clips per batch
    "epochs_head": 1,   # head-only epochs
    "epochs_ft": 4,     # fine-tuning epochs
}

res_A3 = train_and_evaluate(
    arch="mobilenet_v2_gru",
    cfg=CFG_FAST,
    save_best="best_A3_mobilenet_v2_gru.h5",
)

In [None]:
# Stand-alone configuration for the TinyCNN run (does not derive from CFG).
CFG_ULTRA_FAST = dict(
    binary=True,
    split_ratio=(0.7, 0.15, 0.15),
    frames=3,                                # very few frames: only 3
    img_size=48,                             # very small: only 48x48 pixels
    batch_size=12,                           # large batch size
    epochs_head=1,                           # a single epoch
    epochs_ft=2,                             # only 2 fine-tuning epochs
    lr_head=3e-3,                            # high learning rate to converge quickly
    lr_ft=8e-4,
    weight_decay=1e-6,
    shuffle_buffer=8,                        # very small buffer
    num_parallel_calls=tf.data.AUTOTUNE,
    cache_ds=True,
)

print("🚀 TRAINING SIÊU NHANH với TinyCNN - Cấu hình tối giản nhất!")
res_ULTRA = train_and_evaluate(
    arch="tiny_cnn_gru",
    cfg=CFG_ULTRA_FAST,
    save_best="best_ULTRA_tiny_cnn.h5",
)

In [None]:
# Run a single model only: the TinyCNN.
print("🚀 CHỈ CHẠY TinyCNN - NHANH NHẤT VÀ ÍT MEMORY NHẤT!")

# Minimal stand-alone configuration aimed at low memory usage.
CFG_MINIMAL = dict(
    binary=True,
    split_ratio=(0.7, 0.15, 0.15),
    frames=2,               # only 2 frames
    img_size=32,            # only 32x32 pixels
    batch_size=2,           # smallest batch size
    epochs_head=1,          # only 1 epoch
    epochs_ft=1,            # only 1 fine-tuning epoch
    lr_head=5e-3,           # high learning rate
    lr_ft=1e-3,
    weight_decay=1e-6,
    shuffle_buffer=4,       # very small buffer
    num_parallel_calls=1,   # a single thread
    cache_ds=False,         # caching turned OFF
)

res_MINIMAL = train_and_evaluate(
    arch="tiny_cnn_gru",
    cfg=CFG_MINIMAL,
    save_best="best_MINIMAL_tiny_cnn.h5",
)
