In [1]:
# Full fixed script. Set DATA_DIR at top and run.
import os
import math
import time
import random
from pathlib import Path
import numpy as np
import cv2
import albumentations as A
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB4

# ---------------------
# USER CONFIG - EDIT THIS
# ---------------------
DATA_DIR = "/kaggle/input/eye-diseases-classification/dataset"  # <<-- set your dataset root
OUTPUT_DIR = "/kaggle/working"
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(os.path.join(OUTPUT_DIR, "artifacts"), exist_ok=True)

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

IMG_SIZE = (380, 380)
BATCH_SIZE = 16
WARMUP_EPOCHS = 5
TOTAL_EPOCHS = 80
INITIAL_LR = 3e-4
WEIGHT_DECAY = 1e-5
PATIENCE_ES = 12
PATIENCE_RLR = 6

USE_IMAGENET = True
USE_CLAHE = True
USE_MIXUP = True
MIXUP_ALPHA = 0.2
USE_CUTMIX = True
CUTMIX_ALPHA = 1.0
LABEL_SMOOTHING = 0.05
USE_FOCAL_LOSS = False
TTA_ROUNDS = 3

# ---------------------
# Utility: categorical focal loss (optional)
# ---------------------
def categorical_focal_loss(gamma=2.0, alpha=0.25):
    def loss_fn(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, keras.backend.epsilon(), 1.0 - keras.backend.epsilon())
        ce = -y_true * tf.math.log(y_pred)
        weight = alpha * tf.pow(1 - y_pred, gamma)
        loss = weight * ce
        return tf.reduce_sum(loss, axis=-1)
    return loss_fn

# ---------------------
# SE block
# ---------------------
class SEBlock(layers.Layer):
    def __init__(self, se_ratio=0.25, **kwargs):
        super().__init__(**kwargs)
        self.se_ratio = se_ratio
    def build(self, input_shape):
        channels = int(input_shape[-1])
        reduced = max(1, int(channels * self.se_ratio))
        self.gap = layers.GlobalAveragePooling2D()
        self.fc1 = layers.Dense(reduced, activation="relu", kernel_initializer="he_normal")
        self.fc2 = layers.Dense(channels, activation="sigmoid", kernel_initializer="he_normal")
        self.reshape = layers.Reshape((1,1,channels))
    def call(self, x):
        se = self.gap(x)
        se = self.fc1(se)
        se = self.fc2(se)
        se = self.reshape(se)
        return x * se

# ---------------------
# Preprocessor
# ---------------------
class Preprocessor:
    def __init__(self, img_size=(380,380), use_clahe=True):
        self.img_size = img_size
        self.use_clahe = use_clahe
        if use_clahe:
            try:
                self.clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            except Exception:
                self.clahe = None
        self.train_aug = A.Compose([
            A.Resize(img_size[0], img_size[1]),
            A.RandomRotate90(p=0.15),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.1),
            A.OneOf([A.RandomBrightnessContrast(p=1.0), A.HueSaturationValue(p=1.0)], p=0.6),
            A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.15, rotate_limit=15, p=0.6),
            A.OneOf([A.GaussNoise(), A.ISONoise()], p=0.2),
            A.OneOf([A.Blur(3), A.GaussianBlur(3)], p=0.2),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225))
        ])
        self.val_aug = A.Compose([
            A.Resize(img_size[0], img_size[1]),
            A.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225))
        ])
    def apply_clahe(self, img):
        if self.clahe is None:
            return img
        lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
        lab[:,:,0] = self.clahe.apply(lab[:,:,0])
        return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
    def preprocess(self, path_or_img, training=True):
        if isinstance(path_or_img, str):
            img = cv2.imread(path_or_img)
            if img is None:
                img = np.zeros((self.img_size[0], self.img_size[1], 3), dtype=np.uint8)
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            img = path_or_img
        if self.use_clahe and img is not None:
            try:
                img = self.apply_clahe(img)
            except Exception:
                pass
        aug = self.train_aug if training else self.val_aug
        out = aug(image=img)['image']
        # albumentations returns float32 already for Normalize; ensure dtype
        return out.astype(np.float32)

# ---------------------
# Data generator with MixUp/CutMix
# ---------------------
class DataGenerator(keras.utils.Sequence):
    def __init__(self, filepaths, labels, batch_size, preprocessor, num_classes, shuffle=True,
                 mixup_prob=0.5, cutmix_prob=0.5, mixup_alpha=0.2, cutmix_alpha=1.0):
        super().__init__()
        self.filepaths = np.array(filepaths)
        self.labels = np.array(labels)
        self.batch_size = batch_size
        self.prep = preprocessor
        self.num_classes = num_classes
        self.indexes = np.arange(len(self.filepaths))
        self.shuffle = shuffle
        self.on_epoch_end()
        self.mixup_prob = mixup_prob
        self.cutmix_prob = cutmix_prob
        self.mixup_alpha = mixup_alpha
        self.cutmix_alpha = cutmix_alpha
    def __len__(self):
        return int(np.ceil(len(self.filepaths) / self.batch_size))
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)
    def _one_hot(self, idx):
        lab = np.zeros(self.num_classes, dtype=np.float32)
        lab[idx] = 1.0
        return lab
    def _mixup(self, x1, y1, x2, y2, alpha):
        lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
        x = lam * x1 + (1 - lam) * x2
        y = lam * y1 + (1 - lam) * y2
        return x, y
    def _cutmix(self, x1, y1, x2, y2, alpha):
        H, W = x1.shape[:2]
        lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
        cut_rat = math.sqrt(max(0.0, 1.0 - lam))
        cut_w = int(W * cut_rat)
        cut_h = int(H * cut_rat)
        cx = np.random.randint(0, W)
        cy = np.random.randint(0, H)
        x1_copy = x1.copy()
        x2_copy = x2.copy()
        x1_copy[max(0, cy - cut_h//2):max(0, cy - cut_h//2) + cut_h,
                max(0, cx - cut_w//2):max(0, cx - cut_w//2) + cut_w, :] = \
            x2_copy[max(0, cy - cut_h//2):max(0, cy - cut_h//2) + cut_h,
                    max(0, cx - cut_w//2):max(0, cx - cut_w//2) + cut_w, :]
        new_lam = 1.0 - (cut_w * cut_h) / (W * H) if (W*H)>0 else 1.0
        y = new_lam * y1 + (1.0 - new_lam) * y2
        return x1_copy, y
    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = min((idx + 1) * self.batch_size, len(self.filepaths))
        batch_inds = self.indexes[start:end]
        bsize = len(batch_inds)
        X = np.zeros((bsize, IMG_SIZE[0], IMG_SIZE[1], 3), dtype=np.float32)
        Y = np.zeros((bsize, self.num_classes), dtype=np.float32)
        for i, ind in enumerate(batch_inds):
            img = self.prep.preprocess(self.filepaths[ind], training=True)
            lbl = self._one_hot(self.labels[ind])
            X[i] = img
            Y[i] = lbl
        if (USE_MIXUP or USE_CUTMIX) and bsize > 0:
            for i in range(bsize):
                p = np.random.rand()
                if len(self.filepaths) > 1:
                    # sample a different index
                    j = np.random.randint(0, len(self.filepaths))
                    # avoid same index; if same, allow but try a few times
                    tries = 0
                    while j == batch_inds[i] and tries < 5:
                        j = np.random.randint(0, len(self.filepaths))
                        tries += 1
                else:
                    j = batch_inds[i]
                x2 = self.prep.preprocess(self.filepaths[j], training=True)
                y2 = self._one_hot(self.labels[j])
                if USE_CUTMIX and p < self.cutmix_prob:
                    try:
                        X[i], Y[i] = self._cutmix(X[i], Y[i], x2, y2, CUTMIX_ALPHA)
                    except Exception:
                        X[i], Y[i] = self._mixup(X[i], Y[i], x2, y2, MIXUP_ALPHA)
                elif USE_MIXUP and p < self.mixup_prob:
                    X[i], Y[i] = self._mixup(X[i], Y[i], x2, y2, MIXUP_ALPHA)
        return X, Y

# ---------------------
# Load filepaths & labels
# ---------------------
def load_filepaths_labels(root):
    root = Path(root)
    if not root.exists():
        raise FileNotFoundError(f"DATA_DIR '{root}' does not exist. Update DATA_DIR.")
    class_dirs = sorted([d for d in root.iterdir() if d.is_dir()])
    if len(class_dirs) == 0:
        raise FileNotFoundError(f"No class subfolders found under {root}.")
    classes = [p.name for p in class_dirs]
    filepaths = []
    labels = []
    for idx, dirp in enumerate(class_dirs):
        for p in dirp.glob("*"):
            if p.suffix.lower() in (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"):
                filepaths.append(str(p))
                labels.append(idx)
    return filepaths, labels, classes

filepaths, labels, classes = load_filepaths_labels(DATA_DIR)
NUM_CLASSES = len(classes)
print("Classes:", classes)
print("Total images:", len(filepaths))

# Train/val/test split
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    filepaths, labels, test_size=0.30, random_state=SEED, stratify=labels)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths, temp_labels, test_size=0.50, random_state=SEED, stratify=temp_labels)

print("Train/Val/Test:", len(train_paths), len(val_paths), len(test_paths))

cw = compute_class_weight("balanced", classes=np.unique(train_labels), y=np.array(train_labels))
class_weights = {i: float(w) for i,w in enumerate(cw)}
print("class_weights:", class_weights)

# Generators
preproc = Preprocessor(img_size=IMG_SIZE, use_clahe=USE_CLAHE)
train_gen = DataGenerator(train_paths, train_labels, BATCH_SIZE, preproc, NUM_CLASSES,
                          shuffle=True,
                          mixup_prob=0.5 if USE_MIXUP else 0.0,
                          cutmix_prob=0.5 if USE_CUTMIX else 0.0,
                          mixup_alpha=MIXUP_ALPHA, cutmix_alpha=CUTMIX_ALPHA)
val_gen = DataGenerator(val_paths, val_labels, BATCH_SIZE, preproc, NUM_CLASSES, shuffle=False,
                        mixup_prob=0.0, cutmix_prob=0.0)

# ---------------------
# Build model
# ---------------------
def build_model(input_shape=(380,380,3), num_classes=4, dropout_rate=0.3):
    inputs = keras.Input(shape=input_shape)
    weights = "imagenet" if USE_IMAGENET else None
    try:
        base = EfficientNetB4(include_top=False, weights=weights, input_tensor=inputs)
    except Exception as e:
        # fallback: include_top=False with random init
        print("EfficientNetB4 weight load failed; using random init. Error:", e)
        base = EfficientNetB4(include_top=False, weights=None, input_tensor=inputs)
    x = base.output
    x = SEBlock(se_ratio=0.25)(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(512, activation="relu", kernel_initializer="he_normal")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate*0.5)(x)
    outputs = layers.Dense(num_classes, activation="softmax", dtype="float32")(x)
    model = keras.Model(inputs, outputs)
    return model, base

model, base = build_model(input_shape=(*IMG_SIZE,3), num_classes=NUM_CLASSES)
print("Model params:", model.count_params())

# ---------------------
# Loss and optimizer factory
# ---------------------
if USE_FOCAL_LOSS:
    loss_fn = categorical_focal_loss(gamma=2.0, alpha=0.25)
else:
    loss_fn = keras.losses.CategoricalCrossentropy(label_smoothing=LABEL_SMOOTHING)

def make_optimizer(lr):
    # pass float to optimizer
    return keras.optimizers.AdamW(learning_rate=float(lr), weight_decay=WEIGHT_DECAY)

steps_per_epoch = len(train_gen)
total_steps = max(1, steps_per_epoch * (TOTAL_EPOCHS - WARMUP_EPOCHS))

# ---------------------
# Callbacks, including WarmupCosine (updates optimizer.learning_rate)
# ---------------------
os.makedirs(os.path.join(OUTPUT_DIR, "artifacts"), exist_ok=True)

callbacks = [
    keras.callbacks.ModelCheckpoint(os.path.join(OUTPUT_DIR, "artifacts", "best_model.h5"),
                                    monitor="val_loss", save_best_only=True, verbose=1),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=PATIENCE_ES, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=PATIENCE_RLR, verbose=1, min_lr=1e-7),
    keras.callbacks.CSVLogger(os.path.join(OUTPUT_DIR, "artifacts", "train_log.csv"))
]

class WarmupCosine(tf.keras.callbacks.Callback):
    def __init__(self, warmup_epochs, initial_lr, total_steps, steps_per_epoch):
        super().__init__()
        self.warmup_epochs = warmup_epochs
        self.initial_lr = float(initial_lr)
        self.total_steps = max(1, total_steps)
        self.steps_per_epoch = steps_per_epoch
    def on_train_begin(self, logs=None):
        self.step = 0
    def on_batch_begin(self, batch, logs=None):
        if self.step < self.warmup_epochs * self.steps_per_epoch:
            warmup_total = float(self.warmup_epochs * self.steps_per_epoch)
            lr = self.initial_lr * max(0.0, (self.step / warmup_total))
        else:
            t = (self.step - self.warmup_epochs * self.steps_per_epoch) / float(self.total_steps)
            t = min(1.0, max(0.0, t))
            lr = 0.5 * self.initial_lr * (1 + math.cos(math.pi * t))
        # Update optimizer learning rate safely
        try:
            tf.keras.backend.set_value(self.model.optimizer.learning_rate, lr if lr>0 else 1e-8)
        except Exception:
            try:
                self.model.optimizer.learning_rate.assign(lr if lr>0 else 1e-8)
            except Exception:
                pass
        self.step += 1
    def on_epoch_end(self, epoch, logs=None):
        try:
            current_lr = float(tf.keras.backend.get_value(self.model.optimizer.learning_rate))
        except Exception:
            current_lr = float(self.initial_lr)
        print(f"Epoch {epoch+1} lr={current_lr:.6e}")

callbacks.append(WarmupCosine(warmup_epochs=WARMUP_EPOCHS, initial_lr=INITIAL_LR,
                             total_steps=total_steps, steps_per_epoch=steps_per_epoch))

# ---------------------
# Phase 1: freeze base, train head
# ---------------------
for layer in base.layers:
    layer.trainable = False

optimizer = make_optimizer(INITIAL_LR)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])
print("Phase 1 (warmup head only) training...")
history1 = model.fit(train_gen, validation_data=val_gen, epochs=WARMUP_EPOCHS,
                     class_weight=class_weights, callbacks=callbacks, verbose=1)

# ---------------------
# Phase 2: unfreeze and fine-tune
# ---------------------
for layer in base.layers:
    layer.trainable = True

# re-create optimizer with lower LR for fine-tune (or reuse & assign)
optimizer2 = make_optimizer(INITIAL_LR * 0.5)
model.compile(optimizer=optimizer2, loss=loss_fn, metrics=["accuracy"])
print("Phase 2 (fine-tune full model) training...")
initial_epoch = history1.epoch[-1] + 1 if hasattr(history1, "epoch") and len(history1.epoch) else 0
history2 = model.fit(train_gen, validation_data=val_gen,
                     epochs=(TOTAL_EPOCHS - WARMUP_EPOCHS),
                     initial_epoch=initial_epoch,
                     class_weight=class_weights, callbacks=callbacks, verbose=1)

# Merge histories for simple analysis
history = history1
for k, v in history2.history.items():
    history.history.setdefault(k, []).extend(v)

# Save final model
final_path = os.path.join(OUTPUT_DIR, "artifacts", "efnb4_se_final.h5")
try:
    model.save(final_path)
    print("Saved final model to:", final_path)
except Exception as e:
    print("Model.save failed, saving weights only. Error:", e)
    model.save_weights(final_path + ".weights.h5")

# ---------------------
# TTA evaluate on test set
# ---------------------
def tta_predict(model, file_list, preprocessor, tta_rounds=3):
    preds = []
    for t in range(tta_rounds):
        batch_preds = []
        for i in range(0, len(file_list), BATCH_SIZE):
            batch_files = file_list[i:i+BATCH_SIZE]
            X = np.zeros((len(batch_files), IMG_SIZE[0], IMG_SIZE[1], 3), dtype=np.float32)
            for j, fp in enumerate(batch_files):
                X[j] = preprocessor.preprocess(fp, training=(t>0))
            p = model.predict(X, verbose=0)
            batch_preds.append(p)
        if len(batch_preds):
            batch_preds = np.vstack(batch_preds)
        else:
            batch_preds = np.zeros((0, NUM_CLASSES), dtype=np.float32)
        preds.append(batch_preds)
    if len(preds) == 0:
        return np.zeros((len(file_list), NUM_CLASSES), dtype=np.float32)
    return np.mean(preds, axis=0)

y_true = np.array(test_labels)
y_prob = tta_predict(model, test_paths, preproc, tta_rounds=TTA_ROUNDS)
y_pred = np.argmax(y_prob, axis=1)
acc = accuracy_score(y_true, y_pred)
print(f"\nTTA TEST Accuracy: {acc*100:.2f}%\n")
print(classification_report(y_true, y_pred, target_names=classes, digits=4))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))


2025-10-02 07:50:49.351033: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759391449.543066      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759391449.601811      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Classes: ['cataract', 'diabetic_retinopathy', 'glaucoma', 'normal']
Total images: 4217
Train/Val/Test: 2951 633 633
class_weights: {0: 1.0161845730027548, 1: 0.9606119791666666, 2: 1.04645390070922, 3: 0.9810505319148937}


  original_init(self, **validated_kwargs)
I0000 00:00:1759391461.246600      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1759391461.247336      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
[1m71686520/71686520[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Model params: 20203811
Phase 1 (warmup head only) training...
Epoch 1/5


I0000 00:00:1759391499.895731     101 service.cc:148] XLA service 0x7e830801c8f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1759391499.896421     101 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1759391499.896441     101 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1759391503.794441     101 cuda_dnn.cc:529] Loaded cuDNN version 90300
E0000 00:00:1759391508.800196     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759391508.981570     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m  1/185[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:54:08[0m 57s/step - accuracy: 0.3125 - loss: 1.9742

I0000 00:00:1759391524.255687     101 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m184/185[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 630ms/step - accuracy: 0.2911 - loss: 1.7644

E0000 00:00:1759391646.472431     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759391646.627149     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 742ms/step - accuracy: 0.2914 - loss: 1.7638

E0000 00:00:1759391702.250282      99 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759391702.411187      99 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.



Epoch 1: val_loss improved from inf to 1.24734, saving model to /kaggle/working/artifacts/best_model.h5
Epoch 1 lr=5.967568e-05
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 1s/step - accuracy: 0.2916 - loss: 1.7632 - val_accuracy: 0.4834 - val_loss: 1.2473 - learning_rate: 5.9676e-05
Epoch 2/5
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552ms/step - accuracy: 0.3972 - loss: 1.4841
Epoch 2: val_loss improved from 1.24734 to 1.06716, saving model to /kaggle/working/artifacts/best_model.h5
Epoch 2 lr=1.196757e-04
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 682ms/step - accuracy: 0.3973 - loss: 1.4838 - val_accuracy: 0.6161 - val_loss: 1.0672 - learning_rate: 1.1968e-04
Epoch 3/5
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 535ms/step - accuracy: 0.4480 - loss: 1.3570
Epoch 3: val_loss improved from 1.06716 to 0.99791, saving model to /kaggle/working/artifacts/best_model.h5
Epoch 3 lr=1.796757e-04
[

E0000 00:00:1759392347.757411     102 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392347.903404     102 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392348.362715     102 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392348.508890     102 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392349.046409     102 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:0

[1m 88/185[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m57s[0m 588ms/step - accuracy: 0.2822 - loss: 1.9149

E0000 00:00:1759392463.416960     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392463.553117     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392463.856180     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392463.992305     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1759392464.406645     101 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:0

[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 930ms/step - accuracy: 0.3216 - loss: 1.7712
Epoch 6: val_loss did not improve from 0.99791
Epoch 6 lr=5.967568e-05
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m397s[0m 1s/step - accuracy: 0.3220 - loss: 1.7700 - val_accuracy: 0.4566 - val_loss: 1.2567 - learning_rate: 5.9676e-05
Epoch 7/75
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589ms/step - accuracy: 0.5598 - loss: 1.2208
Epoch 7: val_loss improved from 0.99791 to 0.97338, saving model to /kaggle/working/artifacts/best_model.h5
Epoch 7 lr=1.196757e-04
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 725ms/step - accuracy: 0.5600 - loss: 1.2205 - val_accuracy: 0.6240 - val_loss: 0.9734 - learning_rate: 1.1968e-04
Epoch 8/75
[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 595ms/step - accuracy: 0.6564 - loss: 1.0374
Epoch 8: val_loss improved from 0.97338 to 0.68331, saving model to /kaggle/wor

In [2]:
# Duplicate checks: exact (hash) and perceptual (average hash)
import os, hashlib
from pathlib import Path
from collections import defaultdict
import numpy as np
from PIL import Image

# set paths (these variables should exist from your earlier code)
# If you used different variables, update them here:
try:
    all_filepaths, all_labels, classes = load_filepaths_labels(DATA_DIR)
except Exception as e:
    # fallback: find classes automatically if earlier function not loaded
    root = Path(DATA_DIR)
    class_dirs = sorted([d for d in root.iterdir() if d.is_dir()])
    classes = [p.name for p in class_dirs]
    all_filepaths = []
    all_labels = []
    for i, p in enumerate(class_dirs):
        for f in p.glob("*"):
            if f.suffix.lower() in (".jpg",".jpeg",".png",".bmp",".tif",".tiff"):
                all_filepaths.append(str(f))
                all_labels.append(i)

# if your train/test splits are named train_paths/test_paths, use them;
# otherwise recreate a simple stratified split for the check:
from sklearn.model_selection import train_test_split
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    all_filepaths, all_labels, test_size=0.30, random_state=42, stratify=all_labels)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels)

print(f"Using sets sizes — train: {len(train_paths)}, val: {len(val_paths)}, test: {len(test_paths)}")

# 1) Exact duplicate check via MD5
def file_md5(path, block_size=2**20):
    h = hashlib.md5()
    with open(path, "rb") as f:
        while True:
            chunk = f.read(block_size)
            if not chunk:
                break
            h.update(chunk)
    return h.hexdigest()

def find_exact_duplicates(setA, setB):
    md5_to_path_A = {}
    for p in setA:
        try:
            md5_to_path_A[file_md5(p)] = p
        except Exception:
            pass
    dups = []
    for p in setB:
        try:
            m = file_md5(p)
            if m in md5_to_path_A:
                dups.append((md5_to_path_A[m], p))
        except Exception:
            pass
    return dups

exact_dups = find_exact_duplicates(train_paths, test_paths)
print("Exact duplicates between train and test:", len(exact_dups))
if exact_dups:
    for a,b in exact_dups[:10]:
        print("  ", a, "<->", b)

# 2) Perceptual duplicates via a simple average-hash (fast)
#    (works reasonably well for detecting resized/contrast-changed same images)
def average_hash(image_path, hash_size=16):
    try:
        img = Image.open(image_path).convert("L").resize((hash_size, hash_size), Image.BILINEAR)
        arr = np.asarray(img, dtype=np.float32)
        avg = arr.mean()
        diff = arr > avg
        # return bitstring as integer tuple
        return tuple(diff.reshape(-1).astype(int))
    except Exception:
        return None

def hamming_distance(h1, h2):
    if h1 is None or h2 is None: return 999
    return sum(a!=b for a,b in zip(h1,h2))

# build hashes for train and test (can take some seconds)
print("Computing perceptual hashes (avg-hash) for train/test...")
train_hashes = {p: average_hash(p) for p in train_paths}
test_hashes  = {p: average_hash(p) for p in test_paths}

# find near-duplicates with small Hamming distance (threshold 10 is conservative for 16x16)
threshold = 10
near_dups = []
for tp, th in train_hashes.items():
    if th is None: continue
    for qp, qh in test_hashes.items():
        if qh is None: continue
        d = hamming_distance(th, qh)
        if d <= threshold:
            near_dups.append((tp, qp, d))
# Sort by distance and show top 10
near_dups = sorted(near_dups, key=lambda x: x[2])
print("Perceptual near-duplicates between train and test (<= threshold):", len(near_dups))
if near_dups:
    for a,b,d in near_dups[:10]:
        print(f"  dist={d}: {a} <-> {b}")


Using sets sizes — train: 2951, val: 633, test: 633
Exact duplicates between train and test: 0
Computing perceptual hashes (avg-hash) for train/test...
Perceptual near-duplicates between train and test (<= threshold): 36512
  dist=0: /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_060.png <-> /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_064.png
  dist=0: /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_060.png <-> /kaggle/input/eye-diseases-classification/dataset/glaucoma/Glaucoma_084.png
  dist=0: /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_060.png <-> /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_024.png
  dist=0: /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_060.png <-> /kaggle/input/eye-diseases-classification/dataset/glaucoma/_387_8614768.jpg
  dist=0: /kaggle/input/eye-diseases-classification/dataset/cataract/cataract_080.png <-> /kaggle/input/eye-dise