In [3]:
!rm -rf /content/preprocessed_aptos

In [4]:
# ==========================
# HÜCRE 1: PREPROCESSED DATASET OLUŞTUR (MULTI-THREAD) - AUTO PATH RESOLVER
# ==========================
import os, glob, warnings, sys
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import cv2
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# --------------------------
# CONFIG
# --------------------------
SEED = 42
TRAIN_FRACTION = 1.00   # debug: 0.20, full: 1.00
VAL_FRACTION   = 1.00

PREP_ROOT = "/content/preprocessed_aptos"
PREP_TRAIN_DIR = os.path.join(PREP_ROOT, "train_images")
PREP_VAL_DIR   = os.path.join(PREP_ROOT, "val_images")
PREP_TEST_DIR  = os.path.join(PREP_ROOT, "test_images")

IMG_EXTS = (".png", ".jpg", ".jpeg")

# OpenCV thread kapat (ThreadPool ile daha stabil)
try:
    cv2.setNumThreads(0)
except:
    pass

np.random.seed(SEED)

# --------------------------
# 0) Drive mount (Colab ise)
# --------------------------
def try_mount_drive():
    if os.path.exists("/content/drive"):
        return
    try:
        from google.colab import drive
        drive.mount("/content/drive")
    except Exception as e:
        print(" Drive mount edilemedi (Colab dışında olabilir):", e)

try_mount_drive()

# Arama kökleri
SEARCH_ROOTS = []
for r in ["/content/drive/MyDrive", "/content/drive", "/content"]:
    if os.path.exists(r):
        SEARCH_ROOTS.append(r)

print("SEARCH_ROOTS:", SEARCH_ROOTS)

# --------------------------
# 1) CSV'leri otomatik
# --------------------------
def find_file_candidates(filename, roots, limit=10):
    hits = []
    for root in roots:
        pat = os.path.join(root, "**", filename)
        hits.extend(glob.glob(pat, recursive=True))

    hits = sorted(list(dict.fromkeys(hits)))
    return hits[:limit]

def pick_best_csv(name, candidates):

    return candidates[0] if candidates else None


expected_csvs = {
    "TRAIN_CSV": ["train_1.csv", "train.csv"],
    "VAL_CSV"  : ["valid.csv", "val.csv", "validation.csv"],
    "TEST_CSV" : ["test.csv"]
}

resolved_csv = {}
all_candidates_debug = {}

for key, names in expected_csvs.items():
    cands = []
    for nm in names:
        c = find_file_candidates(nm, SEARCH_ROOTS, limit=20)
        if c:
            cands.extend(c)
    cands = sorted(list(dict.fromkeys(cands)))
    all_candidates_debug[key] = cands
    resolved_csv[key] = pick_best_csv(key, cands)

print("\n CSV candidates (first 20 each):")
for k, cands in all_candidates_debug.items():
    print(f" - {k}: {len(cands)} found")
    for x in cands[:5]:
        print("    ", x)

TRAIN_CSV = resolved_csv["TRAIN_CSV"]
VAL_CSV   = resolved_csv["VAL_CSV"]
TEST_CSV  = resolved_csv["TEST_CSV"]

if not (TRAIN_CSV and VAL_CSV and TEST_CSV):
    raise FileNotFoundError(
        " CSV bulunamadı.\n"
        " Çözüm: Drive’da dosyaların gerçekten var olduğundan emin ol.\n"
        "   - train_1.csv / valid.csv / test.csv dosyalarını Drive’a yükle\n"
        "   - ya da dosya adların farklıysa burada bulunan aday isimlere göre güncelle.\n"
        "Not: Yukarıda 'CSV candidates' altında bulunan path’lerden biri doğru olmalı."
    )

print("\n RESOLVED CSV PATHS:")
print("TRAIN_CSV:", TRAIN_CSV)
print("VAL_CSV  :", VAL_CSV)
print("TEST_CSV :", TEST_CSV)

# --------------------------
# 2) CSV'leri oku
# --------------------------
df_train = pd.read_csv(TRAIN_CSV)
df_val   = pd.read_csv(VAL_CSV)
df_test  = pd.read_csv(TEST_CSV)

for name, df in [("train", df_train), ("val", df_val), ("test", df_test)]:
    if "id_code" not in df.columns:
        raise ValueError(f" {name} csv içinde 'id_code' kolonu yok!")
print(" CSV read OK. Columns:", df_train.columns.tolist())

# diagnosis varsa string yap
if "diagnosis" in df_train.columns:
    df_train["diagnosis"] = df_train["diagnosis"].astype(str)
if "diagnosis" in df_val.columns:
    df_val["diagnosis"] = df_val["diagnosis"].astype(str)

# --------------------------

def ensure_ext(series, ext):
    s = series.astype(str)
    return s.apply(lambda x: x if x.lower().endswith(IMG_EXTS) else (x + ext))

def find_dir_by_filenames(roots, filenames, max_probe=50):
    probe = [str(x) for x in filenames[:max_probe]]

    for root in roots:
        if not os.path.exists(root):
            continue

        # probe ile birebir eşleşme
        for nm in probe:
            if nm.lower().endswith(IMG_EXTS):
                hits = glob.glob(os.path.join(root, "**", nm), recursive=True)
                if hits:
                    hit = hits[0]
                    return os.path.dirname(hit), os.path.splitext(hit)[1].lower()
            else:
                for ext in IMG_EXTS:
                    hits = glob.glob(os.path.join(root, "**", nm + ext), recursive=True)
                    if hits:
                        hit = hits[0]
                        return os.path.dirname(hit), ext

        # fallback: root altında herhangi bir resim var mı
        any_imgs = []
        for ext in IMG_EXTS:
            any_imgs += glob.glob(os.path.join(root, "**", f"*{ext}"), recursive=True)
        if any_imgs:
            hit = any_imgs[0]
            return os.path.dirname(hit), os.path.splitext(hit)[1].lower()

    return None, None

# id_code örnekleri (ext'siz olabilir)
train_ids = df_train["id_code"].astype(str).tolist()
val_ids   = df_val["id_code"].astype(str).tolist()
test_ids  = df_test["id_code"].astype(str).tolist()

TRAIN_DIR, ext1 = find_dir_by_filenames(SEARCH_ROOTS, train_ids)
VAL_DIR,   ext2 = find_dir_by_filenames(SEARCH_ROOTS, val_ids)
TEST_DIR,  ext3 = find_dir_by_filenames(SEARCH_ROOTS, test_ids)

# APTOS gibi yapılarda val ayrı klasör olmayabilir → train'e düş
if VAL_DIR is None and TRAIN_DIR is not None:
    VAL_DIR, ext2 = TRAIN_DIR, ext1

if TEST_DIR is None and TRAIN_DIR is not None:
    # bazı senaryolarda test de aynı yerde olabilir
    TEST_DIR, ext3 = TRAIN_DIR, ext1

print("\n RESOLVED IMAGE DIRS:")
print("TRAIN_DIR:", TRAIN_DIR, "ext:", ext1)
print("VAL_DIR  :", VAL_DIR,   "ext:", ext2)
print("TEST_DIR :", TEST_DIR,  "ext:", ext3)

if TRAIN_DIR is None:
    raise FileNotFoundError(" TRAIN resimleri bulunamadı. Drive içinde train_images klasörü/PNG'ler yok gibi.")
if TEST_DIR is None:
    raise FileNotFoundError(" TEST resimleri bulunamadı. Drive içinde test images yok gibi.")

IMAGE_EXT = ext1 or ext2 or ext3 or ".png"
print(" Using IMAGE_EXT:", IMAGE_EXT)


df_train["id_code"] = ensure_ext(df_train["id_code"], IMAGE_EXT)
df_val["id_code"]   = ensure_ext(df_val["id_code"],   IMAGE_EXT)
df_test["id_code"]  = ensure_ext(df_test["id_code"],  IMAGE_EXT)

# --------------------------
# 4) % subset (train/val)
# --------------------------
def apply_fraction(df, frac, label_col=None):
    if frac >= 1.0:
        return df.reset_index(drop=True)
    if label_col and (label_col in df.columns):
        try:
            small, _ = train_test_split(
                df,
                test_size=(1.0-frac),
                random_state=SEED,
                stratify=df[label_col]
            )
            return small.reset_index(drop=True)
        except Exception:
            return df.sample(frac=frac, random_state=SEED).reset_index(drop=True)
    return df.sample(frac=frac, random_state=SEED).reset_index(drop=True)

df_train_small = apply_fraction(df_train, TRAIN_FRACTION, label_col="diagnosis" if "diagnosis" in df_train.columns else None)
df_val_small   = apply_fraction(df_val,   VAL_FRACTION,   label_col="diagnosis" if "diagnosis" in df_val.columns else None)

print("\n Using train:", len(df_train_small), "/", len(df_train))
print(" Using val  :", len(df_val_small),   "/", len(df_val))
print(" Using test :", len(df_test),        "/", len(df_test))

# --------------------------
# 5) Preprocess ops
# --------------------------
def crop_black_borders(img_bgr, tol=10):
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    mask = gray > tol
    if mask.sum() == 0:
        return img_bgr
    coords = np.argwhere(mask)
    y0, x0 = coords.min(axis=0)
    y1, x1 = coords.max(axis=0) + 1
    return img_bgr[y0:y1, x0:x1]

def align_by_centroid(img_bgr, tol=10):
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    mask = (gray > tol).astype(np.uint8)
    M = cv2.moments(mask)
    if M["m00"] == 0:
        return img_bgr
    cx = int(M["m10"]/M["m00"]); cy = int(M["m01"]/M["m00"])
    h, w = img_bgr.shape[:2]
    tx = (w//2) - cx; ty = (h//2) - cy
    mat = np.float32([[1,0,tx],[0,1,ty]])
    return cv2.warpAffine(img_bgr, mat, (w,h), flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

def clahe_green(img_bgr, clip=2.0, grid=(8,8)):
    b,g,r = cv2.split(img_bgr)
    clahe = cv2.createCLAHE(clipLimit=clip, tileGridSize=grid)
    g2 = clahe.apply(g)
    return cv2.merge([b,g2,r])

def preprocess_bgr(img_bgr):
    img_bgr = cv2.GaussianBlur(img_bgr, (3,3), 0)
    img_bgr = crop_black_borders(img_bgr, tol=10)
    img_bgr = align_by_centroid(img_bgr, tol=10)
    img_bgr = clahe_green(img_bgr)
    return img_bgr

def build_preprocessed_dataset(df, src_dir, dst_dir, workers=8, out_size=(224,224), overwrite=False):
    os.makedirs(dst_dir, exist_ok=True)
    files = df["id_code"].astype(str).tolist()

    def _one(fname):
        base = os.path.basename(fname)  # güvenlik: path gelirse basename al
        src = os.path.join(src_dir, base)
        dst = os.path.join(dst_dir, base)

        if (not overwrite) and os.path.exists(dst):
            return "skipped"

        img = cv2.imread(src)
        if img is None:
            return "missing"

        img = preprocess_bgr(img)
        img = cv2.resize(img, out_size, interpolation=cv2.INTER_AREA)
        ok = cv2.imwrite(dst, img)
        return "ok" if ok else "failed"

    stats = {"ok":0, "skipped":0, "missing":0, "failed":0}
    with ThreadPoolExecutor(max_workers=workers) as ex:
        futs = [ex.submit(_one, f) for f in files]
        for fu in tqdm(as_completed(futs), total=len(futs), desc=f"Preprocess -> {os.path.basename(dst_dir)}"):
            stats[fu.result()] += 1

    print(f" Done {dst_dir} | {stats}")
    return stats



_ = build_preprocessed_dataset(df_train_small, TRAIN_DIR, PREP_TRAIN_DIR, workers=8, overwrite=False)
_ = build_preprocessed_dataset(df_val_small,   VAL_DIR,   PREP_VAL_DIR,   workers=8, overwrite=False)
_ = build_preprocessed_dataset(df_test,        TEST_DIR,  PREP_TEST_DIR,  workers=8, overwrite=False)

print("\nPREP_ROOT:", PREP_ROOT)
print("Counts:",
      len(glob.glob(PREP_TRAIN_DIR+"/*")),
      len(glob.glob(PREP_VAL_DIR+"/*")),
      len(glob.glob(PREP_TEST_DIR+"/*")))

print("\n Ready: df_train_small, df_val_small, PREP_TRAIN_DIR, PREP_VAL_DIR, PREP_TEST_DIR")


SEARCH_ROOTS: ['/content/drive/MyDrive', '/content/drive', '/content']

 CSV candidates (first 20 each):
 - TRAIN_CSV: 1 found
     /content/drive/MyDrive/Image Processing Project/train_1.csv
 - VAL_CSV: 1 found
     /content/drive/MyDrive/Image Processing Project/valid.csv
 - TEST_CSV: 1 found
     /content/drive/MyDrive/Image Processing Project/test.csv

 RESOLVED CSV PATHS:
TRAIN_CSV: /content/drive/MyDrive/Image Processing Project/train_1.csv
VAL_CSV  : /content/drive/MyDrive/Image Processing Project/valid.csv
TEST_CSV : /content/drive/MyDrive/Image Processing Project/test.csv
 CSV read OK. Columns: ['id_code', 'diagnosis']

 RESOLVED IMAGE DIRS:
TRAIN_DIR: /content/drive/MyDrive/Image Processing Project/train_images/train_images ext: .png
VAL_DIR  : /content/drive/MyDrive/Image Processing Project/val_images/val_images ext: .png
TEST_DIR : /content/drive/MyDrive/Image Processing Project/test_images/test_images ext: .png
 Using IMAGE_EXT: .png

 Using train: 2930 / 2930
 Using val  

Preprocess -> train_images: 100%|██████████| 2930/2930 [10:13<00:00,  4.77it/s]


 Done /content/preprocessed_aptos/train_images | {'ok': 2930, 'skipped': 0, 'missing': 0, 'failed': 0}


Preprocess -> val_images: 100%|██████████| 366/366 [01:20<00:00,  4.57it/s]


 Done /content/preprocessed_aptos/val_images | {'ok': 366, 'skipped': 0, 'missing': 0, 'failed': 0}


Preprocess -> test_images: 100%|██████████| 366/366 [01:08<00:00,  5.33it/s]

 Done /content/preprocessed_aptos/test_images | {'ok': 366, 'skipped': 0, 'missing': 0, 'failed': 0}

PREP_ROOT: /content/preprocessed_aptos
Counts: 2930 366 366

 Ready: df_train_small, df_val_small, PREP_TRAIN_DIR, PREP_VAL_DIR, PREP_TEST_DIR





In [5]:
print("df_train_small" in globals())
print("df_val_small" in globals())
print("PREP_TRAIN_DIR" in globals())
print("PREP_VAL_DIR" in globals())

True
True
True
True


In [6]:
# ==========================
# HÜCRE 2 (GECE ÇALIŞMAYA UYGUN) - TRAINING (İSTEDİĞİN ÇIKTILARLA)
# - Checkpoint'ler DRIVE'a yazılır (best_model.keras + her epoch weights)
# - Her epoch: progress satırı + QWK + Confusion Matrix (print + Drive'a kaydet)
# - preprocess_input
# - class mapping fix
# - EarlyStopping + "Restoring model weights..." çıktısı
# - FINAL: QWK + prediction distribution + confusion matrix
# ==========================

import os, re, glob, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import cohen_kappa_score, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print("TF:", tf.__version__)
print("GPU:", tf.config.list_physical_devices("GPU"))

# --------------------------
# 0) Drive mount (Colab)
# --------------------------
def try_mount_drive():
    if os.path.exists("/content/drive"):
        return
    try:
        from google.colab import drive
        drive.mount("/content/drive")
    except Exception as e:
        print(" Drive mount edilemedi:", e)

try_mount_drive()

# --------------------------
# 1) Bu hücrenin beklediği değişkenler (Hücre 1’den gelir)
# --------------------------
needed = ["df_train_small", "df_val_small", "PREP_TRAIN_DIR", "PREP_VAL_DIR"]
missing = [n for n in needed if n not in globals()]
if missing:
    raise RuntimeError(f" Hücre 1 çalışmadan bu hücre çalışmaz. Eksikler: {missing}")

# --------------------------
# 2) Ayarlar
# --------------------------
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

IMG_SIZE   = (224, 224)
BATCH_SIZE = 32

WARMUP_EPOCHS = 3
FT_EPOCHS     = 10
FT_LR         = 1e-4
WARMUP_LR     = 1e-3

NUM_CLASSES = 5
CLASS_NAMES = [str(i) for i in range(NUM_CLASSES)]
CLASS_TO_IDX = {c:i for i,c in enumerate(CLASS_NAMES)}

# --------------------------
# 3) Checkpoint & Log klasörleri
# --------------------------
BASE_DRIVE = "/content/drive/MyDrive/Image Processing Project"
CKPT_DIR   = os.path.join(BASE_DRIVE, "checkpoints")
LOG_DIR    = os.path.join(BASE_DRIVE, "logs_night")
CM_DIR     = os.path.join(BASE_DRIVE, "confusion_matrices_night")

os.makedirs(CKPT_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(CM_DIR, exist_ok=True)

BEST_MODEL_PATH = os.path.join(CKPT_DIR, "best_model.keras")  #
EPOCH_WEIGHTS_PATH = os.path.join(CKPT_DIR, "epoch_{epoch:03d}_valloss_{val_loss:.5f}.weights.h5")

print(" CKPT_DIR:", CKPT_DIR)
print(" BEST_MODEL:", BEST_MODEL_PATH)
print(" CM_DIR:", CM_DIR)

# --------------------------
# 4) Class mapping fix + path hazırlama
# --------------------------
def fix_label_series(s):
    s = s.astype(str).str.strip()
    s = s.str.replace(r"\.0$", "", regex=True)     # "2.0" -> "2"
    s = s.where(s.isin(CLASS_NAMES), other=np.nan) # sadece 0..4 kalsın
    return s

df_train_small = df_train_small.copy()
df_val_small   = df_val_small.copy()

if "diagnosis" not in df_train_small.columns or "diagnosis" not in df_val_small.columns:
    raise ValueError(" df_train_small / df_val_small içinde 'diagnosis' yok!")

df_train_small["diagnosis_fixed"] = fix_label_series(df_train_small["diagnosis"])
df_val_small["diagnosis_fixed"]   = fix_label_series(df_val_small["diagnosis"])

before_tr, before_va = len(df_train_small), len(df_val_small)
df_train_small = df_train_small.dropna(subset=["diagnosis_fixed"]).reset_index(drop=True)
df_val_small   = df_val_small.dropna(subset=["diagnosis_fixed"]).reset_index(drop=True)
print(f" Train label cleaned: {before_tr} -> {len(df_train_small)}")
print(f" Val label cleaned  : {before_va} -> {len(df_val_small)}")

df_train_small["label"] = df_train_small["diagnosis_fixed"].map(CLASS_TO_IDX).astype(int)
df_val_small["label"]   = df_val_small["diagnosis_fixed"].map(CLASS_TO_IDX).astype(int)

def make_paths(df, img_dir):
    names = df["id_code"].astype(str).apply(lambda x: os.path.basename(x)).tolist()
    return [os.path.join(img_dir, n) for n in names]

train_paths = make_paths(df_train_small, PREP_TRAIN_DIR)
val_paths   = make_paths(df_val_small,   PREP_VAL_DIR)
train_labels = df_train_small["label"].values.astype(np.int32)
val_labels   = df_val_small["label"].values.astype(np.int32)

def filter_existing(paths, labels):
    keep_p, keep_y = [], []
    missing = 0
    for p,y in zip(paths, labels):
        if os.path.exists(p):
            keep_p.append(p); keep_y.append(y)
        else:
            missing += 1
    return keep_p, np.array(keep_y, dtype=np.int32), missing

train_paths, train_labels, miss_tr = filter_existing(train_paths, train_labels)
val_paths,   val_labels,   miss_va = filter_existing(val_paths,   val_labels)

print(" Train files:", len(train_paths), "| missing:", miss_tr)
print(" Val files  :", len(val_paths),   "| missing:", miss_va)
if len(train_paths) == 0 or len(val_paths) == 0:
    raise RuntimeError(" Train/Val path boş. PREP_TRAIN_DIR / PREP_VAL_DIR içinde dosya yok gibi.")

# --------------------------
# 5) tf.data pipeline (preprocess_input + one-hot labels)
# --------------------------
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input

AUTOTUNE = tf.data.AUTOTUNE

def decode_and_preprocess(path, label, training=False):
    img_bytes = tf.io.read_file(path)
    img = tf.image.decode_image(img_bytes, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE, method="bilinear")
    img = tf.cast(img, tf.float32)

    if training:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, 0.05)

    img = preprocess_input(img)  # preprocess_input
    y = tf.one_hot(label, NUM_CLASSES)  #  AUC + top2_acc için
    return img, y

def make_ds(paths, labels, training=False):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if training:
        ds = ds.shuffle(min(2000, len(paths)), seed=SEED, reshuffle_each_iteration=True)
    ds = ds.map(lambda p,y: decode_and_preprocess(p,y,training=training), num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
    return ds

train_ds = make_ds(train_paths, train_labels, training=True)
val_ds   = make_ds(val_paths,   val_labels,   training=False)

# --------------------------
# 6) Model
# --------------------------
def build_model():
    base = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(*IMG_SIZE, 3))
    base.trainable = False  # warmup: frozen

    inp = keras.Input(shape=(*IMG_SIZE, 3))
    x = base(inp, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(NUM_CLASSES, activation="softmax")(x)
    model = keras.Model(inp, out)
    return model, base

model, base = build_model()

metrics = [
    keras.metrics.CategoricalAccuracy(name="acc"),
    keras.metrics.AUC(name="auc", multi_label=True, num_labels=NUM_CLASSES),
    keras.metrics.TopKCategoricalAccuracy(k=2, name="top2_acc"),
]

model.compile(
    optimizer=keras.optimizers.Adam(WARMUP_LR),
    loss="categorical_crossentropy",
    metrics=metrics
)

# --------------------------
# 7) Resume (varsa en son epoch weights’ten devam)
# --------------------------
def get_latest_epoch_ckpt(ckpt_dir):
    files = glob.glob(os.path.join(ckpt_dir, "epoch_*.weights.h5"))
    if not files:
        return None, 0
    def epoch_num(f):
        m = re.search(r"epoch_(\d+)_", os.path.basename(f))
        return int(m.group(1)) if m else -1
    files = sorted(files, key=epoch_num)
    latest = files[-1]
    return latest, epoch_num(latest)

latest_ckpt, last_epoch = get_latest_epoch_ckpt(CKPT_DIR)
initial_epoch = 0
if latest_ckpt:
    print(f" Resume: {latest_ckpt} (epoch={last_epoch})")
    model.load_weights(latest_ckpt)
    initial_epoch = last_epoch
else:
    print(" Resume checkpoint yok, sıfırdan başlanıyor.")

# --------------------------
# 8) Callback: Epoch progress + QWK + Confusion Matrix (print + kaydet)
# --------------------------
class EpochProgressQWKCM(keras.callbacks.Callback):
    def __init__(self, val_ds, class_names, cm_dir, total_epochs):
        super().__init__()
        self.val_ds = val_ds
        self.class_names = class_names
        self.cm_dir = cm_dir
        self.total_epochs = total_epochs

    def on_epoch_begin(self, epoch, logs=None):
        # epoch index 0-based
        pct = (epoch / max(1, self.total_epochs)) * 100.0
        print(f"\n Epoch progress: {epoch+1}/{self.total_epochs} ({pct:.1f}%)")

    def on_epoch_end(self, epoch, logs=None):
        # y_true / y_pred
        y_true = []
        y_pred = []

        for xb, yb_onehot in self.val_ds:
            pr = self.model.predict(xb, verbose=0)
            y_true.extend(np.argmax(yb_onehot.numpy(), axis=1).tolist())
            y_pred.extend(np.argmax(pr, axis=1).tolist())

        qwk = cohen_kappa_score(y_true, y_pred, weights="quadratic")
        if logs is not None:
            logs["val_qwk"] = qwk

        print(f"✅ QWK (val): {qwk:.4f}")

        cm = confusion_matrix(y_true, y_pred, labels=list(range(len(self.class_names))))
        print("Confusion Matrix:\n", cm)

        # Kaydet (png + csv)
        out_png = os.path.join(self.cm_dir, f"cm_epoch_{epoch+1:03d}.png")
        out_csv = os.path.join(self.cm_dir, f"cm_epoch_{epoch+1:03d}.csv")

        # png
        fig = plt.figure(figsize=(6,5))
        plt.imshow(cm, interpolation="nearest")
        plt.title(f"Confusion Matrix (epoch {epoch+1})")
        plt.colorbar()
        ticks = np.arange(len(self.class_names))
        plt.xticks(ticks, self.class_names)
        plt.yticks(ticks, self.class_names)
        thresh = cm.max()/2.0 if cm.max() > 0 else 0.5
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, int(cm[i,j]),
                         ha="center", va="center",
                         color="white" if cm[i,j] > thresh else "black")
        plt.ylabel("True")
        plt.xlabel("Pred")
        plt.tight_layout()
        fig.savefig(out_png, dpi=150)
        plt.close(fig)

        # csv
        pd.DataFrame(cm, index=self.class_names, columns=self.class_names).to_csv(out_csv)

# --------------------------
# 9) Callbacks: best_model.keras + epoch weights + early stop + LR
# --------------------------
cb_best = keras.callbacks.ModelCheckpoint(
    filepath=BEST_MODEL_PATH,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=False,   #  best_model.keras
    verbose=1
)

cb_epoch = keras.callbacks.ModelCheckpoint(
    filepath=EPOCH_WEIGHTS_PATH,
    monitor="val_loss",
    mode="min",
    save_best_only=False,
    save_weights_only=True,    #  her epoch weights
    verbose=1
)

cb_csv = keras.callbacks.CSVLogger(os.path.join(LOG_DIR, "history.csv"), append=True)

cb_rlr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1
)

cb_es = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,  #  "Restoring model weights..." çıktısı
    verbose=1
)

# WARMUP + FINETUNE için ayrı total epoch hesapları (progress satırı için)
cb_qwk_cm_warm = EpochProgressQWKCM(val_ds, CLASS_NAMES, CM_DIR, total_epochs=WARMUP_EPOCHS)
cb_qwk_cm_ft   = EpochProgressQWKCM(val_ds, CLASS_NAMES, CM_DIR, total_epochs=FT_EPOCHS)

# --------------------------
# 10) TRAIN: WARMUP
# --------------------------
print("\n=== WARMUP (base frozen) ===")

hist_warm = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=WARMUP_EPOCHS,
    initial_epoch=min(initial_epoch, WARMUP_EPOCHS),
    callbacks=[cb_qwk_cm_warm, cb_best, cb_epoch, cb_csv, cb_rlr, cb_es],
    verbose=1
)

# --------------------------
# 11) TRAIN: FINE-TUNE
# --------------------------
print("\n=== FINE-TUNE (base trainable=True) ===")

# base trainable
base.trainable = True
model.compile(
    optimizer=keras.optimizers.Adam(FT_LR),
    loss="categorical_crossentropy",
    metrics=metrics
)

# fine-tune için early stopping'i yeniden başlatmak için yeni callback oluşturulur
cb_es_ft = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
    verbose=1
)

hist_ft = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=FT_EPOCHS,
    initial_epoch=0,  # fine-tune kendi içinde 1/10 şeklinde yazsın
    callbacks=[cb_qwk_cm_ft, cb_best, cb_epoch, cb_csv, cb_rlr, cb_es_ft],
    verbose=1
)

# --------------------------
# 12) FINAL: QWK + distribution + confusion matrix
# --------------------------
y_true, y_pred = [], []
for xb, yb_onehot in val_ds:
    pr = model.predict(xb, verbose=0)
    y_true.extend(np.argmax(yb_onehot.numpy(), axis=1).tolist())
    y_pred.extend(np.argmax(pr, axis=1).tolist())

final_qwk = cohen_kappa_score(y_true, y_pred, weights="quadratic")
dist = np.bincount(np.array(y_pred), minlength=NUM_CLASSES)
cm = confusion_matrix(y_true, y_pred, labels=list(range(NUM_CLASSES)))

print("\nFINAL QWK:", round(final_qwk, 4))
print("Prediction distribution:", dist)
print("Confusion Matrix:\n", cm)

# final cm kaydet
pd.DataFrame(cm, index=CLASS_NAMES, columns=CLASS_NAMES).to_csv(os.path.join(CM_DIR, "cm_FINAL.csv"))
print("\n Best model path:", BEST_MODEL_PATH)
print(" History CSV:", os.path.join(LOG_DIR, "history.csv"))
print(" Confusion matrices folder:", CM_DIR)


TF: 2.19.0
GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
 CKPT_DIR: /content/drive/MyDrive/Image Processing Project/checkpoints
 BEST_MODEL: /content/drive/MyDrive/Image Processing Project/checkpoints/best_model.keras
 CM_DIR: /content/drive/MyDrive/Image Processing Project/confusion_matrices_night
 Train label cleaned: 2930 -> 2930
 Val label cleaned  : 366 -> 366
 Train files: 2930 | missing: 0
 Val files  : 366 | missing: 0
 Resume: /content/drive/MyDrive/Image Processing Project/checkpoints/epoch_003_valloss_0.64533.weights.h5 (epoch=3)

=== WARMUP (base frozen) ===

=== FINE-TUNE (base trainable=True) ===

 Epoch progress: 1/10 (0.0%)
Epoch 1/10
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 655ms/step - acc: 0.5568 - auc: 0.7925 - loss: 1.0896 - top2_acc: 0.7076✅ QWK (val): 0.8556
Confusion Matrix:
 [[171   1   0   0   0]
 [  1  25  12   0   2]
 [  5  11  72   7   9]
 [  0   2  11   1   8]
 [  0   1   9   1  17]]

Epoch 1: val_loss impro