In [1]:
# =========================================
# [0] IMPORTS & GLOBAL CONFIG
# =========================================
import os, json, random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

import optuna
from optuna.pruners import MedianPruner

from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report

SEED = 42
tf.keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()
os.environ["PYTHONHASHSEED"] = str(SEED)

EPOCHS = 50
NUM_CLASSES = 4
CLASS_NAMES = ["defect", "longberry", "peaberry", "premium"]

# =========================================
# [1] FIND PREPROCESSED ARTIFACTS
# =========================================
CANDIDATE_PATHS = [
    Path("/kaggle/input/coffe-bean-classification-preprocessing/artifacts_preprocess"),
    Path("/kaggle/input/coffee-bean-classification-preprocessing/artifacts_preprocess"),
    Path("/kaggle/input/coffe-bean-classification-preprocessing"),
]
ART_DIR = None
for base in CANDIDATE_PATHS:
    if base.exists():
        possible_dirs = [base] if (base / "split_train.csv").exists() else list(base.rglob("artifacts_preprocess"))
        for p in possible_dirs:
            if (p / "split_train.csv").exists() and (p / "split_val.csv").exists():
                ART_DIR = p
                break
    if ART_DIR: break

if ART_DIR is None:
    input_dir = Path("/kaggle/input")
    for dataset_dir in input_dir.iterdir():
        if dataset_dir.is_dir():
            for p in dataset_dir.rglob("artifacts_preprocess"):
                if (p / "split_train.csv").exists() and (p / "split_val.csv").exists():
                    ART_DIR = p
                    break
        if ART_DIR: break

if ART_DIR is None:
    raise FileNotFoundError("Tidak menemukan artifacts_preprocess")

print("✅ ART_DIR:", ART_DIR)

# =========================================
# [2] LOAD DATAFRAMES
# =========================================
train_df = pd.read_csv(ART_DIR / "split_train.csv")
val_df   = pd.read_csv(ART_DIR / "split_val.csv")

# =========================================
# [3] TF.DATA PIPELINE
# =========================================
AUTOTUNE = tf.data.AUTOTUNE

def decode_and_resize(image_path, label, target_size):
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, target_size, method="bilinear")
    img = tf.cast(img, tf.float32)
    return img, label

def create_dataset(df, target_size, training=True, batch_size=32):
    paths = df["filepath"].values
    labels = df["class_name"].map({c:i for i,c in enumerate(CLASS_NAMES)}).values.astype(np.int32)
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if training:
        ds = ds.shuffle(buffer_size=len(df), seed=SEED, reshuffle_each_iteration=True)
    ds = ds.map(lambda p,l: decode_and_resize(p,l,target_size), num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(AUTOTUNE)
    return ds

# =========================================
# [4] MODEL BUILDER: EfficientNet (TUNABLE)
# =========================================
from tensorflow.keras import layers, models, regularizers

EFFNET_IMG_SIZES = {"B0": (224,224), "B1": (240,240), "B2": (260,260), "B3": (300,300)}
EFFNET_FNS = {
    "B0": tf.keras.applications.EfficientNetB0,
    "B1": tf.keras.applications.EfficientNetB1,
    "B2": tf.keras.applications.EfficientNetB2,
    "B3": tf.keras.applications.EfficientNetB3,
}

def build_effnet_tunable(
    variant="B0",
    dense_units=0,
    dropout=0.2,
    l2=0.0,
    freeze_backbone=True,
    fine_tune_at=None
):
    img_size = EFFNET_IMG_SIZES[variant]
    base = EFFNET_FNS[variant](
        include_top=False,
        weights="imagenet",
        input_shape=(*img_size, 3)
    )

    if freeze_backbone:
        base.trainable = False
    else:
        base.trainable = True
        if fine_tune_at is not None:
            for layer in base.layers[:fine_tune_at]:
                layer.trainable = False

    inputs = layers.Input(shape=(*img_size, 3))
    x = tf.keras.applications.efficientnet.preprocess_input(inputs)
    x = base(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)

    if dense_units and dense_units > 0:
        x = layers.Dense(
            dense_units,
            activation="relu",
            kernel_regularizer=regularizers.l2(l2) if l2 > 0 else None
        )(x)

    x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
    return models.Model(inputs, outputs, name=f"EfficientNet{variant}_Tuned")

# =========================================
# [5] OPTUNA OBJECTIVE
# =========================================
def objective(trial: optuna.Trial):
    variant = trial.suggest_categorical("variant", ["B0","B1","B2","B3"])
    img_size = EFFNET_IMG_SIZES[variant]

    batch_size = trial.suggest_categorical("batch_size", [16, 32, 48, 64])
    lr = trial.suggest_float("lr", 1e-5, 3e-3, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    dense_units = trial.suggest_categorical("dense_units", [0, 128, 256, 512])
    l2 = trial.suggest_float("l2", 1e-7, 1e-3, log=True)
    label_smoothing = trial.suggest_float("label_smoothing", 0.0, 0.15)

    freeze_backbone = trial.suggest_categorical("freeze_backbone", [True, False])
    fine_tune_at = None
    if not freeze_backbone:
        # layer count tiap variant beda; pilih opsi aman
        fine_tune_at = trial.suggest_categorical("fine_tune_at", [20, 50, 80, 120])

    ds_train = create_dataset(train_df, target_size=img_size, training=True, batch_size=batch_size)
    ds_val   = create_dataset(val_df,   target_size=img_size, training=False, batch_size=batch_size)

    model = build_effnet_tunable(
        variant=variant,
        dense_units=dense_units,
        dropout=dropout,
        l2=l2,
        freeze_backbone=freeze_backbone,
        fine_tune_at=fine_tune_at
    )

    opt_name = trial.suggest_categorical("optimizer", ["adam", "adamw"])
    if opt_name == "adamw":
        try:
            opt = tf.keras.optimizers.AdamW(
                learning_rate=lr,
                weight_decay=trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
            )
        except Exception:
            opt = tf.keras.optimizers.Adam(learning_rate=lr)
    else:
        opt = tf.keras.optimizers.Adam(learning_rate=lr)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    model.compile(optimizer=opt, loss=loss_fn, metrics=["accuracy"])

    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True, verbose=0),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-7, verbose=0),
        optuna.integration.TFKerasPruningCallback(trial, monitor="val_accuracy"),
    ]

    history = model.fit(ds_train, validation_data=ds_val, epochs=EPOCHS, callbacks=callbacks, verbose=0)
    return float(np.max(history.history["val_accuracy"]))

# =========================================
# [6] RUN STUDY
# =========================================
study = optuna.create_study(direction="maximize", pruner=MedianPruner(n_startup_trials=5))
study.optimize(objective, n_trials=30, gc_after_trial=True)  # sesuaikan

print("Best value:", study.best_value)
print("Best params:", study.best_params)

OUTDIR = Path("/kaggle/working/optuna_effnet_b0_b3")
OUTDIR.mkdir(parents=True, exist_ok=True)
pd.DataFrame(study.trials_dataframe()).to_csv(OUTDIR/"trials.csv", index=False)
with open(OUTDIR/"best_params.json", "w") as f:
    json.dump(study.best_params, f, indent=2)

print("✅ Saved to:", OUTDIR)

# =========================================
# [7] RETRAIN BEST + SAVE MODEL
# =========================================
best = study.best_params
variant = best["variant"]
img_size = EFFNET_IMG_SIZES[variant]
bs = best["batch_size"]

ds_train = create_dataset(train_df, target_size=img_size, training=True, batch_size=bs)
ds_val   = create_dataset(val_df,   target_size=img_size, training=False, batch_size=bs)

model = build_effnet_tunable(
    variant=variant,
    dense_units=best["dense_units"],
    dropout=best["dropout"],
    l2=best["l2"],
    freeze_backbone=best["freeze_backbone"],
    fine_tune_at=best.get("fine_tune_at", None)
)

if best["optimizer"] == "adamw":
    try:
        opt = tf.keras.optimizers.AdamW(learning_rate=best["lr"], weight_decay=best.get("weight_decay", 1e-5))
    except Exception:
        opt = tf.keras.optimizers.Adam(learning_rate=best["lr"])
else:
    opt = tf.keras.optimizers.Adam(learning_rate=best["lr"])

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=opt, loss=loss_fn, metrics=["accuracy"])

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True, verbose=1),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-7, verbose=1),
]

history = model.fit(ds_train, validation_data=ds_val, epochs=EPOCHS, callbacks=callbacks, verbose=1)

model.save(OUTDIR/"best_model.keras")
print("✅ Saved:", OUTDIR/"best_model.keras")

2026-01-08 02:44:57.985947: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767840298.179243      24 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767840298.230268      24 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767840298.655510      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767840298.655551      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767840298.655554      24 computation_placer.cc:177] computation placer alr

✅ ART_DIR: /kaggle/input/coffe-bean-classification-preprocessing/artifacts_preprocess


I0000 00:00:1767840313.262876      24 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb2_notop.h5
[1m31790344/31790344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


E0000 00:00:1767840353.075998      24 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EfficientNetB2_Tuned_1/efficientnetb2_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
I0000 00:00:1767840358.372765      63 cuda_dnn.cc:529] Loaded cuDNN version 91002
[32m[I 2026-01-08 02:50:37,684][0m Trial 0 finished with value: 0.9839572310447693 and parameters: {'variant': 'B2', 'batch_size': 48, 'lr': 0.00032357184597318195, 'dropout': 0.2269541167803069, 'dense_units': 512, 'l2': 1.319976086082518e-05, 'label_smoothing': 0.10534375574896877, 'freeze_backbone': False, 'fine_tune_at': 120, 'optimizer': 'adamw', 'weight_decay': 2.6462112902335828e-05}. Best is trial 0 with value: 0.9839572310447693.[0m


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb1_notop.h5
[1m27018416/27018416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


E0000 00:00:1767840651.454258      24 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EfficientNetB1_Tuned_1/efficientnetb1_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
[32m[I 2026-01-08 02:52:25,734][0m Trial 1 finished with value: 0.9358288645744324 and parameters: {'variant': 'B1', 'batch_size': 64, 'lr': 0.0006699764747710552, 'dropout': 0.13744983203382383, 'dense_units': 128, 'l2': 4.468618412331749e-06, 'label_smoothing': 0.030769713149501936, 'freeze_backbone': True, 'optimizer': 'adam'}. Best is trial 0 with value: 0.9839572310447693.[0m


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


E0000 00:00:1767840760.778870      24 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EfficientNetB3_Tuned_1/efficientnetb3_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
[32m[I 2026-01-08 02:57:00,029][0m Trial 2 finished with value: 0.9625668525695801 and parameters: {'variant': 'B3', 'batch_size': 16, 'lr': 0.000338910009543314, 'dropout': 0.4504831731973134, 'dense_units': 512, 'l2': 0.0007082069840015407, 'label_smoothing': 0.12679685759946216, 'freeze_backbone': True, 'optimizer': 'adamw', 'weight_decay': 4.46237643673059e-05}. Best is trial 0 with value: 0.9839572310447693.[0m


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


E0000 00:00:1767841029.653750      24 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EfficientNetB0_Tuned_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
[32m[I 2026-01-08 02:58:31,287][0m Trial 3 finished with value: 0.9358288645744324 and parameters: {'variant': 'B0', 'batch_size': 32, 'lr': 0.00023630038867736522, 'dropout': 0.03782242911029554, 'dense_units': 0, 'l2': 7.530903660381047e-07, 'label_smoothing': 0.13553258490939663, 'freeze_backbone': True, 'optimizer': 'adam'}. Best is trial 0 with value: 0.9839572310447693.[0m
E0000 00:00:1767841148.508367      24 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EfficientNetB2_Tuned_1/efficientnetb2_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer

Best value: 1.0
Best params: {'variant': 'B3', 'batch_size': 16, 'lr': 0.0008650221856095975, 'dropout': 0.4373578704553349, 'dense_units': 0, 'l2': 0.0001371278630792193, 'label_smoothing': 0.029227182207990365, 'freeze_backbone': False, 'fine_tune_at': 120, 'optimizer': 'adamw', 'weight_decay': 0.00015096859040107197}
✅ Saved to: /kaggle/working/optuna_effnet_b0_b3
Epoch 1/50


E0000 00:00:1767844815.786065      24 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/EfficientNetB3_Tuned_1/efficientnetb3_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 312ms/step - accuracy: 0.6665 - loss: 0.8143 - val_accuracy: 0.9198 - val_loss: 0.2338 - learning_rate: 8.6502e-04
Epoch 2/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 227ms/step - accuracy: 0.9116 - loss: 0.2800 - val_accuracy: 0.8663 - val_loss: 0.9702 - learning_rate: 8.6502e-04
Epoch 3/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 227ms/step - accuracy: 0.9365 - loss: 0.1940 - val_accuracy: 0.8610 - val_loss: 0.4517 - learning_rate: 8.6502e-04
Epoch 4/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 229ms/step - accuracy: 0.9479 - loss: 0.1505 - val_accuracy: 0.9626 - val_loss: 0.0918 - learning_rate: 8.6502e-04
Epoch 5/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 227ms/step - accuracy: 0.9850 - loss: 0.0505 - val_accuracy: 0.9679 - val_loss: 0.1416 - learning_rate: 8.6502e-04
Epoch 6/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[