In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Print one full path per file found in this directory; directories
    # without files print nothing.
    input_paths = [os.path.join(dirname, filename) for filename in filenames]
    if input_paths:
        print('\n'.join(input_paths))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras import layers, models, optimizers
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, RocCurveDisplay
import matplotlib.pyplot as plt
import matplotlib as mpl

# Dataset root directory (make sure the directory layout matches the
# Pneumonia dataset: Pneumonia/train, Pneumonia/val, Pneumonia/test).
base_dir = "/kaggle/input/pneumonia"
train_dir, val_dir, test_dir = (
    os.path.join(base_dir, split_subdir)
    for split_subdir in ("Pneumonia/train", "Pneumonia/val", "Pneumonia/test")
)

# Verify the directories: fail fast on the first split that is missing.
missing_dir = next(
    (path for path in (train_dir, val_dir, test_dir) if not os.path.exists(path)),
    None,
)
if missing_dir is not None:
    raise FileNotFoundError(f"Directory not found: {missing_dir}")

# ============================
# Data preparation & augmentation
# ============================
# Random geometric augmentations, applied to the training stream only.
augmentation_args = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
# Every generator multiplies pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_args)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the three directory iterators.
flow_args = dict(target_size=(224, 224), batch_size=32, class_mode='binary')

train_gen = train_datagen.flow_from_directory(train_dir, **flow_args)
val_gen = val_datagen.flow_from_directory(val_dir, **flow_args)
# No shuffling for the test iterator, so that predictions stay aligned with
# test_gen.classes / test_gen.filenames.
test_gen = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_args)

# ============================
# Class weights (to counter class imbalance)
# ============================
train_labels = train_gen.classes
classes = np.unique(train_labels)
balanced_weights = compute_class_weight(
    class_weight='balanced', classes=classes, y=train_labels
)
# Keyed by position, in the order of the sorted unique labels in `classes`.
class_weights = {index: weight for index, weight in enumerate(balanced_weights)}

# ============================
# Model: EfficientNetB0
# ============================
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # freeze the pretrained weights

model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    # Keras' EfficientNet ships with its own input normalisation and expects
    # raw pixel values in [0, 255]. The image generators in this notebook use
    # rescale=1./255, so scale the inputs back up before they reach the
    # backbone; otherwise the ImageNet features are computed on near-black
    # images. The model as a whole keeps accepting inputs scaled by 1/255.
    layers.Rescaling(255.0),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # sigmoid output for binary classification
])

model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# ============================
# Callbacks: EarlyStopping & ReduceLROnPlateau
# ============================
# Both callbacks watch the validation loss.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6
)
callbacks = [early_stopping, reduce_lr]

# ============================
# Initial training with the frozen base
# ============================
history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
    callbacks=callbacks,
    class_weight=class_weights,
)

# ============================
# Fine-tuning: unfreeze the base model
# ============================
base_model.trainable = True
# Keep the BatchNormalization layers frozen. Keras' fine-tuning guidance for
# EfficientNet recommends this: letting them update during fine-tuning tends
# to wipe out what the frozen phase learned, especially on a small dataset.
for layer in base_model.layers:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Re-compile so the changed `trainable` flags take effect, with a 10x smaller
# learning rate than in the frozen phase.
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)
history_fine = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    class_weight=class_weights,
    callbacks=callbacks
)

# ============================
# Evaluation on the test set
# ============================
test_loss, test_acc = model.evaluate(test_gen, verbose=0)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

# Ground-truth labels and predictions; probabilities above 0.5 map to label 1.
y_true = test_gen.classes
y_pred_probs = model.predict(test_gen)
y_pred = np.where(y_pred_probs.reshape(-1) > 0.5, 1, 0)

# Confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Classification report, labelled with the class folder names
target_names = list(test_gen.class_indices)
report = classification_report(y_true, y_pred, target_names=target_names)
print("Classification Report:", report, sep="\n")

# ROC AUC score (computed from the probabilities, not the thresholded labels)
roc_auc = roc_auc_score(y_true, y_pred_probs)
print(f"AUROC: {roc_auc:.2f}")

# ROC curve
RocCurveDisplay.from_predictions(y_true, y_pred_probs)
plt.title("ROC Curve")
plt.show()

# Plot training history
# Phase 2 (fine-tuning) continues from where phase 1 stopped, so its curves
# are placed after the phase-1 epochs instead of being drawn on top of them.
phase1_len = len(history.history['loss'])
phase2_len = len(history_fine.history['loss'])
phase_runs = [
    ('phase 1', history.history, range(1, phase1_len + 1)),
    ('phase 2', history_fine.history, range(phase1_len + 1, phase1_len + phase2_len + 1)),
]

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
for phase_name, logs, epoch_axis in phase_runs:
    acc_ax.plot(epoch_axis, logs['accuracy'], label=f'Train Acc ({phase_name})')
    acc_ax.plot(epoch_axis, logs['val_accuracy'], label=f'Val Acc ({phase_name})')
    loss_ax.plot(epoch_axis, logs['loss'], label=f'Train Loss ({phase_name})')
    loss_ax.plot(epoch_axis, logs['val_loss'], label=f'Val Loss ({phase_name})')

acc_ax.set(title="Akurasi Training dan Validasi", xlabel="Epoch", ylabel="Accuracy")
acc_ax.legend()
loss_ax.set(title="Loss Training dan Validasi", xlabel="Epoch", ylabel="Loss")
loss_ax.legend()
plt.show()

# ============================
# Grad-CAM helpers
# ============================
def _find_nested_owner_index(model, layer_name):
    """Return the index in ``model.layers`` of the sub-model that directly owns ``layer_name``."""
    for index, layer in enumerate(model.layers):
        sub_layers = getattr(layer, "layers", None)
        if sub_layers and any(sub.name == layer_name for sub in sub_layers):
            return index
    raise ValueError(
        f"No such layer: {layer_name} (searched the model and its directly nested sub-models)"
    )


def make_gradcam_heatmap(img_array, model, last_conv_layer_name):
    """Compute a Grad-CAM heatmap for the first image of ``img_array``.

    The target layer may belong to ``model`` itself or to a sub-model used as
    one of its layers (for example a pretrained backbone wrapped in a
    ``Sequential``). In the nested case the outer model is assumed to be a
    linear stack of layers, which is what ``Sequential`` builds: the layers
    before the sub-model, the sub-model and the layers after it are applied
    one after another.

    Args:
        img_array: Batch of preprocessed images accepted by ``model``; only the
            feature maps of the first image are used for the heatmap.
        model: Keras model whose output column 0 is the score to explain.
        last_conv_layer_name: Name of the layer whose feature maps are
            weighted into the heatmap.

    Returns:
        NumPy array with the spatial shape of the chosen layer's output,
        containing non-negative values scaled so that the maximum is
        (approximately) 1.

    Raises:
        ValueError: If the layer is found neither in ``model`` nor in one of
            its directly nested sub-models.
    """
    if any(layer.name == last_conv_layer_name for layer in model.layers):
        # The layer is part of the outer graph: a single functional model
        # yields both the feature maps and the predictions.
        grad_model = tf.keras.models.Model(
            model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
        )
        layers_before, layers_after = [], []
    else:
        # `model.get_layer` only sees top-level layers, and the output tensor
        # of a nested layer is not connected to the outer model's inputs.
        # Build the two-output model on the sub-model instead and run the rest
        # of the outer stack by hand.
        owner_index = _find_nested_owner_index(model, last_conv_layer_name)
        owner = model.layers[owner_index]
        grad_model = tf.keras.models.Model(
            owner.inputs, [owner.get_layer(last_conv_layer_name).output, owner.output]
        )
        layers_before = [
            layer for layer in model.layers[:owner_index]
            if not isinstance(layer, tf.keras.layers.InputLayer)
        ]
        layers_after = model.layers[owner_index + 1:]

    with tf.GradientTape() as tape:
        features = tf.convert_to_tensor(img_array)
        for layer in layers_before:
            features = layer(features, training=False)
        conv_outputs, predictions = grad_model(features, training=False)
        for layer in layers_after:
            predictions = layer(predictions, training=False)
        # For binary classification the positive class is at index 0.
        class_channel = predictions[:, 0]

    # Gradient of the class score with respect to the chosen feature maps.
    grads = tape.gradient(class_channel, conv_outputs)
    # Average the gradients per feature-map channel.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    # Weight every channel of the first image's feature maps by its pooled
    # gradient and sum over the channels.
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    # Keep positive evidence only, then normalise to 0..1. The maximum is
    # taken after clipping so an all-negative map becomes all zeros.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = heatmap / (tf.math.reduce_max(heatmap) + tf.keras.backend.epsilon())
    return heatmap.numpy()

def save_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Overlay a Grad-CAM heatmap on the original image and save the result.

    Args:
        img_path: Path of the image to load (at its original size).
        heatmap: Array of heatmap intensities, expected in the range 0..1;
            NaN values are treated as 0 and out-of-range values are clipped.
        cam_path: Path the blended image is written to.
        alpha: Factor applied to the coloured heatmap before it is added to
            the image.
    """
    # Load the original image.
    img = img_to_array(load_img(img_path))

    # Rescale the heatmap to 0-255 so it can index a 256-entry colour table.
    heatmap_uint8 = np.uint8(255 * np.clip(np.nan_to_num(heatmap), 0.0, 1.0))

    # `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9; use the colormap
    # registry when it exists and fall back for older releases.
    if hasattr(mpl, "colormaps"):
        jet = mpl.colormaps["jet"]
    else:
        jet = mpl.cm.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]  # drop the alpha column
    jet_heatmap = jet_colors[heatmap_uint8]

    # Resize the coloured heatmap to the image size (PIL expects width, height).
    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = img_to_array(jet_heatmap)

    # Superimpose the heatmap on the original image and write it out.
    superimposed = jet_heatmap * alpha + img
    superimposed = tf.keras.preprocessing.image.array_to_img(superimposed)
    superimposed.save(cam_path)

# ============================
# Generate Grad-CAM heatmaps for a few test images
# ============================
last_conv_layer = "top_conv"  # name of the last conv layer in EfficientNetB0
gradcam_dir = "gradcam_outputs"
os.makedirs(gradcam_dir, exist_ok=True)

filenames = test_gen.filenames  # file paths relative to test_dir
for i, fname in enumerate(filenames[:5]):  # e.g. the first five images
    img_path = os.path.join(test_dir, fname)
    cam_path = os.path.join(gradcam_dir, f"gradcam_{i}.jpg")
    # Resize to 224x224, scale by 1/255 and add a leading batch axis.
    img = load_img(img_path, target_size=(224, 224))
    x = (img_to_array(img) / 255.0)[np.newaxis, ...]
    # Compute the Grad-CAM heatmap and save the overlay.
    heatmap = make_gradcam_heatmap(x, model, last_conv_layer_name=last_conv_layer)
    save_gradcam(img_path, heatmap, cam_path=cam_path, alpha=0.4)
    print(f"Saved Grad-CAM for {fname} as {cam_path}")
