In [2]:
# Cell 0: Import & konfigurasi dasar + cek GPU

import os
import cv2
import glob
import random
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers, models

# --- Konfigurasi path dataset asli (TIDAK pakai cls_dataset) ---
BASE_DIR    = os.getcwd()
PROJECT_DIR = os.path.join(BASE_DIR, "Fraud_Detectio")

TRAIN_DIR = os.path.join(PROJECT_DIR, "train")  # berisi folder "0"
VAL_DIR   = os.path.join(PROJECT_DIR, "val")    # berisi folder 0,90,180,270
TEST_DIR  = os.path.join(PROJECT_DIR, "test")   # berisi folder 0,90,180,270

IMG_HEIGHT = 224
IMG_WIDTH  = 224
BATCH_SIZE = 32

print("BASE_DIR   :", BASE_DIR)
print("PROJECT_DIR:", PROJECT_DIR)
print("TRAIN_DIR  :", TRAIN_DIR)
print("VAL_DIR    :", VAL_DIR)
print("TEST_DIR   :", TEST_DIR)

# Seed biar eksperimen agak konsisten
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


BASE_DIR   : d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning
PROJECT_DIR: d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio
TRAIN_DIR  : d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio\train
VAL_DIR    : d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio\val
TEST_DIR   : d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio\test


In [3]:
# Cell 1: Helper tampilan & loader

def show_image(img_bgr, title=None, size=(4, 4)):
    if img_bgr is None:
        print("Image is None")
        return
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=size)
    plt.imshow(img_rgb)
    plt.axis("off")
    if title:
        plt.title(title)
    plt.show()


def load_image_tf(path, label):
    """
    Loader untuk val/test:
    - path: string path
    - label: 0 (fraud) atau 1 (valid)
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
    # tidak dibagi 255 di sini; nanti ada Rescaling(1/255) di model
    img = tf.cast(img, tf.float32)
    return img, tf.reshape(label, (1,))


In [4]:
# Cell 2: Kumpulkan path training base dari train/0

train0_dir = os.path.join(TRAIN_DIR, "0")
train0_paths = []

for ext in ("*.jpg", "*.jpeg", "*.png"):
    train0_paths.extend(glob.glob(os.path.join(train0_dir, ext)))

train0_paths = sorted(train0_paths)

print("Total gambar base train/0 :", len(train0_paths))
if len(train0_paths) > 0:
    print("Contoh:", train0_paths[:3])


Total gambar base train/0 : 4825
Contoh: ['d:\\Coolyeah-Ngoding\\PBL\\Four-Heavenly-Principle\\Machine Learning\\Fraud_Detectio\\train\\0\\103_jpg.rf.095a1043937fa300847e77a2bb26d77c_ktp_0.jpg', 'd:\\Coolyeah-Ngoding\\PBL\\Four-Heavenly-Principle\\Machine Learning\\Fraud_Detectio\\train\\0\\103_jpg.rf.164c8de4bbaf6855d630b1312b08c82b_ktp_0.jpg', 'd:\\Coolyeah-Ngoding\\PBL\\Four-Heavenly-Principle\\Machine Learning\\Fraud_Detectio\\train\\0\\103_jpg.rf.1ebeee8b2d5f896f697f98f27f65a092_ktp_0.jpg']


In [5]:
# Cell 3: Kumpulkan path & label untuk VAL dan TEST

def collect_split_from_root(root_dir):
    """
    Ambil semua gambar dari root_dir/{0,90,180,270}
    0   -> label 1 (VALID)
    90/180/270 -> label 0 (FRAUD)
    """
    paths = []
    labels = []

    def add_angle(angle, label):
        d = os.path.join(root_dir, str(angle))
        if not os.path.isdir(d):
            return
        all_files = []
        for ext in ("*.jpg", "*.jpeg", "*.png"):
            all_files.extend(glob.glob(os.path.join(d, ext)))
        for p in all_files:
            paths.append(p)
            labels.append(label)

    # VALID
    add_angle(0, 1.0)
    # FRAUD (orientasi salah)
    for ang in [90, 180, 270]:
        add_angle(ang, 0.0)

    paths = np.array(paths)
    labels = np.array(labels, dtype=np.float32)

    print(f"Root: {root_dir}")
    print(f"  Total  : {len(paths)}")
    print(f"  VALID  : {(labels == 1).sum()}")
    print(f"  FRAUD  : {(labels == 0).sum()}")
    return paths, labels


paths_val, labels_val   = collect_split_from_root(VAL_DIR)
paths_test, labels_test = collect_split_from_root(TEST_DIR)


Root: d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio\val
  Total  : 1860
  VALID  : 465
  FRAUD  : 1395
Root: d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio\test
  Total  : 936
  VALID  : 234
  FRAUD  : 702


In [6]:
# Cell 4: Fungsi augmentasi untuk training generator

def rotate_image(img_bgr, angle):
    (h, w) = img_bgr.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img_bgr, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
    return rotated

def random_brightness_contrast(img_bgr, brightness=0.2, contrast=0.2):
    img = img_bgr.astype(np.float32) / 255.0
    alpha = 1.0 + random.uniform(-contrast, contrast)
    beta  = random.uniform(-brightness, brightness)
    img = alpha * img + beta
    img = np.clip(img, 0, 1)
    img = (img * 255).astype(np.uint8)
    return img

def add_gaussian_noise(img_bgr, std=10.0):
    noise = np.random.normal(0, std, img_bgr.shape).astype(np.float32)
    noisy = img_bgr.astype(np.float32) + noise
    noisy = np.clip(noisy, 0, 255).astype(np.uint8)
    return noisy

def random_crop_and_resize(img_bgr, crop_factor=(0.5, 0.9)):
    h, w = img_bgr.shape[:2]
    min_cf, max_cf = crop_factor
    cf = random.uniform(min_cf, max_cf)
    new_h, new_w = int(h * cf), int(w * cf)
    if new_h <= 0 or new_w <= 0:
        return cv2.resize(img_bgr, (w, h))
    y = random.randint(0, h - new_h)
    x = random.randint(0, w - new_w)
    cropped = img_bgr[y:y+new_h, x:x+new_w]
    resized = cv2.resize(cropped, (w, h))
    return resized

def add_letterbox_fraud(img_bgr, scale=0.6, bg_color=0):
    h, w = img_bgr.shape[:2]
    nh, nw = int(h*scale), int(w*scale)
    resized = cv2.resize(img_bgr, (nw, nh))
    canvas = np.full_like(img_bgr, bg_color)
    y = (h - nh)//2
    x = (w - nw)//2
    canvas[y:y+nh, x:x+nw] = resized
    return canvas

# --- Tampering (edit isi) ---

def block_nik_area(img_bgr):
    img = img_bgr.copy()
    h, w = img.shape[:2]
    x1, y1 = int(0.05 * w), int(0.12 * h)
    x2, y2 = int(0.95 * w), int(0.25 * h)
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 0), thickness=-1)
    return img

def big_text_area_blur(img_bgr):
    img = img_bgr.copy()
    h, w = img.shape[:2]
    x1, y1 = int(0.05 * w), int(0.20 * h)
    x2, y2 = int(0.95 * w), int(0.75 * h)
    roi = img[y1:y2, x1:x2]
    if roi.size > 0:
        roi = cv2.GaussianBlur(roi, (31, 31), 0)
        img[y1:y2, x1:x2] = roi
    return img

def big_white_patch(img_bgr):
    img = img_bgr.copy()
    h, w = img.shape[:2]
    x1, y1 = int(0.05 * w), int(0.20 * h)
    x2, y2 = int(0.95 * w), int(0.75 * h)
    cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 255), thickness=-1)
    return img

def random_text_overlay(img_bgr):
    img = img_bgr.copy()
    h, w = img.shape[:2]
    text = "EDITED"
    org = (int(0.1 * w), int(0.5 * h))
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.0
    color = (0, 0, 255)
    thickness = 2
    cv2.putText(img, text, org, font, font_scale, color, thickness, cv2.LINE_AA)
    return img

def random_face_blur(img_bgr):
    img = img_bgr.copy()
    h, w = img.shape[:2]
    x1, y1 = int(0.55 * w), int(0.25 * h)
    x2, y2 = int(0.9  * w), int(0.75 * h)
    roi = img[y1:y2, x1:x2]
    if roi.size > 0:
        roi = cv2.GaussianBlur(roi, (31, 31), 0)
        img[y1:y2, x1:x2] = roi
    return img

def strong_letterbox_meme(img_bgr):
    img = img_bgr.copy()
    h, w = img.shape[:2]
    scale = 0.5
    nh, nw = int(h * scale), int(w * scale)
    resized = cv2.resize(img_bgr, (nw, nh))
    canvas = np.ones_like(img_bgr) * 255  # putih
    y = (h - nh)//2
    x = (w - nw)//2
    canvas[y:y+nh, x:x+nw] = resized
    return canvas

# --- Wrapper augmentasi VALID & FRAUD ---

def make_valid_aug(img_bgr):
    img = img_bgr.copy()
    # sedikit jitter tapi tetap "rapih"
    if random.random() < 0.7:
        img = random_brightness_contrast(img, 0.15, 0.15)
    if random.random() < 0.4:
        img = add_gaussian_noise(img, std=5.0)
    if random.random() < 0.3:
        img = random_crop_and_resize(img, crop_factor=(0.9, 0.98))
    return img

def make_fraud_aug(img_bgr):
    """
    Fraud bisa:
    - orientasi salah (rotasi, crop parah, letterbox)
    - tampering 0° (blur teks besar, patch putih, blur wajah, dll.)
    """
    img = img_bgr.copy()
    
    fraud_mode = random.choice(["orientation", "tampering"])

    if fraud_mode == "orientation":
        ftype = random.choice(["rot_90", "rot_180", "rot_270",
                               "tilt_small", "crop", "letterbox"])
        if ftype == "rot_90":
            img = rotate_image(img, 90)
        elif ftype == "rot_180":
            img = rotate_image(img, 180)
        elif ftype == "rot_270":
            img = rotate_image(img, 270)
        elif ftype == "tilt_small":
            angle = random.choice([-30, -20, -15, 15, 20, 30])
            img = rotate_image(img, angle)
        elif ftype == "crop":
            img = random_crop_and_resize(img, crop_factor=(0.4, 0.75))
        elif ftype == "letterbox":
            img = add_letterbox_fraud(img, scale=0.6, bg_color=0)
    else:
        ftype = random.choice([
            "block_nik",
            "big_text_blur",
            "big_white_patch",
            "face_blur",
            "text_overlay",
            "meme_letterbox"
        ])
        if ftype == "block_nik":
            img = block_nik_area(img)
        elif ftype == "big_text_blur":
            img = big_text_area_blur(img)
        elif ftype == "big_white_patch":
            img = big_white_patch(img)
        elif ftype == "face_blur":
            img = random_face_blur(img)
        elif ftype == "text_overlay":
            img = random_text_overlay(img)
        elif ftype == "meme_letterbox":
            img = strong_letterbox_meme(img)

    # Noise/brightness tambahan
    if random.random() < 0.6:
        img = random_brightness_contrast(img, 0.2, 0.2)
    if random.random() < 0.6:
        img = add_gaussian_noise(img, std=8.0)
    
    return img


In [7]:
# Cell 5: Keras Sequence untuk training (augment on-the-fly)

class KTPTrainSequence(tf.keras.utils.Sequence):
    """
    Generator untuk training:
    - Sumber: train0_paths (0°)
    - Setiap batch, kita buat campuran:
        label 1 (VALID) : image + make_valid_aug
        label 0 (FRAUD) : image + make_fraud_aug (rotasi/tampering)
    """
    def __init__(self, img_paths, batch_size=BATCH_SIZE, steps_per_epoch=1000, fraud_ratio=0.5):
        self.img_paths = list(img_paths)
        self.batch_size = batch_size
        self.steps_per_epoch = steps_per_epoch
        self.fraud_ratio = fraud_ratio

    def __len__(self):
        return self.steps_per_epoch

    def __getitem__(self, idx):
        batch_x = np.zeros((self.batch_size, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.float32)
        batch_y = np.zeros((self.batch_size, 1), dtype=np.float32)

        for i in range(self.batch_size):
            path = random.choice(self.img_paths)
            img = cv2.imread(path)
            if img is None:
                # fallback: kalau gagal baca, pakai gambar random lain
                path2 = random.choice(self.img_paths)
                img = cv2.imread(path2)
                if img is None:
                    continue

            img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))

            # tentukan valid/fraud
            if random.random() < self.fraud_ratio:
                # FRAUD
                img_aug = make_fraud_aug(img)
                label = 0.0
            else:
                # VALID
                img_aug = make_valid_aug(img)
                label = 1.0

            batch_x[i] = img_aug.astype(np.float32)
            batch_y[i, 0] = label

        return batch_x, batch_y


# tentukan steps_per_epoch (boleh disesuaikan)
STEPS_PER_EPOCH = 200  # misal 200 batch per epoch

train_gen = KTPTrainSequence(
    train0_paths,
    batch_size=BATCH_SIZE,
    steps_per_epoch=STEPS_PER_EPOCH,
    fraud_ratio=0.5
)


In [8]:
# Cell 6: Buat tf.data.Dataset untuk val & test (tanpa augment)

def make_dataset(paths, labels, batch_size=BATCH_SIZE, shuffle=False):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(paths), seed=SEED)
    ds = ds.map(load_image_tf, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

val_ds  = make_dataset(paths_val,  labels_val,  shuffle=False)
test_ds = make_dataset(paths_test, labels_test, shuffle=False)

print("val_ds :", val_ds)
print("test_ds:", test_ds)


val_ds : <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))>
test_ds: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))>


In [9]:
# Cell 7: Definisi model CNN (valid vs fraud)

def build_ktp_fraud_cnn(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Rescaling(1./255),

        layers.Conv2D(32, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D((2,2)),

        layers.Conv2D(64, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D((2,2)),

        layers.Conv2D(128, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D((2,2)),

        layers.Conv2D(256, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D((2,2)),

        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid')  # output = P(VALID)
    ])
    return model

model = build_ktp_fraud_cnn()
model.summary()


In [10]:
# Cell 8: Compile & training

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
)

MODEL_SAVE_DIR = os.path.join(PROJECT_DIR, "saved_models")
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
MODEL_OUT_PATH = os.path.join(MODEL_SAVE_DIR, "ktp_fraud_cnn_tampering_v1.h5")

callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    ),
    ModelCheckpoint(
        MODEL_OUT_PATH,
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    )
]

EPOCHS = 20  # bisa dinaikkan kalau belum overfitting

history = model.fit(
    train_gen,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks
)

print("Model disimpan ke:", MODEL_OUT_PATH)


  self._warn_if_super_not_called()


Epoch 1/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 488ms/step - accuracy: 0.7410 - auc: 0.8194 - loss: 0.4892
Epoch 1: val_loss improved from None to 1.43120, saving model to d:\Coolyeah-Ngoding\PBL\Four-Heavenly-Principle\Machine Learning\Fraud_Detectio\saved_models\ktp_fraud_cnn_tampering_v1.h5




[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 523ms/step - accuracy: 0.8461 - auc: 0.9295 - loss: 0.3404 - val_accuracy: 0.5522 - val_auc: 0.5000 - val_loss: 1.4312
Epoch 2/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step - accuracy: 0.9528 - auc: 0.9900 - loss: 0.1303
Epoch 2: val_loss did not improve from 1.43120
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 400ms/step - accuracy: 0.9577 - auc: 0.9921 - loss: 0.1152 - val_accuracy: 0.4683 - val_auc: 0.5000 - val_loss: 2.8949
Epoch 3/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step - accuracy: 0.9819 - auc: 0.9963 - loss: 0.0687
Epoch 3: val_loss did not improve from 1.43120
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 375ms/step - accuracy: 0.9827 - auc: 0.9968 - loss: 0.0636 - val_accuracy: 0.4726 - val_auc: 0.5000 - val_loss: 3.4256
Epoch 4/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

In [11]:
# Cell 9: Evaluasi di test set

test_loss, test_acc, test_auc = model.evaluate(test_ds)
print(f"Test loss: {test_loss:.4f}")
print(f"Test acc : {test_acc:.4f}")
print(f"Test AUC : {test_auc:.4f}")


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 99ms/step - accuracy: 0.5214 - auc: 0.5000 - loss: 1.3995 
Test loss: 1.3995
Test acc : 0.5214
Test AUC : 0.5000
