In [None]:
# List everything mounted under the Kaggle input directory
!ls /kaggle/input


In [None]:
import os

# Root path of the dataset
DATASET_ROOT = "/kaggle/input/recodai-luc-scientific-image-forgery-detection/"

# Show the top-level folders/files of the dataset. os.listdir returns entries
# in arbitrary order, so sort them to keep the output stable between runs.
print(sorted(os.listdir(DATASET_ROOT)))


In [None]:
def show_image_with_mask(image_file, mask_file):
    """Display an image next to the same image with its mask overlaid.

    Uses the notebook-level ``cv2``, ``np`` and ``plt`` names, so those
    imports must have been executed before this function is called.

    Args:
        image_file: Path of an image file readable by ``cv2.imread``.
        mask_file: Path of the ``.npy`` file holding the mask.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_file``.
    """
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    img = cv2.imread(image_file)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_file}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Load the mask (.npy) and remove any singleton dimensions
    mask = np.squeeze(np.load(mask_file))

    # A mask that is still 3-D holds several planes; merge them into one 2-D
    # overlay. The stacking axis is taken to be the first one when the two
    # trailing axes match the image size, otherwise the last one.
    # NOTE(review): assumes planes are stacked first or last - confirm against the dataset.
    while mask.ndim > 2:
        stack_axis = 0 if mask.shape[-2:] == img.shape[:2] else -1
        mask = mask.max(axis=stack_axis)

    fig, (ax_image, ax_overlay) = plt.subplots(1, 2, figsize=(12, 6))

    ax_image.imshow(img)
    ax_image.set_title("Original Image")
    ax_image.axis('off')

    ax_overlay.imshow(img)
    ax_overlay.imshow(mask, cmap='jet', alpha=0.5)  # overlay mask
    ax_overlay.set_title("Image with Mask Overlay")
    ax_overlay.axis('off')

    plt.show()

# Show up to 3 random image/mask pairs.
# NOTE: image_files, mask_files, TRAIN_PATH, MASK_PATH and the `random` import
# are defined in the training cell further down, so that cell must have been
# run before this one.
# The same index is used for both lists so each image is shown with its own
# mask, and the stored relative names are joined with their root folders to
# obtain paths that can actually be opened.
for pair_idx in random.sample(range(len(image_files)), k=min(3, len(image_files))):
    show_image_with_mask(
        os.path.join(TRAIN_PATH, image_files[pair_idx]),
        os.path.join(MASK_PATH, mask_files[pair_idx]),
    )


In [None]:
# ==========================
# 1️⃣ Imports
# ==========================
import os
import cv2
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt

# ==========================
# 2️⃣ Paths
# ==========================
BASE_PATH = "/kaggle/input/recodai-luc-scientific-image-forgery-detection"
TRAIN_PATH = os.path.join(BASE_PATH, "train_images")
MASK_PATH = os.path.join(BASE_PATH, "train_masks")

# ==========================
# 3️⃣ Load Image-Mask Pairs
# ==========================
def find_image_mask_pairs(train_path, mask_path, labels=("authentic", "forged")):
    """Collect the images under ``train_path`` that have a matching ``.npy`` mask.

    Args:
        train_path: Folder containing one sub-folder per entry of ``labels``.
        mask_path: Folder containing ``<image stem>.npy`` mask files.
        labels: Sub-folder names to scan; missing sub-folders are ignored.

    Returns:
        ``(image_files, mask_files)``: two index-aligned lists. Images are
        given relative to ``train_path`` (``label/filename``) and masks
        relative to ``mask_path`` (``filename``). Images without a mask file
        are left out.
    """
    image_paths, mask_names = [], []
    for label in labels:
        folder = os.path.join(train_path, label)
        if not os.path.isdir(folder):
            continue
        # os.listdir order is arbitrary; sorting keeps the pair order stable
        # from run to run.
        for img_name in sorted(os.listdir(folder)):
            # splitext strips only the final extension, so a name such as
            # "a.v2.png" maps to "a.v2.npy" rather than "a.npy".
            stem, _ = os.path.splitext(img_name)
            mask_name = stem + ".npy"
            if os.path.isfile(os.path.join(mask_path, mask_name)):
                image_paths.append(os.path.join(label, img_name))
                mask_names.append(mask_name)
    return image_paths, mask_names


image_files, mask_files = find_image_mask_pairs(TRAIN_PATH, MASK_PATH)

print(f"✅ Found {len(image_files)} potential image-mask pairs")

# ==========================
# 4️⃣ Data Generator (Fixed)
# ==========================
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding ``(images, masks)`` batches for training.

    Images are read with OpenCV (channels therefore stay in BGR order),
    resized to ``img_size`` and scaled to ``[0, 1]``. Masks are loaded from
    ``.npy`` files, reduced to one 2-D plane, binarised at 0.5 and resized
    with nearest-neighbour interpolation. Samples that cannot be read are
    skipped, so a batch may hold fewer than ``batch_size`` items.

    Args:
        image_files: Image paths relative to the module-level ``TRAIN_PATH``.
        mask_files: Mask paths relative to the module-level ``MASK_PATH``;
            ``mask_files[i]`` belongs to ``image_files[i]``.
        batch_size: Maximum number of samples per batch.
        img_size: Target size handed to ``cv2.resize`` (width, height).
        shuffle: Whether to reshuffle the sample order after every epoch.
        **kwargs: Forwarded to ``keras.utils.Sequence.__init__``.

    Raises:
        ValueError: If the two file lists differ in length or ``batch_size``
            is smaller than 1.
    """

    def __init__(self, image_files, mask_files, batch_size=8, img_size=(256, 256), shuffle=True, **kwargs):
        # Newer Keras versions expect Sequence subclasses to run the base
        # initialiser; with no kwargs this is also valid on older versions.
        super().__init__(**kwargs)
        if len(image_files) != len(mask_files):
            raise ValueError(
                f"image_files and mask_files must have the same length, "
                f"got {len(image_files)} and {len(mask_files)}"
            )
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.image_files = image_files
        self.mask_files = mask_files
        self.batch_size = batch_size
        self.img_size = tuple(img_size)
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        # Ceiling division so the trailing samples are not silently dropped.
        return int(np.ceil(len(self.image_files) / self.batch_size))

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.image_files))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    @staticmethod
    def _to_2d_mask(mask, image_hw):
        """Merge a mask with extra axes into one 2-D plane, or return None."""
        # The stacking axis is taken to be the first one when the two trailing
        # axes match the image size (planes-first layout), otherwise the last
        # one (planes-last layout). Planes are merged with a per-pixel maximum.
        # NOTE(review): assumes one of these two layouts - confirm against the dataset.
        while mask.ndim > 2:
            stack_axis = 0 if mask.shape[-2:] == tuple(image_hw) else -1
            mask = mask.max(axis=stack_axis)
        return mask if mask.ndim == 2 else None

    def _load_sample(self, i):
        """Return the preprocessed ``(image, mask)`` for sample ``i``, or None if unusable."""
        img_path = os.path.join(TRAIN_PATH, self.image_files[i])
        mask_path = os.path.join(MASK_PATH, self.mask_files[i])

        # cv2.imread returns None (it does not raise) for unreadable files.
        img = cv2.imread(img_path)
        if img is None:
            return None

        # Only swallow the errors a missing or corrupt .npy file produces;
        # anything else (including KeyboardInterrupt) must propagate.
        try:
            mask = np.load(mask_path)
        except (OSError, ValueError, EOFError):
            return None

        if mask.size == 0:
            return None

        mask = self._to_2d_mask(mask, img.shape[:2])
        if mask is None:
            return None

        # Binarise before resizing: uint8 is a dtype cv2.resize accepts
        # whatever the stored mask dtype is, and with nearest-neighbour
        # interpolation thresholding before or after gives the same pixels.
        mask = (mask > 0.5).astype(np.uint8)

        img = cv2.resize(img, self.img_size)
        mask = cv2.resize(mask, self.img_size, interpolation=cv2.INTER_NEAREST)

        img = img.astype("float32") / 255.0
        mask = np.expand_dims(mask, axis=-1).astype("float32")
        return img, mask

    def __getitem__(self, index):
        """Return batch ``index`` as ``(images, masks)`` float32 arrays.

        If every sample of the requested batch is unusable, the following
        batches are tried in turn (wrapping around).

        Raises:
            RuntimeError: If no batch contains a readable sample.
        """
        n_batches = len(self)
        # Bounded scan instead of recursion: it stops after one full pass, so
        # an unreadable dataset yields a clear error rather than a
        # RecursionError (or a ZeroDivisionError when there are no batches).
        for offset in range(n_batches):
            batch_no = (index + offset) % n_batches
            sample_ids = self.indexes[batch_no * self.batch_size:(batch_no + 1) * self.batch_size]

            batch_images, batch_masks = [], []
            for i in sample_ids:
                sample = self._load_sample(i)
                if sample is None:
                    continue
                batch_images.append(sample[0])
                batch_masks.append(sample[1])

            if batch_images:
                return np.array(batch_images), np.array(batch_masks)

        raise RuntimeError("DataGenerator found no readable image/mask pair")

# ==========================
# 5️⃣ UNet Model
# ==========================
def build_unet(input_shape=(256, 256, 3)):
    """Assemble a U-Net with four down-sampling and four up-sampling stages.

    The encoder uses 64, 128, 256 and 512 filters, the bottleneck 1024, and
    the decoder mirrors the encoder while concatenating the matching encoder
    features. A final 1x1 convolution with a sigmoid produces one output
    channel.

    Args:
        input_shape: Shape of a single input sample passed to ``keras.Input``.

    Returns:
        The uncompiled ``keras.Model``.
    """

    def double_conv(tensor, n_filters):
        # Two stacked 3x3 same-padded ReLU convolutions.
        for _ in range(2):
            tensor = layers.Conv2D(n_filters, 3, padding="same", activation="relu")(tensor)
        return tensor

    net_input = keras.Input(shape=input_shape)

    # Contracting path: keep each stage's features for the skip connections,
    # then halve the resolution.
    skip_features = []
    current = net_input
    for n_filters in (64, 128, 256, 512):
        stage_out = double_conv(current, n_filters)
        skip_features.append(stage_out)
        current = layers.MaxPooling2D((2, 2))(stage_out)

    current = double_conv(current, 1024)

    # Expanding path: upsample, merge with the matching encoder stage (deepest
    # first), then convolve.
    for n_filters, skip in zip((512, 256, 128, 64), reversed(skip_features)):
        current = layers.Conv2DTranspose(n_filters, (2, 2), strides=2, padding="same")(current)
        current = layers.Concatenate()([current, skip])
        current = double_conv(current, n_filters)

    mask_prob = layers.Conv2D(1, (1, 1), activation="sigmoid")(current)

    return keras.Model(net_input, mask_prob)

# ==========================
# 6️⃣ Train Model
# ==========================
# Training configuration, gathered here so a run is easy to tweak and repeat.
SEED = 42
BATCH_SIZE = 4
IMG_SIZE = (256, 256)  # handed to cv2.resize by DataGenerator, i.e. (width, height)
EPOCHS = 5
VAL_FRACTION = 0.1

if not image_files:
    raise RuntimeError(
        "No image-mask pairs were found; check TRAIN_PATH and MASK_PATH before training."
    )

# Seed every random generator involved (split, batch shuffling, weight init).
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Hold out part of the pairs so the "best" checkpoint is chosen on data the
# model has not been fitted to, instead of on the training loss.
pair_order = np.random.permutation(len(image_files))
n_val = int(len(pair_order) * VAL_FRACTION)
if n_val < BATCH_SIZE:
    # Too few samples to form even one full validation batch: train on everything.
    n_val = 0
val_ids, train_ids = pair_order[:n_val], pair_order[n_val:]

train_gen = DataGenerator(
    [image_files[i] for i in train_ids],
    [mask_files[i] for i in train_ids],
    batch_size=BATCH_SIZE,
    img_size=IMG_SIZE,
)
val_gen = None
if n_val:
    val_gen = DataGenerator(
        [image_files[i] for i in val_ids],
        [mask_files[i] for i in val_ids],
        batch_size=BATCH_SIZE,
        img_size=IMG_SIZE,
        shuffle=False,
    )

# The network input is (height, width, channels).
model = build_unet(input_shape=(IMG_SIZE[1], IMG_SIZE[0], 3))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

checkpoint = ModelCheckpoint(
    "unet_best.h5",
    save_best_only=True,
    monitor="val_loss" if val_gen is not None else "loss",
    mode="min",
)
history = model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS, callbacks=[checkpoint])

# ==========================
# 7️⃣ Plot Results
# ==========================
# Plot every recorded curve on a labelled figure; validation curves are only
# drawn when the training run produced them.
fig, ax = plt.subplots()
for curve_name in ("loss", "accuracy", "val_loss", "val_accuracy"):
    if curve_name in history.history:
        ax.plot(history.history[curve_name], label=curve_name)
ax.set_xlabel("Epoch")
ax.set_ylabel("Value")
ax.set_title("Training Progress")
ax.legend()
plt.show()

print("✅ Training complete! Model saved as unet_best.h5")