In [None]:

# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

import os
import json
from pathlib import Path
class Config:
    """Filesystem locations of the competition dataset (paths only)."""

    # Root of the competition's input data
    BASE_PATH = Path('/kaggle/input/recodai-luc-scientific-image-forgery-detection')

    # Training images and their masks
    TRAIN_IMAGES_DIR = BASE_PATH.joinpath('train_images')
    TRAIN_MASKS_DIR = BASE_PATH.joinpath('train_masks')

    # Test images and the sample submission file
    TEST_IMAGES_DIR = BASE_PATH.joinpath('test_images')
    SAMPLE_SUB_PATH = BASE_PATH.joinpath('sample_submission.csv')


In [None]:
def discover_and_map_data(config=None):
    """Discover train/test images and map forged images to their .npy masks.

    Args:
        config: Optional object exposing ``TRAIN_IMAGES_DIR``, ``TRAIN_MASKS_DIR``
            and ``TEST_IMAGES_DIR`` as ``pathlib.Path`` values. When omitted, a
            fresh ``Config()`` is used.

    Returns:
        A tuple ``(train_images, mask_mapping, test_images, forged_with_masks)``:
        ``train_images`` is the list of authentic paths followed by forged paths,
        ``mask_mapping`` maps a forged image stem to the list of mask files with
        that stem, ``test_images`` is the list of test image paths, and
        ``forged_with_masks`` is the set of forged image paths that have a mask.

    All path lists are sorted so that results (and any seeded split built from
    them) do not depend on the order in which the filesystem enumerates files.
    """
    if config is None:
        config = Config()

    # NOTE: this pattern matches extensions such as .jpg / .png (any case);
    # it does not match ".jpeg" because the third character must be g/G.
    image_pattern = '*.[jpJP][npNP][gG]*'

    # Discover all training images
    train_images = []
    authentic_dir = config.TRAIN_IMAGES_DIR / 'authentic'
    forged_dir = config.TRAIN_IMAGES_DIR / 'forged'

    # Get authentic images
    authentic_images = []
    if authentic_dir.exists():
        authentic_images = sorted(authentic_dir.glob(image_pattern))
        train_images.extend(authentic_images)
        print(f"Found {len(authentic_images)} authentic images")

    # Get forged images
    forged_images = []
    if forged_dir.exists():
        forged_images = sorted(forged_dir.glob(image_pattern))
        train_images.extend(forged_images)
        print(f"Found {len(forged_images)} forged images")

    # Index forged images by stem once, instead of rescanning the whole list for
    # every mask file. On duplicate stems the first image (sorted order) wins.
    forged_by_stem = {}
    for forged_img in forged_images:
        forged_by_stem.setdefault(forged_img.stem, forged_img)

    # Create mask mapping
    mask_mapping = {}
    forged_with_masks = set()

    if config.TRAIN_MASKS_DIR.exists():
        # Masks are stored as .npy files
        for mask_file in sorted(config.TRAIN_MASKS_DIR.glob('*.npy')):
            img_id = mask_file.stem  # e.g. '10015' from '10015.npy'
            forged_img = forged_by_stem.get(img_id)
            if forged_img is None:
                # Mask without a matching forged image: ignore it
                continue
            mask_mapping.setdefault(img_id, []).append(mask_file)
            forged_with_masks.add(forged_img)

    print(f"Created mask mapping for {len(mask_mapping)} images")
    print(f"Forged images with masks: {len(forged_with_masks)}")

    # Get test images
    test_images = []
    if config.TEST_IMAGES_DIR.exists():
        test_images = sorted(config.TEST_IMAGES_DIR.glob(image_pattern))
        print(f"Found {len(test_images)} test images")

    return train_images, mask_mapping, test_images, forged_with_masks


In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from PIL import Image
import cv2
import random

# Run discovery once; relies on Config and discover_and_map_data() from earlier cells
train_images, mask_mapping, test_images, forged_with_masks = discover_and_map_data()

# Count authentic images by their parent folder. Subtracting the forged-with-mask
# count from the total would also count forged images without a mask as authentic.
authentic_count = sum(1 for p in train_images if p.parent.name == 'authentic')

print(f"Total train images: {len(train_images)}")
print(f"Forged with masks: {len(forged_with_masks)}")
print(f"Authentic: {authentic_count}")


In [None]:
# Summarize the image distribution.
# The class of an image is taken from its parent folder name rather than from a
# substring of the full path, so a file or directory name that merely contains
# "authentic" or "forged" cannot be counted in the wrong bucket.
data_summary = {
    "Authentic": sum(p.parent.name == 'authentic' for p in train_images),
    "Forged": sum(p.parent.name == 'forged' for p in train_images),
    "Forged_with_masks": len(mask_mapping),
    "Test Images": len(test_images)
}
pd.DataFrame(data_summary, index=["Count"])


In [None]:
# Sample up to 300 training images and record each one's (width, height)
size = []
for image_path in random.sample(train_images, min(300, len(train_images))):
    # The context manager closes the file handle once the size has been read,
    # so the loop does not accumulate open files.
    with Image.open(image_path) as img:
        size.append(img.size)
df_sizes = pd.DataFrame(size, columns=['width', 'height'])

# Scatter plot of the sampled resolutions
plt.figure(figsize=(7,5))
sns.scatterplot(x='width', y='height', data=df_sizes)
plt.title('Image Resolution Distribution')
plt.xlabel('Width')
plt.ylabel('Height')
plt.show()


In [None]:
# Summary statistics of the sampled widths/heights; left as the cell's last
# expression so the notebook renders it as a table instead of plain text.
df_sizes.describe()


In [None]:
# Fraction of pixels marked as forged, per image (union over all of its masks)
mask_coverage = []

for img_id, mask_files in list(mask_mapping.items())[:300]:  # sample subset for speed
    total_mask = None
    for mf in mask_files:
        mask = np.load(mf)  # each file is loaded exactly once
        if mask.ndim == 3:
            # Collapse stacked masks into one 2-D map so the ratio is measured on
            # the union of regions rather than averaged over the stack.
            # Assumes masks are stacked along axis 0 -- TODO confirm against the data.
            mask = mask.any(axis=0)
        mask = mask.astype(bool)
        total_mask = mask if total_mask is None else np.logical_or(total_mask, mask)
    forged_area_ratio = np.sum(total_mask) / total_mask.size
    mask_coverage.append(forged_area_ratio)

plt.figure(figsize=(7,5))
sns.histplot(mask_coverage, bins=30, kde=True)
plt.title("Distribution of Forged Area Ratio per Image")
plt.xlabel("Forged area ratio")
plt.ylabel("Frequency")
plt.show()

print(f"Average forged area ratio: {np.mean(mask_coverage):.4f}")


In [None]:
def show_image_and_mask(image_path, mask_paths=None):
    """
    Display an image next to the same image with its forgery mask(s) overlaid.

    Args:
        image_path: Path of the image file to display.
        mask_paths: Optional list of .npy mask files. All masks are merged
            (logical OR) into a single overlay. When empty or None, the right
            panel shows the plain image titled as authentic.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    import cv2
    import numpy as np
    from PIL import Image
    import matplotlib.pyplot as plt

    # The context manager closes the image file once the pixels are copied out
    with Image.open(image_path) as pil_img:
        img = np.array(pil_img.convert("RGB"))
    h, w = img.shape[:2]
    combined_mask = np.zeros((h, w), dtype=np.uint8)

    if mask_paths:
        for mask_path in mask_paths:
            mask = np.load(mask_path)

            # Handle different possible mask shapes,
            # e.g. (2,256,256), (256,256,2), (256,256)
            if mask.ndim == 3:
                # Only treat the last axis as the channel axis when the first
                # axis is too large to be one (> 3) and the last is small (<= 3);
                # every other 3-D layout is collapsed along axis 0.
                channels_last = mask.shape[0] > 3 and mask.shape[-1] <= 3
                mask = np.any(mask, axis=-1 if channels_last else 0).astype(np.uint8)
            elif mask.ndim == 2:
                mask = mask.astype(np.uint8)
            else:
                mask = mask.squeeze().astype(np.uint8)

            # Resize mask to match image dimensions (cv2 expects (width, height))
            mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

            # Combine with previously loaded masks
            combined_mask = np.logical_or(combined_mask, mask)

    # Display results: original on the left, overlay on the right
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.imshow(img)
    plt.axis('off')
    plt.title("Original Image")

    plt.subplot(1, 2, 2)
    plt.imshow(img)
    if mask_paths:
        plt.imshow(combined_mask, alpha=0.5, cmap='jet')
        plt.title(f"Forgery Mask Overlay ({len(mask_paths)} mask file(s))")
    else:
        plt.title("No Mask (Authentic)")
    plt.axis('off')
    plt.tight_layout()
    plt.show()


In [None]:
# Pick random forged case (only possible when at least one mask was mapped)
image_path = None
if mask_mapping:
    sample_id = random.choice(list(mask_mapping.keys()))
    # Match on the exact file stem: a substring test against the full path
    # would let id '100' match '1000.png' or any directory containing '100'.
    image_path = next((f for f in forged_with_masks if f.stem == sample_id), None)

if image_path:
    show_image_and_mask(image_path, mask_mapping[sample_id])
else:
    print("No image match found for that mask ID.")


In [None]:
import os
import random
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2


In [None]:
class ForgeryDataset(Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs for forgery segmentation.

    Args:
        image_paths: Sequence of image file paths.
        mask_mapping: Dict mapping an image stem to a list of .npy mask files.
        image_size: ``(height, width)`` forwarded to ``A.Resize``.
        augment: When True, random flips / rotations / photometric noise are
            applied in addition to resize and normalization. When False
            (e.g. validation) only the deterministic resize + normalization run.
    """

    def __init__(self, image_paths, mask_mapping, image_size=(512, 512), augment=False):
        self.image_paths = image_paths
        self.mask_mapping = mask_mapping
        self.image_size = image_size
        self.augment = augment

        # Albumentations handles resize + normalization; the random transforms
        # are only inserted when augmentation was requested.
        steps = [A.Resize(*image_size)]
        if augment:
            steps.extend([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.3),
                A.RandomRotate90(p=0.3),
                A.RandomBrightnessContrast(p=0.3),
                A.GaussNoise(p=0.2),
                A.MotionBlur(p=0.2),
            ])
        steps.extend([
            A.Normalize(mean=(0.485, 0.456, 0.406),
                        std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])
        self.transform = A.Compose(steps)

    def __len__(self):
        return len(self.image_paths)

    def load_mask(self, img_id, h, w):
        """Return the union of all masks of ``img_id`` as an ``(h, w)`` float32 array.

        An all-zero array is returned when ``img_id`` has no mask. The dtype is
        float32 in every case so that samples can be batched together and used
        directly as a floating-point loss target.
        """
        combined_mask = np.zeros((h, w), dtype=bool)
        for mf in self.mask_mapping.get(img_id, ()):
            mask = np.load(mf)

            # Merge channels if present. The last axis is treated as the channel
            # axis only when the first axis is too large to be one (> 3) and the
            # last one is small (<= 3); otherwise axis 0 is collapsed.
            if mask.ndim == 3:
                channels_last = mask.shape[0] > 3 and mask.shape[-1] <= 3
                mask = np.any(mask, axis=-1 if channels_last else 0)
            else:
                mask = mask.squeeze()
            mask = mask.astype(np.uint8)

            # Bring the mask to the image resolution before merging
            # (cv2 expects the target size as (width, height)).
            if mask.shape != (h, w):
                mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
            combined_mask = np.logical_or(combined_mask, mask)

        return combined_mask.astype(np.float32)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img_id = Path(img_path).stem
        # The context manager closes the file once the pixels are copied out
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert("RGB"))
        h, w = img.shape[:2]

        mask = self.load_mask(img_id, h, w)

        # Albumentations applies the same spatial transforms to image and mask
        augmented = self.transform(image=img, mask=mask)
        img = augmented["image"]
        mask = augmented["mask"].unsqueeze(0)  # (1, H, W)

        return img, mask


In [None]:
from sklearn.model_selection import train_test_split

# Select forged images by their parent folder (not by a substring of the path)
# and sort them, so the seeded split below yields the same partition no matter
# in which order the files were discovered.
forged_img_paths = sorted(p for p in train_images if p.parent.name == 'forged')
train_paths, val_paths = train_test_split(forged_img_paths, test_size=0.2, random_state=42)

train_dataset = ForgeryDataset(train_paths, mask_mapping, augment=True)
val_dataset = ForgeryDataset(val_paths, mask_mapping, augment=False)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2)

print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}")


In [None]:
import os, random, cv2, torch, numpy as np, pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from pathlib import Path


In [None]:
class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

    The first stage maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. Convolutions use padding 1 and no bias.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        layers = []
        # Stage 1 consumes in_channels, stage 2 consumes the output of stage 1
        for stage_in in (in_channels, out_channels):
            layers.append(nn.Conv2d(stage_in, out_channels, 3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(inplace=True))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        out = self.net(x)
        return out


class UNet(nn.Module):
    """U-Net with four down-sampling and four up-sampling stages.

    Args:
        n_channels: Number of input image channels.
        n_classes: Number of output channels (raw logits, no activation).

    The output has the same height and width as the input. Spatial sizes that
    are not multiples of 16 are supported: each up-sampled feature map is
    zero-padded to the size of its skip connection before concatenation.
    """

    def __init__(self, n_channels=3, n_classes=1):
        super().__init__()
        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(64, 128))
        self.down2 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(128, 256))
        self.down3 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(256, 512))
        self.down4 = nn.Sequential(nn.MaxPool2d(2), DoubleConv(512, 512))

        # Decoder: each DoubleConv receives the up-sampled map concatenated with
        # the encoder map of the same resolution, hence the doubled input width.
        self.up1 = nn.ConvTranspose2d(512, 512, 2, stride=2)
        self.conv1 = DoubleConv(1024, 256)
        self.up2 = nn.ConvTranspose2d(256, 256, 2, stride=2)
        self.conv2 = DoubleConv(512, 128)
        self.up3 = nn.ConvTranspose2d(128, 128, 2, stride=2)
        self.conv3 = DoubleConv(256, 64)
        self.up4 = nn.ConvTranspose2d(64, 64, 2, stride=2)
        self.conv4 = DoubleConv(128, 64)

        self.outc = nn.Conv2d(64, n_classes, 1)

    @staticmethod
    def _pad_to(x, ref):
        """Zero-pad ``x`` so that its height/width equal those of ``ref``."""
        diff_h = ref.size(2) - x.size(2)
        diff_w = ref.size(3) - x.size(3)
        if diff_h == 0 and diff_w == 0:
            # Sizes already agree (always the case for multiples of 16)
            return x
        # Pad order is (left, right, top, bottom) for the last two dimensions
        return nn.functional.pad(
            x,
            [diff_w // 2, diff_w - diff_w // 2, diff_h // 2, diff_h - diff_h // 2],
        )

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        # MaxPool2d floors odd sizes, so an up-sampled map can be one pixel
        # smaller than its skip connection; pad before concatenating.
        x = self._pad_to(self.up1(x5), x4)
        x = self.conv1(torch.cat([x, x4], dim=1))
        x = self._pad_to(self.up2(x), x3)
        x = self.conv2(torch.cat([x, x3], dim=1))
        x = self._pad_to(self.up3(x), x2)
        x = self.conv3(torch.cat([x, x2], dim=1))
        x = self._pad_to(self.up4(x), x1)
        x = self.conv4(torch.cat([x, x1], dim=1))
        return self.outc(x)


In [None]:
def dice_loss(pred, target, smooth=1.):
    """Soft Dice loss computed over all elements of the batch.

    Args:
        pred: Raw logits; a sigmoid is applied here.
        target: Tensor of the same shape as ``pred``.
        smooth: Constant added to numerator and denominator.

    Returns:
        Scalar tensor ``1 - (2*sum(p*t) + smooth) / (sum(p) + sum(t) + smooth)``.
    """
    probs = torch.sigmoid(pred)
    overlap = (probs * target).sum()
    numerator = 2 * overlap + smooth
    denominator = probs.sum() + target.sum() + smooth
    return 1 - numerator / denominator

def combined_loss(pred, target):
    """Equal-weight sum of BCE-with-logits and Dice loss on raw logits ``pred``."""
    bce_term = F.binary_cross_entropy_with_logits(pred, target)
    dice_term = dice_loss(pred, target)
    return 0.5 * bce_term + 0.5 * dice_term


In [None]:
def validate(model, loader, device):
    """Return the mean per-batch Dice score of ``model`` over ``loader``.

    The model is switched to eval mode, predictions are thresholded at 0.5
    after a sigmoid, and one Dice value is computed per batch (over all
    elements of that batch) before averaging.
    """
    model.eval()
    per_batch = []
    with torch.no_grad():
        for batch_imgs, batch_masks in loader:
            batch_imgs = batch_imgs.to(device)
            batch_masks = batch_masks.to(device)
            binary = (torch.sigmoid(model(batch_imgs)) > 0.5).float()
            overlap = (binary * batch_masks).sum()
            total = (binary + batch_masks).sum()
            score = (2 * overlap) / (total + 1e-7)
            per_batch.append(score.item())
    return np.mean(per_batch)


def train_model(model, train_loader, val_loader, epochs=20, lr=1e-4, save_path="unet_basic.pt"):
    """Train ``model`` with Adam and keep the checkpoint with the best validation Dice.

    Args:
        model: Network returning raw logits for a batch of images.
        train_loader: Iterable of ``(imgs, masks)`` training batches.
        val_loader: Iterable of ``(imgs, masks)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        save_path: File the best model is written to.

    Returns:
        None. Progress is printed and the best model is saved to ``save_path``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    best_dice = 0

    for epoch in range(epochs):
        model.train()  # validate() leaves the model in eval mode
        total_loss = 0
        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        for imgs, masks in pbar:
            imgs, masks = imgs.to(device), masks.to(device)
            opt.zero_grad()
            preds = model(imgs)
            loss = combined_loss(preds, masks)
            loss.backward()
            opt.step()
            total_loss += loss.item()
            pbar.set_postfix(loss=loss.item())

        # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches
        mean_loss = total_loss / max(len(train_loader), 1)

        val_dice = validate(model, val_loader, device)
        print(f"Epoch {epoch+1} | Train Loss: {mean_loss:.4f} | Val Dice: {val_dice:.4f}")
        if val_dice > best_dice:
            best_dice = val_dice
            # NOTE(review): this pickles the whole module rather than its
            # state_dict, so loading requires the model classes to be importable.
            # Kept as-is because consumers of save_path may rely on this format.
            torch.save(model, save_path)
            print("Saved best model")
    print(f"🏁 Training complete. Best Dice: {best_dice:.4f}")


In [None]:
# Assuming discover_and_map_data() returns train_images, mask_mapping, test_images, forged_with_masks
from sklearn.model_selection import train_test_split

# Select forged images by their parent folder (not by a substring of the path)
# and sort them, so the seeded split is the same regardless of discovery order.
train_paths = sorted(p for p in train_images if p.parent.name == 'forged')
trn, val = train_test_split(train_paths, test_size=0.2, random_state=42)

train_ds = ForgeryDataset(trn, mask_mapping, augment=True)
val_ds   = ForgeryDataset(val, mask_mapping, augment=False)
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds, batch_size=2, shuffle=False, num_workers=2)

model = UNet(n_channels=3, n_classes=1)
train_model(model, train_loader, val_loader, epochs=20, lr=1e-4)
