# CMFD — Offline Baseline (Pure PyTorch U-Net, authentic/forged subfolders)

Kaggle **Internet Off**

Highlights

- 🔌 No pip installs — uses only libraries available in the standard Kaggle environment (torch, torchvision, numpy, pandas, scikit-learn, PIL, matplotlib).

- 🧠 A pure U-Net implementation in PyTorch.

- 🗂️ Automatically supports the subfolders `train_images/authentic/` and `train_images/forged/`.

- 🧩 If multiple masks are present for a case, they are combined with a logical OR.

- 🧾 RLE encode/decode, compatible with the `sample_submission.csv` format.

- ⚖️ Loss function: BCEWithLogits + Dice loss.



In [None]:

# 1) Import and settings
import os, random, glob
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold

# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Competition input locations and the writable output directory.
DATA_DIR = Path('/kaggle/input/recodai-luc-scientific-image-forgery-detection')
TRAIN_IMG_DIR = DATA_DIR / 'train_images'
TEST_IMG_DIR  = DATA_DIR / 'test_images'
TRAIN_MASK_DIR = DATA_DIR / 'train_masks'
SAMPLE_SUB_PATH = DATA_DIR / 'sample_submission.csv'
# mkdir() is called without parents=True, so the parent of OUTPUT_DIR must already exist.
OUTPUT_DIR = Path('/kaggle/working/'); OUTPUT_DIR.mkdir(exist_ok=True)

# Subfolders
TRAIN_IMG_DIR_AUTH = TRAIN_IMG_DIR / 'authentic'
TRAIN_IMG_DIR_FORG = TRAIN_IMG_DIR / 'forged'

# Hyperparams
IMG_SIZE = 256      # side length of the square network input
BATCH_SIZE = 8
EPOCHS = 20         # upper bound per fold; the training loop may stop earlier
LR = 1e-3
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', DEVICE)

# Helpers
# File suffixes (lower-case, with leading dot) that are treated as images.
IMG_EXTS = {'.png', '.jpg', '.jpeg', '.tif', '.bmp'}

def is_image(p: Path):
    """Tell whether ``p`` is an existing regular file with an image suffix.

    The suffix is compared case-insensitively against the module-level
    ``IMG_EXTS`` set. Anything that is not a regular file yields ``False``
    before the suffix is inspected.
    """
    if not p.is_file():
        return False
    return p.suffix.lower() in IMG_EXTS

# Train index (authentic/forged)
def build_train_index():
    """Scan the authentic/forged training folders and return an image index.

    Each existing folder is walked recursively in sorted order. Every image
    file becomes one row with the columns ``case_id`` (file stem), ``split``
    ('authentic' or 'forged') and ``path`` (string path).

    Returns:
        A DataFrame with one row per unique ``case_id``. When a stem occurs
        more than once, the last occurrence wins (folders are visited
        authentic first, forged second). If nothing is found, a warning is
        printed and an empty DataFrame with the three columns is returned.
    """
    sources = (('authentic', TRAIN_IMG_DIR_AUTH), ('forged', TRAIN_IMG_DIR_FORG))
    records = []
    for label, folder in sources:
        if not folder.exists():
            continue
        records.extend(
            {'case_id': fp.stem, 'split': label, 'path': str(fp)}
            for fp in sorted(folder.rglob('*'))
            if is_image(fp)
        )

    if not records:
        print('‚ö†Ô∏è train_images/authentic & forged —Ö–æ–æ—Å–æ–Ω —ç—Å–≤—ç–ª –æ–ª–¥—Å–æ–Ω–≥“Ø–π')
        return pd.DataFrame(columns=['case_id','split','path'])

    index = pd.DataFrame(records)
    return index.drop_duplicates(subset=['case_id'], keep='last').reset_index(drop=True)

# Build the training index once and derive a case_id -> image path lookup.
TRAIN_INDEX = build_train_index()
ID2PATH = dict(zip(TRAIN_INDEX['case_id'], map(Path, TRAIN_INDEX['path'])))
print("Train index: {} images (auth={}, forg={})".format(
    len(TRAIN_INDEX),
    (TRAIN_INDEX['split'] == 'authentic').sum(),
    (TRAIN_INDEX['split'] == 'forged').sum(),
))

# Report which of the expected input locations actually exist.
print('Sanity:')
for _label, _target in (
    ('train_images:', TRAIN_IMG_DIR),
    ('train_masks :', TRAIN_MASK_DIR),
    ('test_images :', TEST_IMG_DIR),
    ('sample_submission:', SAMPLE_SUB_PATH),
):
    print(_label, _target.exists())


In [None]:

# 2) Utils — RLE, IO, mask

def rle_encode(mask: np.ndarray) -> str:
    """Run-length encode a 2-D mask in column-major order.

    Any value greater than zero counts as foreground. Binarising first
    matters: with raw values, a transition between two different non-zero
    labels would be counted as a run boundary and corrupt the start/length
    pairing.

    Args:
        mask: Array whose positive entries mark foreground pixels.

    Returns:
        Space-separated ``start length`` pairs with 1-based starts into the
        column-major flattened mask. An all-background mask gives ``''``.
    """
    pixels = (np.asarray(mask) > 0).astype(np.uint8).flatten(order='F')
    # Pad with background on both ends so every run has a start and an end.
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Odd positions hold run ends; turn them into run lengths.
    runs[1::2] = runs[1::2] - runs[::2]
    return ' '.join(str(x) for x in runs)


def rle_decode(rle: str, shape) -> np.ndarray:
    """Decode a column-major ``start length`` RLE string into a binary mask.

    Args:
        rle: Space-separated ``start length`` pairs with 1-based starts.
            ``None``, NaN, an empty string and the literal ``'authentic'``
            (the marker this notebook writes for images without a predicted
            region) all decode to an empty mask.
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        A ``uint8`` array of the given shape with 1 on encoded pixels.

    Raises:
        ValueError: If the string holds an odd number of tokens, i.e. a
            start without a matching length.
    """
    if rle is None or (isinstance(rle, float) and np.isnan(rle)):
        return np.zeros(shape, dtype=np.uint8)
    tokens = rle.strip().split()
    if not tokens or tokens[0].lower() == 'authentic':
        return np.zeros(shape, dtype=np.uint8)
    if len(tokens) % 2:
        raise ValueError(f"RLE must contain start/length pairs, got {len(tokens)} tokens")

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based offsets
    lengths = np.asarray(tokens[1::2], dtype=int)
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1
    return img.reshape(shape, order='F')


def load_image(path: Path) -> np.ndarray:
    """Read an image file and return it as an RGB array.

    The file is opened in a context manager so the handle is released as
    soon as the pixels have been converted, instead of relying on garbage
    collection.

    Args:
        path: Location of the image file.

    Returns:
        The image converted to RGB, as a NumPy array.
    """
    with Image.open(path) as img:
        return np.array(img.convert('RGB'))


def load_all_masks_for_case(case_id: str, mask_dir: Path, image_shape=None) -> np.ndarray:
    """Load every mask file of one case and merge them with a logical OR.

    A file belongs to the case when its stem equals ``case_id`` or continues
    with a non-alphanumeric separator (``10.png``, ``10_1.png``). A bare
    prefix match is not enough: case ``10`` must not pick up ``100.png``.

    Args:
        case_id: Identifier (file stem) of the case.
        mask_dir: Directory that holds the mask files.
        image_shape: Optional shape of the matching image; only the first two
            entries (height, width) are used. Masks of a different size are
            resized to it with nearest-neighbour sampling. When omitted, the
            first mask found defines the target size.

    Returns:
        A ``uint8`` array with 1 where any mask is positive, or ``None`` when
        no mask file exists for the case.
    """
    cid = str(case_id)
    # Escape glob metacharacters so ids such as "a[1]" are matched literally.
    stem_glob = glob.escape(str(mask_dir / cid))
    files = []
    for ext in ('.png', '.jpg', '.jpeg', '.tif', '.bmp'):
        for fp in sorted(glob.glob(f'{stem_glob}*{ext}')):
            remainder = Path(fp).stem[len(cid):]
            if remainder and remainder[0].isalnum():
                continue  # stem merely starts with this id; it is another case
            files.append(fp)
    if not files:
        return None

    target_hw = tuple(image_shape[:2]) if image_shape is not None else None
    mask_sum = None
    for fp in files:
        with Image.open(fp) as im:
            m = (np.array(im.convert('L')) > 0).astype(np.uint8)
        if target_hw is None:
            target_hw = m.shape
        if m.shape != target_hw:
            # PIL takes (width, height); NEAREST keeps the mask binary.
            m = np.array(Image.fromarray(m).resize((target_hw[1], target_hw[0]), resample=Image.NEAREST))
        mask_sum = m if mask_sum is None else np.maximum(mask_sum, m)
    return mask_sum


def has_mask(case_id: str) -> int:
    """Return 1 when the case has at least one positive mask pixel, else 0.

    Masks are looked up in ``TRAIN_MASK_DIR``. A case without mask files and
    a case whose masks are entirely zero both count as 0.
    """
    combined = load_all_masks_for_case(case_id, TRAIN_MASK_DIR)
    if combined is None:
        return 0
    return int(combined.sum() > 0)


In [None]:

# 3) Simple transform (numpy/PIL/torch) 
class SimpleTransform:
    """Resize-and-pad to a square, optionally flip, normalise, convert to tensors.

    Args:
        img_size: Side length of the square output.
        train: When true, random horizontal and vertical flips are applied.

    Calling the instance with an ``H x W x 3`` uint8 image (and optionally an
    ``H x W`` mask) returns a dict with ``'image'`` (float tensor, CHW,
    normalised with ImageNet mean/std) and, when a mask was given, ``'mask'``
    (float tensor of zeros and ones with a leading channel axis).
    """

    def __init__(self, img_size=512, train=True):
        self.img_size = img_size
        self.train = train

    def _resize_pad(self, img, mask=None):
        """Scale the longest side to ``img_size`` and reflect-pad to a square."""
        h, w = img.shape[:2]
        scale = self.img_size / max(h, w)
        # Clamp to one pixel: for very elongated images the short side would
        # otherwise round to 0 and the resize would fail.
        nh = max(1, int(round(h*scale)))
        nw = max(1, int(round(w*scale)))
        img_r = np.array(Image.fromarray(img).resize((nw, nh), resample=Image.BILINEAR))
        pad_h = self.img_size - nh
        pad_w = self.img_size - nw
        # Split the padding as evenly as possible; the extra pixel goes to
        # the bottom/right.
        top = pad_h//2; bottom = pad_h - top
        left = pad_w//2; right = pad_w - left
        img_p = np.pad(img_r, ((top,bottom),(left,right),(0,0)), mode='reflect')
        if mask is None:
            return img_p, None
        # NEAREST keeps mask values unchanged; the mask is padded the same way
        # as the image so both stay aligned.
        mask_r = np.array(Image.fromarray(mask).resize((nw, nh), resample=Image.NEAREST))
        mask_p = np.pad(mask_r, ((top,bottom),(left,right)), mode='reflect')
        return img_p, mask_p

    def _hflip(self, img, mask):
        """Mirror image and mask left-right with probability 0.5."""
        if random.random() < 0.5:
            img = np.ascontiguousarray(img[:, ::-1])
            if mask is not None:
                mask = np.ascontiguousarray(mask[:, ::-1])
        return img, mask

    def _vflip(self, img, mask):
        """Mirror image and mask top-bottom with probability 0.5."""
        if random.random() < 0.5:
            img = np.ascontiguousarray(img[::-1, :])
            if mask is not None:
                mask = np.ascontiguousarray(mask[::-1, :])
        return img, mask

    def __call__(self, image: np.ndarray, mask: np.ndarray=None):
        img, msk = image, mask
        img, msk = self._resize_pad(img, msk)
        if self.train:
            img, msk = self._hflip(img, msk)
            img, msk = self._vflip(img, msk)
        # Normalize to ImageNet stats
        mean = np.array([0.485,0.456,0.406], dtype=np.float32)
        std  = np.array([0.229,0.224,0.225], dtype=np.float32)
        img = img.astype(np.float32)/255.0
        img = (img - mean)/std
        img = np.transpose(img, (2,0,1))  # CHW
        img_t = torch.from_numpy(img)
        if msk is None:
            return {'image': img_t}
        msk = (msk>0).astype(np.float32)
        msk_t = torch.from_numpy(msk)[None, ...]  # 1HW
        return {'image': img_t, 'mask': msk_t}


In [None]:

# 4) Dataset

class CMFDDataset(Dataset):
    """Dataset of images with their merged forgery masks, addressed by case id.

    Args:
        case_ids: Sequence of case identifiers (file stems).
        img_dir: Fallback directory searched when an id is not in ``id2path``.
        mask_dir: Directory with mask files; ``None`` disables mask loading.
        transform: Optional callable ``transform(image, mask)`` that returns
            a dict with ``'image'`` and optionally ``'mask'``.
        id2path: Optional mapping from case id to image path.

    Each item is a dict with ``'image'``, ``'mask'`` (``None`` when no mask
    is available) and ``'id'``.
    """

    def __init__(self, case_ids, img_dir: Path, mask_dir: Path=None, transform=None, id2path: dict=None):
        self.case_ids = case_ids
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.id2path = id2path or {}

    def __len__(self):
        return len(self.case_ids)

    def _locate(self, cid):
        """Resolve the image path: explicit mapping first, then ``img_dir``."""
        if cid in self.id2path:
            return self.id2path[cid]
        candidates = (self.img_dir / f"{cid}{ext}" for ext in IMG_EXTS)
        found = next((c for c in candidates if c.exists()), None)
        if found is None:
            raise FileNotFoundError(f"Image file not found for {cid}")
        return found

    def __getitem__(self, idx):
        cid = self.case_ids[idx]
        img = load_image(self._locate(cid))

        # Cases without any mask file get an all-zero mask of the image size.
        msk = None
        if self.mask_dir is not None:
            msk = load_all_masks_for_case(cid, self.mask_dir, img.shape)
            if msk is None:
                msk = np.zeros(img.shape[:2], dtype=np.uint8)

        if self.transform is None:
            # No transform: scale to [0, 1] and move channels first.
            scaled = img.astype(np.float32)/255.0
            img_t = torch.from_numpy(np.transpose(scaled, (2,0,1)))
            if msk is None:
                msk_t = None
            else:
                msk_t = torch.from_numpy(msk.astype(np.float32))[None, ...]
        else:
            out = self.transform(img, msk)
            img_t = out['image']
            msk_t = out.get('mask', None)

        return {'image': img_t, 'mask': msk_t, 'id': cid}


In [None]:

# 5) Model - Pytorch, U-Net

class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> batch norm -> ReLU stages.

    Padding of 1 keeps the spatial size unchanged. The convolutions carry no
    bias term. The first stage maps ``in_c`` to ``out_c`` channels, the
    second keeps ``out_c``.
    """

    def __init__(self, in_c, out_c):
        super().__init__()
        stages = []
        for stage_in in (in_c, out_c):
            stages += [
                nn.Conv2d(stage_in, out_c, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True),
            ]
        # Layer order and the attribute name determine the state_dict keys.
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)

class UNet(nn.Module):
    """Four-level U-Net built from ``DoubleConv`` stages.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels (logits, no activation applied).
        base_ch: Channel width of the first encoder stage; it doubles at each
            of the four down-sampling steps.

    The output has the same spatial size as the input. Sizes that are not
    multiples of 16 are supported: each up-sampled map is zero-padded to the
    size of its skip connection before concatenation.
    """

    def __init__(self, in_ch=3, out_ch=1, base_ch=32):
        super().__init__()
        self.enc1 = DoubleConv(in_ch, base_ch)
        self.enc2 = DoubleConv(base_ch, base_ch*2)
        self.enc3 = DoubleConv(base_ch*2, base_ch*4)
        self.enc4 = DoubleConv(base_ch*4, base_ch*8)
        self.pool = nn.MaxPool2d(2)

        self.bottleneck = DoubleConv(base_ch*8, base_ch*16)

        # Each decoder stage takes the up-sampled map concatenated with the
        # skip connection, hence twice the channels of the up-sampling output.
        self.up4 = nn.ConvTranspose2d(base_ch*16, base_ch*8, 2, stride=2)
        self.dec4 = DoubleConv(base_ch*16, base_ch*8)
        self.up3 = nn.ConvTranspose2d(base_ch*8, base_ch*4, 2, stride=2)
        self.dec3 = DoubleConv(base_ch*8, base_ch*4)
        self.up2 = nn.ConvTranspose2d(base_ch*4, base_ch*2, 2, stride=2)
        self.dec2 = DoubleConv(base_ch*4, base_ch*2)
        self.up1 = nn.ConvTranspose2d(base_ch*2, base_ch, 2, stride=2)
        self.dec1 = DoubleConv(base_ch*2, base_ch)

        self.outc = nn.Conv2d(base_ch, out_ch, 1)

    @staticmethod
    def _fit_to(up, skip):
        """Zero-pad ``up`` so its height and width equal those of ``skip``.

        2x2 max pooling floors odd sizes, so after up-sampling a map can be
        one pixel short of its skip connection. Returns ``up`` untouched when
        the sizes already agree.
        """
        dh = skip.size(2) - up.size(2)
        dw = skip.size(3) - up.size(3)
        if dh == 0 and dw == 0:
            return up
        # F.pad order for the last two dims: (left, right, top, bottom).
        return F.pad(up, [dw // 2, dw - dw // 2, dh // 2, dh - dh // 2])

    def forward(self, x):
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        e3 = self.enc3(self.pool(e2))
        e4 = self.enc4(self.pool(e3))
        b  = self.bottleneck(self.pool(e4))

        d4 = self._fit_to(self.up4(b), e4)
        d4 = torch.cat([d4, e4], dim=1)
        d4 = self.dec4(d4)
        d3 = self._fit_to(self.up3(d4), e3)
        d3 = torch.cat([d3, e3], dim=1)
        d3 = self.dec3(d3)
        d2 = self._fit_to(self.up2(d3), e2)
        d2 = torch.cat([d2, e2], dim=1)
        d2 = self.dec2(d2)
        d1 = self._fit_to(self.up1(d2), e1)
        d1 = torch.cat([d1, e1], dim=1)
        d1 = self.dec1(d1)
        out = self.outc(d1)
        return out

class SoftDiceLoss(nn.Module):
    """Soft Dice loss on sigmoid probabilities with squared terms in the denominator.

    Args:
        smooth: Constant added to numerator and denominator.

    ``forward`` takes logits and targets with the spatial axes at positions
    2 and 3. It returns ``1 - mean(dice)``, where dice is computed per
    remaining (batch, channel) entry.
    """

    def __init__(self, smooth=1.0):
        super().__init__()
        self.smooth = smooth

    def forward(self, logits, targets):
        spatial = (2, 3)
        p = torch.sigmoid(logits)
        overlap = (p*targets).sum(dim=spatial)
        p_sq = (p*p).sum(dim=spatial)
        t_sq = (targets*targets).sum(dim=spatial)
        score = (2.0 * overlap + self.smooth) / (p_sq + t_sq + self.smooth)
        return 1.0 - score.mean()

class BCEDiceLoss(nn.Module):
    """Equal-weight combination of BCE-with-logits and soft Dice loss.

    ``forward`` takes raw logits and targets of the same shape and returns
    ``0.5 * BCEWithLogits + 0.5 * SoftDice``.
    """

    def __init__(self):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.dice = SoftDiceLoss()

    def forward(self, logits, targets):
        bce_term = 0.5*self.bce(logits, targets)
        dice_term = 0.5*self.dice(logits, targets)
        return bce_term + dice_term


def dice_coeff(preds: torch.Tensor, targets: torch.Tensor, thr=0.5, eps=1e-7):
    """Mean hard Dice coefficient of thresholded predictions.

    Args:
        preds: Raw logits; spatial axes at positions 2 and 3.
        targets: Ground-truth tensor of the same shape.
        thr: Probability threshold applied after the sigmoid.
        eps: Added to numerator and denominator, so an empty prediction
            against an empty target scores 1.

    Returns:
        The Dice value averaged over all non-spatial entries, as a float.
    """
    spatial = (2, 3)
    hard = (torch.sigmoid(preds) > thr).float()
    overlap = (hard*targets).sum(dim=spatial)
    total = hard.sum(dim=spatial) + targets.sum(dim=spatial)
    score = (2*overlap + eps) / (total + eps)
    return score.mean().item()


In [None]:

# 6) Training (K-Fold)

def fit_one_epoch(model, loader, optimizer, loss_fn):
    """Train ``model`` for one pass over ``loader``.

    Loss and Dice are both accumulated weighted by batch size and divided by
    the number of samples actually seen, so a short final batch does not get
    the weight of a full one.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable of batches with ``'image'`` and ``'mask'`` tensors.
        optimizer: Optimiser stepped once per batch.
        loss_fn: Callable ``loss_fn(logits, masks)`` returning a scalar tensor.

    Returns:
        ``(mean_loss, mean_dice)`` over the samples seen, or ``(nan, nan)``
        when the loader yields no batch.
    """
    model.train()
    loss_sum = 0.0
    dice_sum = 0.0
    seen = 0
    for batch in loader:
        imgs = batch['image'].to(DEVICE)
        msks = batch['mask'].to(DEVICE)
        bs = imgs.size(0)
        optimizer.zero_grad(set_to_none=True)
        logits = model(imgs)
        loss = loss_fn(logits, msks)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()*bs
        # dice_coeff returns the batch mean; re-weight it by the batch size.
        dice_sum += dice_coeff(logits.detach(), msks)*bs
        seen += bs
    if seen == 0:
        return float('nan'), float('nan')
    return loss_sum/seen, dice_sum/seen


def validate_one_epoch(model, loader, loss_fn):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Loss and Dice are both accumulated weighted by batch size and divided by
    the number of samples actually seen, so a short final batch does not get
    the weight of a full one.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable of batches with ``'image'`` and ``'mask'`` tensors.
        loss_fn: Callable ``loss_fn(logits, masks)`` returning a scalar tensor.

    Returns:
        ``(mean_loss, mean_dice)`` over the samples seen, or ``(nan, nan)``
        when the loader yields no batch.
    """
    model.eval()
    loss_sum = 0.0
    dice_sum = 0.0
    seen = 0
    with torch.no_grad():
        for batch in loader:
            imgs = batch['image'].to(DEVICE)
            msks = batch['mask'].to(DEVICE)
            bs = imgs.size(0)
            logits = model(imgs)
            loss = loss_fn(logits, msks)
            loss_sum += loss.item()*bs
            # dice_coeff returns the batch mean; re-weight it by the batch size.
            dice_sum += dice_coeff(logits, msks)*bs
            seen += bs
    if seen == 0:
        return float('nan'), float('nan')
    return loss_sum/seen, dice_sum/seen

# Case ids & labels
# labels[i] is 1 when case i has at least one positive mask pixel, else 0.
# They are used only to stratify the folds.
all_ids = TRAIN_INDEX['case_id'].tolist()
labels = np.array([has_mask(cid) for cid in all_ids])
print(f"–ù–∏–π—Ç train images: {len(all_ids)} | Forged(1): {labels.sum()} | Authentic(0): {(labels==0).sum()}")

folds = 3
skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED)

history = []      # one record per (fold, epoch) with train/valid loss and dice
BEST_MODELS = []  # checkpoint path of each fold

for fold, (tr_idx, va_idx) in enumerate(skf.split(all_ids, labels), start=1):
    print(f"\n========== Fold {fold}/{folds} ==========")
    tr_ids = [all_ids[i] for i in tr_idx]
    va_ids = [all_ids[i] for i in va_idx]

    # Only the training dataset uses the random flips (train=True).
    tr_ds = CMFDDataset(tr_ids, TRAIN_IMG_DIR, TRAIN_MASK_DIR, transform=SimpleTransform(IMG_SIZE, train=True), id2path=ID2PATH)
    va_ds = CMFDDataset(va_ids, TRAIN_IMG_DIR, TRAIN_MASK_DIR, transform=SimpleTransform(IMG_SIZE, train=False), id2path=ID2PATH)

    tr_dl = DataLoader(tr_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
    va_dl = DataLoader(va_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

    # A fresh model and optimiser are created for every fold.
    model = UNet(in_ch=3, out_ch=1, base_ch=32).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
    loss_fn = BCEDiceLoss()

    best_dice = -1.0
    best_path = OUTPUT_DIR / f"model_fold{fold}.pt"
    # Stop after `patience` consecutive epochs without a better validation dice.
    patience, patience_cnt = 5, 0

    for epoch in range(1, EPOCHS+1):
        tl, td = fit_one_epoch(model, tr_dl, optimizer, loss_fn)
        vl, vd = validate_one_epoch(model, va_dl, loss_fn)
        print(f"Epoch {epoch:02d} | train_loss={tl:.4f} dice={td:.4f} | valid_loss={vl:.4f} dice={vd:.4f}")
        history.append({"fold": fold, "epoch": epoch, "tr_loss": tl, "tr_dice": td, "va_loss": vl, "va_dice": vd})
        if vd > best_dice:
            # New best validation dice: overwrite this fold's checkpoint.
            best_dice = vd
            torch.save({"state_dict": model.state_dict(), "dice": best_dice}, best_path)
            patience_cnt = 0
        else:
            patience_cnt += 1
            if patience_cnt >= patience:
                print("Early stopping!")
                break
    BEST_MODELS.append(str(best_path))

# Persist the per-epoch metrics of all folds.
pd.DataFrame(history).to_csv(OUTPUT_DIR/"training_history.csv", index=False)
print("\nBest model paths:")
for p in BEST_MODELS:
    print(p)


In [None]:

# 6.b) DEBUG — are the masks being found?
# Sample up to 10 case ids and count those for which no mask file is found.
if len(TRAIN_INDEX)>0:
    sample_ids = TRAIN_INDEX['case_id'].sample(min(10, len(TRAIN_INDEX)), random_state=SEED).tolist()
    missing = []
    for cid in sample_ids:
        m = load_all_masks_for_case(cid, TRAIN_MASK_DIR)
        if m is None:
            missing.append(cid)
    # Prints the sample size and the number of sampled cases without a mask.
    print(f"–ñ–∏—à—ç—ç {len(sample_ids)} case-–æ–æ—Å –º–∞—Å–∫ –æ–ª–¥–æ–æ–≥“Ø–π –Ω—å: {len(missing)}")
    if missing:
        # Prints up to 10 of the case ids without a mask.
        print("–ú–∞—Å–∫ –æ–ª–¥–æ–æ–≥“Ø–π case_id-—É—É–¥—ã–Ω –∂–∏—à—ç—ç:", missing[:10])

# Print the names of up to 10 entries of the mask directory, to compare the
# naming scheme against the case ids.
mask_files = glob.glob(str(TRAIN_MASK_DIR / '*'))[:10]
print('train_masks –¥–∞—Ö—å —Ñ–∞–π–ª—É—É–¥—ã–Ω –∂–∏—à—ç—ç:', [Path(x).name for x in mask_files])


In [None]:

# 7) Inference & submission (Ensemble: fold checkpoints avg)

def post_process(mask: np.ndarray, min_size: int = 64):
    """Drop connected foreground regions smaller than ``min_size`` pixels.

    Regions are found with a breadth-first flood fill using 4-connectivity
    (diagonal neighbours do not connect); no skimage is needed.

    Args:
        mask: 2-D array; non-zero entries are foreground.
        min_size: Smallest region size, in pixels, that is kept.

    Returns:
        A ``uint8`` array of the same shape with 1 on every kept region.
    """
    from collections import deque

    rows, cols = mask.shape
    seen = np.zeros_like(mask, dtype=np.uint8)
    kept = np.zeros_like(mask, dtype=np.uint8)
    neighbours = ((1, 0), (-1, 0), (0, 1), (0, -1))

    # np.nonzero lists foreground pixels in row-major order.
    for r0, c0 in zip(*np.nonzero(mask)):
        r0, c0 = int(r0), int(c0)
        if seen[r0, c0]:
            continue
        seen[r0, c0] = 1
        frontier = deque([(r0, c0)])
        members = [(r0, c0)]
        while frontier:
            r, c = frontier.popleft()
            for dr, dc in neighbours:
                rr, cc = r + dr, c + dc
                inside = 0 <= rr < rows and 0 <= cc < cols
                if not inside or not mask[rr, cc] or seen[rr, cc]:
                    continue
                seen[rr, cc] = 1
                frontier.append((rr, cc))
                members.append((rr, cc))
        if len(members) >= min_size:
            ys, xs = zip(*members)
            kept[list(ys), list(xs)] = 1
    return kept

# test ids (no subfolders)

def get_case_ids(img_dir: Path) -> list:
    """List the stems of all image files directly inside ``img_dir``.

    Entries are visited in sorted order. Subdirectories are not descended
    into, and only regular files whose lower-cased suffix is in ``IMG_EXTS``
    are reported.
    """
    return [
        entry.stem
        for entry in sorted(img_dir.iterdir())
        if entry.is_file() and entry.suffix.lower() in IMG_EXTS
    ]

# Stems of the image files found directly in the test image directory.
case_ids_test = get_case_ids(TEST_IMG_DIR)

class TestDataset(Dataset):
    """Mask-free dataset for inference, addressed by case id.

    Args:
        case_ids: Sequence of case identifiers (file stems).
        img_dir: Directory that holds ``<case_id><ext>`` image files.
        transform: Callable ``transform(image, None)`` returning a dict with
            an ``'image'`` entry.

    Each item is a dict with ``'image'`` and ``'id'``.
    """

    def __init__(self, case_ids, img_dir: Path, transform):
        self.case_ids = case_ids
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.case_ids)

    def __getitem__(self, idx):
        cid = self.case_ids[idx]
        # Take the first existing file among the known image extensions.
        candidates = (self.img_dir / f"{cid}{ext}" for ext in IMG_EXTS)
        img_path = next((c for c in candidates if c.exists()), None)
        if img_path is None:
            raise FileNotFoundError(cid)
        transformed = self.transform(load_image(img_path), None)
        return {'image': transformed['image'], 'id': cid}

# Load models
# Every fold checkpoint found in OUTPUT_DIR joins the ensemble; sorting makes
# the load order reproducible.
models = []
for fn in sorted(os.listdir(OUTPUT_DIR)):
    if fn.startswith('model_fold') and fn.endswith('.pt'):
        ckpt_path = OUTPUT_DIR / fn
        model = UNet(in_ch=3, out_ch=1, base_ch=32).to(DEVICE)
        state = torch.load(ckpt_path, map_location=DEVICE)
        model.load_state_dict(state['state_dict'])
        model.eval()
        models.append(model)

# Inference

test_ds = TestDataset(case_ids_test, TEST_IMG_DIR, transform=SimpleTransform(IMG_SIZE, train=False))

test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)


def _restore_to_original(mask_sq: np.ndarray, cid: str) -> np.ndarray:
    """Map a square network-space mask back onto the original image grid.

    The network sees each image scaled so that its longest side equals the
    square side, then padded to a square. The predicted mask therefore lives
    in that padded square. This function removes the padding and rescales the
    remaining area to the original height and width, so the RLE refers to
    the pixels of the original image.

    Args:
        mask_sq: Binary ``uint8`` mask of shape (side, side).
        cid: Case id; the image file is looked up in ``TEST_IMG_DIR``.

    Returns:
        Binary ``uint8`` mask of shape (original_height, original_width).

    Raises:
        FileNotFoundError: If no image file exists for ``cid``.
    """
    img_path = None
    for ext in IMG_EXTS:
        p = TEST_IMG_DIR / f"{cid}{ext}"
        if p.exists():
            img_path = p
            break
    if img_path is None:
        raise FileNotFoundError(cid)
    with Image.open(img_path) as im:
        w, h = im.size  # PIL reports (width, height)

    # Recompute the geometry of the forward resize-and-pad step.
    side = mask_sq.shape[0]
    scale = side / max(h, w)
    nh = max(1, int(round(h * scale)))
    nw = max(1, int(round(w * scale)))
    top = (side - nh) // 2
    left = (side - nw) // 2

    core = np.ascontiguousarray(mask_sq[top:top + nh, left:left + nw])
    # NEAREST keeps the mask binary while scaling back up.
    restored = Image.fromarray(core).resize((w, h), resample=Image.NEAREST)
    return (np.array(restored) > 0).astype(np.uint8)


all_preds = {}
with torch.no_grad():
    for batch in test_dl:
        imgs = batch['image'].to(DEVICE)
        ids = batch['id']
        if len(models)==0:
            # safety: without checkpoints use zero logits; sigmoid gives 0.5,
            # which is not above the threshold, so the mask stays empty
            logits_sum = torch.zeros((imgs.size(0),1,imgs.size(2),imgs.size(3)), device=DEVICE)
        else:
            # Ensemble by averaging the logits of all fold models.
            logits_sum = None
            for mdl in models:
                lo = mdl(imgs)
                logits_sum = lo if logits_sum is None else (logits_sum + lo)
            logits_sum = logits_sum / max(1,len(models))
        probs = torch.sigmoid(logits_sum).cpu().numpy()
        for i, cid in enumerate(ids):
            m = (probs[i,0] > 0.5).astype(np.uint8)
            # Small regions are removed at network resolution, where the
            # pure-Python flood fill is still cheap.
            m = post_process(m, min_size=64)
            all_preds[cid] = _restore_to_original(m, cid)

# Submission
# Rows with no prediction or an empty mask are labelled 'authentic'.
sub = pd.read_csv(SAMPLE_SUB_PATH)
annotations = []
for _, row in sub.iterrows():
    cid = str(row['case_id'])
    if cid in all_preds and all_preds[cid].sum() > 0:
        annotations.append(rle_encode(all_preds[cid]))
    else:
        annotations.append('authentic')
sub['annotation'] = annotations
SUB_PATH = OUTPUT_DIR / 'submission.csv'
sub.to_csv(SUB_PATH, index=False)
print('Saved:', SUB_PATH)
