# Cell 1: Environment & Hyper-Parameters
Goal: Configure a high-resolution input size and sensitive detection thresholds, aiming to lift the leaderboard score from 0.321 into the Top 50.

In [1]:
import os, cv2, json, math, torch, random
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoImageProcessor, AutoModel

class CFG:
    """Notebook-wide settings, read as class attributes (the class is never instantiated here)."""

    # --- Reproducibility / hardware -------------------------------------
    seed = 42
    # Use the GPU when one is visible, otherwise fall back to the CPU.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # --- Input resolution -----------------------------------------------
    # Side length (pixels) every test image is resized to before inference.
    # Author's note: a P100 handles 616px efficiently.
    img_size = 616

    # --- Kaggle input locations -----------------------------------------
    test_dir = "/kaggle/input/recodai-luc-scientific-image-forgery-detection/test_images"
    sample_sub = "/kaggle/input/recodai-luc-scientific-image-forgery-detection/sample_submission.csv"
    dino_path = "/kaggle/input/dinov2/pytorch/base/1"
    model_weights = "/kaggle/input/cnndinov2-pbd/CNNDINOv2-U52/CNNDINOv2-U52/model_seg_final.pt"

    # --- Decision thresholds --------------------------------------------
    # A predicted mask is kept only if it has at least `area_thr` foreground
    # pixels AND the mean probability inside it is at least `mean_thr`.
    area_thr = 180
    mean_thr = 0.20
    # Average the prediction with that of the horizontally flipped image.
    use_tta = True

def seed_everything(seed):
    """Seed every random number generator this notebook uses.

    Seeds Python's `random`, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Hash randomisation of the *running* interpreter is fixed at startup, so
    # this only takes effect for child processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; this is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run; turn it
    # off so the deterministic flag above is actually effective.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs once with the configured seed, then report the chosen input size.
seed_everything(CFG.seed)
print(f"Environment Optimized. Resolution: {CFG.img_size}px")

2026-01-08 22:28:59.946737: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767911340.146313      24 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767911340.203303      24 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767911340.683904      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767911340.683946      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767911340.683949      24 computation_placer.cc:177] computation placer alr

Environment Optimized. Resolution: 616px


# Cell 2: Advanced Model Architecture
Goal: Reshape the DINOv2 patch embeddings into a spatial feature map and decode it into a mask with a small convolutional decoder that upsamples in stages.

In [2]:
class DinoTinyDecoder(nn.Module):
    """Small convolutional decoder that turns a feature map into mask logits.

    Three conv+ReLU blocks halve the channel count (in_ch -> 384 -> 192 -> 96);
    after each block the map is bilinearly resized to the next entry of
    `stage_sizes`. A final 1x1 convolution produces `out_ch` channels, which
    are resized to the size requested by the caller.

    Args:
        in_ch: Number of channels of the incoming feature map.
        out_ch: Number of output (logit) channels.
        stage_sizes: Three intermediate spatial sizes, one per block. Each
            entry is either an int (square) or an (height, width) pair. The
            default reproduces the original fixed 88 -> 176 -> 352 schedule.

    Raises:
        ValueError: If `stage_sizes` does not contain exactly three entries.
    """

    def __init__(self, in_ch=768, out_ch=1, stage_sizes=(88, 176, 352)):
        super().__init__()
        if len(stage_sizes) != 3:
            raise ValueError(
                f"stage_sizes must have exactly 3 entries (one per block), got {len(stage_sizes)}"
            )
        # The attribute names below become the state_dict keys, so they must
        # stay as they are for the trained checkpoint to load.
        self.block1 = nn.Sequential(nn.Conv2d(in_ch, 384, 3, padding=1), nn.ReLU(inplace=True))
        self.block2 = nn.Sequential(nn.Conv2d(384, 192, 3, padding=1), nn.ReLU(inplace=True))
        self.block3 = nn.Sequential(nn.Conv2d(192, 96, 3, padding=1), nn.ReLU(inplace=True))
        self.conv_out = nn.Conv2d(96, out_ch, kernel_size=1)
        # Plain tuple attribute: not a parameter or buffer, so it does not
        # appear in the state_dict.
        self.stage_sizes = tuple(self._as_hw(size) for size in stage_sizes)

    @staticmethod
    def _as_hw(size):
        """Normalise an int or a (height, width) pair to an (int, int) tuple."""
        if isinstance(size, int):
            return (size, size)
        height, width = size
        return (int(height), int(width))

    def forward(self, f, target_size):
        """Decode feature map `f` into logits of spatial size `target_size`.

        Args:
            f: Feature tensor laid out as (batch, in_ch, height, width).
            target_size: (height, width) of the returned logits.

        Returns:
            Tensor of shape (batch, out_ch, *target_size).
        """
        x = f
        # Convolve, then upsample, one stage at a time.
        for block, size in zip((self.block1, self.block2, self.block3), self.stage_sizes):
            x = F.interpolate(block(x), size=size, mode='bilinear', align_corners=False)
        x = self.conv_out(x)
        return F.interpolate(x, size=target_size, mode='bilinear', align_corners=False)

class ScientificForgeryModel(nn.Module):
    """Frozen pretrained encoder followed by a `DinoTinyDecoder` segmentation head.

    Args:
        dino_path: Local directory holding the pretrained encoder and its
            image-processor config (loaded with `local_files_only=True`).
    """

    def __init__(self, dino_path):
        super().__init__()
        self.processor = AutoImageProcessor.from_pretrained(dino_path, local_files_only=True)
        self.encoder = AutoModel.from_pretrained(dino_path, local_files_only=True)
        # The backbone is used as a fixed feature extractor.
        for param in self.encoder.parameters():
            param.requires_grad = False
        self.seg_head = DinoTinyDecoder(768, 1)

    def forward(self, x):
        """Return mask logits of size (CFG.img_size, CFG.img_size) for batch `x`.

        `x` is treated as a channels-first float batch scaled to [0, 1]: it is
        multiplied by 255, clamped, and permuted to channels-last before being
        handed to the image processor.
        """
        # NOTE(review): `.byte()` truncates rather than rounds; if x came from
        # uint8 / 255 a float round-trip could land one level low -- confirm.
        scaled = (x * 255).clamp(0, 255)
        imgs = scaled.byte().permute(0, 2, 3, 1).cpu().numpy()

        # NOTE(review): the processor applies whatever resize/crop is stored in
        # the config at `dino_path`, so the encoder may not see the full
        # CFG.img_size resolution (or the full image area) -- confirm.
        batch = self.processor(images=list(imgs), return_tensors="pt")
        batch = batch.to(CFG.device)

        with torch.no_grad():
            tokens = self.encoder(**batch).last_hidden_state

        n_batch, n_tokens, n_dim = tokens.shape
        # The first token is dropped (presumably the CLS token) and the
        # remaining ones are assumed to form a square grid.
        side = int(math.sqrt(n_tokens - 1))
        patch_tokens = tokens[:, 1:, :]
        fmap = patch_tokens.permute(0, 2, 1).reshape(n_batch, n_dim, side, side)

        out_hw = (CFG.img_size, CFG.img_size)
        return self.seg_head(fmap, out_hw)

# Cell 3: Post-Processing & Encoding
Goal: Clean output noise and convert masks to RLE format.

In [3]:
def rle_encode(mask):
    """Run-length encode a binary mask for the submission file.

    The mask is scanned in column-major order (it is transposed before being
    flattened). Only pixels equal to 1 count as foreground.

    Args:
        mask: 2-D array whose foreground pixels are 1 (or True).

    Returns:
        The string "authentic" when there is no foreground pixel; otherwise a
        JSON list "[start, length, start, length, ...]" where each start is a
        1-based index into the flattened mask.
    """
    pixels = np.asarray(mask).T.flatten()
    foreground = np.flatnonzero(pixels == 1)
    if foreground.size == 0:
        return "authentic"

    # A new run begins wherever consecutive foreground indices are not adjacent.
    gaps = np.flatnonzero(np.diff(foreground) > 1)
    run_first = foreground[np.concatenate(([0], gaps + 1))]
    run_last = foreground[np.concatenate((gaps, [foreground.size - 1]))]

    # Interleave (1-based start, length) pairs.
    runs = np.column_stack((run_first + 1, run_last - run_first + 1)).ravel()
    return json.dumps([int(v) for v in runs])

def post_process(prob, original_size):
    """Turn a probability map into a binary mask at the original image size.

    Steps: 5x5 Gaussian smoothing, thresholding at mean + 0.3 * std of the
    smoothed map, 5x5 morphological closing, nearest-neighbour resize.

    Args:
        prob: 2-D probability map.
        original_size: Size passed straight to `cv2.resize` as the target
            size; the caller in this notebook supplies (width, height).

    Returns:
        uint8 mask of 0/1 values at `original_size`.
    """
    smoothed = cv2.GaussianBlur(prob, (5, 5), 0)

    # The cut-off is relative to this map's own statistics, not an absolute
    # probability level.
    cutoff = smoothed.mean() + 0.3 * smoothed.std()
    binary = (smoothed > cutoff).astype(np.uint8)

    # Closing bridges small gaps between nearby foreground fragments.
    closing_kernel = np.ones((5, 5), np.uint8)
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    return cv2.resize(closed, original_size, interpolation=cv2.INTER_NEAREST)

# Cell 4: Final Inference Loop
Goal: Generate the submission file, casting `case_id` to string on both sides so the merge with the sample submission is type-safe.

In [4]:
def _load_forgery_model():
    """Build the model on CFG.device, load the checkpoint if present, set eval mode."""
    model = ScientificForgeryModel(CFG.dino_path).to(CFG.device)
    if os.path.exists(CFG.model_weights):
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # that come from a trusted source.
        state_dict = torch.load(CFG.model_weights, map_location=CFG.device)
        # strict=False never raises on a mismatch, so inspect the result
        # instead of assuming that everything was loaded.
        load_result = model.load_state_dict(state_dict, strict=False)
        missing_head = [k for k in load_result.missing_keys if k.startswith("seg_head.")]
        if load_result.unexpected_keys:
            print(f"WARNING: {len(load_result.unexpected_keys)} checkpoint keys were not used, "
                  f"e.g. {list(load_result.unexpected_keys)[:3]}")
        if missing_head:
            print(f"WARNING: segmentation head keys missing from checkpoint: {missing_head}; "
                  "those layers keep their random initialisation.")
        else:
            print("--- Professional Weights Loaded Successfully ---")
    else:
        # Best-effort behaviour is kept (no exception), but say so loudly:
        # without the checkpoint the segmentation head is untrained.
        print(f"WARNING: weights file not found at {CFG.model_weights}; "
              "the segmentation head is randomly initialised.")
    model.eval()
    return model


def _annotate_image(model, img_path):
    """Predict one image and return its submission annotation string."""
    # Close the file handle as soon as the pixels have been decoded.
    with Image.open(img_path) as raw_img:
        pil_img = raw_img.convert("RGB")
    orig_w, orig_h = pil_img.size

    resized = pil_img.resize((CFG.img_size, CFG.img_size))
    img_t = torch.from_numpy(np.array(resized, np.float32) / 255.).permute(2, 0, 1)[None].to(CFG.device)

    logits = model(img_t)
    if CFG.use_tta:
        # TTA: predict on the horizontally flipped image, flip the result
        # back and average the two logit maps.
        logits_h = model(torch.flip(img_t, dims=[3]))
        logits = (logits + torch.flip(logits_h, dims=[3])) / 2.0

    prob = torch.sigmoid(logits)[0, 0].cpu().numpy()
    mask = post_process(prob, (orig_w, orig_h))

    # `mask` is at the original resolution while `prob` is at CFG.img_size,
    # so bring the mask back down before using it to index `prob`.
    area = mask.sum()
    mask_s = cv2.resize(mask, (CFG.img_size, CFG.img_size), interpolation=cv2.INTER_NEAREST)
    inside = prob[mask_s == 1]
    # A small region can vanish when downscaled; avoid the mean of an empty
    # selection (NaN plus a RuntimeWarning).
    mean_c = inside.mean() if (area > 0 and inside.size > 0) else 0

    if area >= CFG.area_thr and mean_c >= CFG.mean_thr:
        return rle_encode(mask)
    return "authentic"


def run_optimized_inference():
    """Run inference over CFG.test_dir and write `submission.csv`.

    Every file in the test directory is predicted (optionally with
    horizontal-flip TTA), post-processed, filtered by the area and
    mean-confidence thresholds from CFG and encoded. The results are
    left-joined onto the `case_id` column of the sample submission; ids
    without a prediction are filled with "authentic".
    """
    model = _load_forgery_model()

    # Ignore sub-directories or other non-file entries in the test folder.
    test_images = [
        name for name in sorted(os.listdir(CFG.test_dir))
        if os.path.isfile(os.path.join(CFG.test_dir, name))
    ]

    results = []
    with torch.no_grad():
        for img_name in tqdm(test_images, desc="High-Res Processing"):
            annotation = _annotate_image(model, os.path.join(CFG.test_dir, img_name))
            results.append({"case_id": str(Path(img_name).stem), "annotation": annotation})

    # Fixing the columns keeps the merge below valid even with zero test images.
    sub_df = pd.DataFrame(results, columns=["case_id", "annotation"])
    sample_df = pd.read_csv(CFG.sample_sub)
    # Compare ids as strings on both sides so the join keys have the same type.
    sample_df['case_id'] = sample_df['case_id'].astype(str)
    sub_df['case_id'] = sub_df['case_id'].astype(str)

    final_sub = sample_df[['case_id']].merge(sub_df, on="case_id", how="left").fillna("authentic")
    final_sub.to_csv("submission.csv", index=False)
    print("--- SUCCESS: Optimized submission.csv generated! ---")

# Entry point: run the whole inference pipeline when this code is executed directly.
if __name__ == "__main__":
    run_optimized_inference()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


--- Professional Weights Loaded Successfully ---


High-Res Processing: 100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

--- SUCCESS: Optimized submission.csv generated! ---



