In [None]:
import sys
import subprocess

# Banner with the interpreter version, so the package versions reported below can be read in context.
for banner_line in (
    "--- Python Environment Details ---",
    f"Python version: {sys.version}",
    "-" * 30,
):
    print(banner_line)

# Function to get installed package version
def get_package_version(package_name):
    """Return the installed version of ``package_name`` as a string.

    The version is read with ``importlib.metadata`` when that module exists
    (Python 3.8+). Only when the module itself cannot be imported is
    ``pip show`` used as a fallback.

    Args:
        package_name: Distribution name as known to pip (e.g. 'scikit-learn').

    Returns:
        The version string, or "Not Installed" when the package cannot be found.
    """
    try:
        from importlib.metadata import PackageNotFoundError, version
    except ImportError:
        # Older interpreter without importlib.metadata: ask pip instead.
        try:
            pip_output = subprocess.check_output(
                [sys.executable, "-m", "pip", "show", package_name],
                stderr=subprocess.DEVNULL,  # keep pip's "not found" warning out of the cell output
            ).decode(errors="replace")
        except Exception:
            return "Not Installed"
        # Locate the "Version:" field by name instead of relying on its line position.
        for line in pip_output.splitlines():
            if line.startswith("Version:"):
                return line.split(":", 1)[1].strip()
        return "Not Installed"

    try:
        return version(package_name)
    except PackageNotFoundError:
        # A missing distribution is a definitive answer; no need to spawn pip to confirm it.
        return "Not Installed"

# Report the installed version of every library this notebook relies on (before any installs).
for package_name in (
    'numpy',
    'scipy',
    'matplotlib',
    'pandas',
    'torch',
    'torchvision',
    'segmentation_models_pytorch',
    'scikit-learn',
):
    print(f"{package_name} version: {get_package_version(package_name)}")
print("-" * 30)


print("\n--- Attempting minimal installations ---")

# Uninstall segmentation_models_pytorch if it was partially installed and conflicting
# We'll install it clean later.
# We are NOT uninstalling numpy, scipy, matplotlib, pandas here. We want to keep Kaggle's defaults.
!pip uninstall -y segmentation-models-pytorch --quiet

# Only install segmentation_models_pytorch and scikit-learn.
# We trust Kaggle's numpy/scipy/matplotlib/pandas to be mostly compatible with each other.
# The challenge is adding new libraries without breaking the core.
!pip install segmentation_models_pytorch scikit-learn --quiet

print("\n--- Verification of critical libraries after minimal install ---")
print(f"segmentation_models_pytorch version: {get_package_version('segmentation_models_pytorch')}")
print(f"scikit-learn version: {get_package_version('scikit-learn')}")
print("-" * 30)

# --- Standard Imports ---
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from PIL import Image # For handling image files
import matplotlib.pyplot as plt # For plotting and visualization
import glob # For finding file paths

# PyTorch imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms # Use torchvision transforms for augmentation

# Segmentation model library
import segmentation_models_pytorch as smp

# Scikit-learn for data splitting
from sklearn.model_selection import train_test_split

print("\nAll standard imports attempted.")
print("If no errors above, imports were successful!")


# Optional: print a short sample of the input directory to confirm the data structure.
print("\n--- Listing input directory contents (first 10 files) ---")
max_listed_paths = 10  # keeps the output in line with the "first 10 files" header above
listed_paths = 0
listing_truncated = False
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Only masks, train/test images and submission files are of interest here.
        is_relevant = (
            "mask" in filename
            or "train_images" in dirname
            or "test_images" in dirname
            or "submission" in filename
        )
        if not is_relevant:
            continue
        if listed_paths >= max_listed_paths:
            # Another relevant file exists beyond the limit: stop walking altogether.
            listing_truncated = True
            break
        print(os.path.join(dirname, filename))
        listed_paths += 1
    if listing_truncated:
        print("... (truncated list for brevity)")
        break
print("--- End of input directory listing ---\n")

In [None]:
# Official RLE functions from the Kaggle metric notebook
# https://www.kaggle.com/code/metric/recodai-f1?scriptVersionId=270092713&cellId=1

def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the runs as a space-separated "start length start length ..." string,
    with 1-based start positions counted over the flattened array.

    NOTE(review): flatten() walks the array in row-major order - confirm this is the
    pixel ordering the competition metric expects.
    '''
    # Zero sentinels on both ends turn runs that touch the borders into ordinary transitions.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: even slots are run starts, odd slots run ends.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Replace every end position by the length of its run.
    boundaries[1::2] = boundaries[1::2] - boundaries[::2]
    return ' '.join(map(str, boundaries))

def rle_decode(mask_rle, shape=(256, 256)): # Default shape, adjust based on your image size
    '''
    Decode a run-length string back into a binary mask.

    mask_rle: run-length as string formatted "start length start length ..." (1-based starts).
              An empty string, the literal 'authentic', None or NaN all mean "no mask".
    shape: (height,width) of array to return
    Returns numpy array (uint8) of the given shape, 1 - mask, 0 - background
    Raises ValueError when the string does not consist of complete (start, length) pairs.
    '''
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Missing annotations (None, or NaN as pandas represents empty cells) carry no mask.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        return img.reshape(shape)

    s = str(mask_rle).split()
    # The submission written by this notebook labels mask-free images 'authentic'.
    if not s or s == ['authentic']:
        return img.reshape(shape)

    if len(s) % 2 != 0:
        raise ValueError(
            f"RLE string must hold (start, length) pairs, got {len(s)} values"
        )

    starts = np.asarray(s[0::2], dtype=int) - 1 # RLE is 1-indexed, Python arrays are 0-indexed
    lengths = np.asarray(s[1::2], dtype=int)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1
    return img.reshape(shape)

print("RLE functions defined.")

In [None]:
# --- Configuration ---
DATA_DIR = '/kaggle/input/recodai-luc-scientific-image-forgery-detection'
IMG_SIZE = 256 # Standardize image size for training
BATCH_SIZE = 16
NUM_EPOCHS = 15 # Increased epochs slightly, you may need more
LEARNING_RATE = 1e-4

# --- Paths ---
train_image_forged_dir = os.path.join(DATA_DIR, 'train_images', 'forged')
train_image_authentic_dir = os.path.join(DATA_DIR, 'train_images', 'authentic')
train_mask_dir = os.path.join(DATA_DIR, 'train_masks')

test_image_dir = os.path.join(DATA_DIR, 'test_images')

# Get all training image paths (forged first, then authentic)
train_forged_image_paths = sorted(glob.glob(os.path.join(train_image_forged_dir, '*.png')))
train_authentic_image_paths = sorted(glob.glob(os.path.join(train_image_authentic_dir, '*.png')))
train_image_paths = train_forged_image_paths + train_authentic_image_paths

# Get all mask paths
# NOTE(review): only *.png masks are discovered here - confirm the file format used in train_masks.
all_mask_paths = sorted(glob.glob(os.path.join(train_mask_dir, '*.png')))

# Group the mask files by image ID: the part of the file name before the first '_' or '.'.
image_id_to_mask_paths = {}
for mask_path in all_mask_paths:
    mask_basename = os.path.basename(mask_path)
    image_id_part = mask_basename.split('_')[0].split('.')[0]
    image_id_to_mask_paths.setdefault(image_id_part, []).append(mask_path)


# Build (image path, mask paths) pairs in the same order as train_image_paths.
train_data_list = []
forged_without_mask = 0
for img_path in train_forged_image_paths:
    img_id = os.path.basename(img_path).split('.')[0]
    mask_info = image_id_to_mask_paths.get(img_id, [])
    if not mask_info:
        forged_without_mask += 1
    train_data_list.append((img_path, mask_info))

# Authentic images never receive a mask, even when their file name matches a mask ID:
# masks are looked up by basename only, so an ID shared with a forged image would
# otherwise label an authentic image as forged.
for img_path in train_authentic_image_paths:
    train_data_list.append((img_path, []))

print(f"Found {len(train_forged_image_paths)} forged training images.")
print(f"Found {len(train_authentic_image_paths)} authentic training images.")
print(f"Total training images: {len(train_data_list)}")
print(f"Found {len(all_mask_paths)} total mask files.")
if forged_without_mask:
    # Without this hint such images would silently be trained with an all-background target.
    print(f"WARNING: {forged_without_mask} forged training images have no matching mask file.")

test_image_paths = sorted(glob.glob(os.path.join(test_image_dir, '*.png')))
print(f"Found {len(test_image_paths)} test images.")

print("\nFirst 5 training data entries (image path, mask paths):")
for i in range(min(5, len(train_data_list))):
    img_path, mask_paths = train_data_list[i]
    print(f"Image: {os.path.basename(img_path)}, Masks: {[os.path.basename(p) for p in mask_paths] if mask_paths else 'authentic'}")

In [None]:
class ForgeryDataset(Dataset):
    """Dataset of images with binary forgery masks (train/val) or of bare test images.

    Args:
        data_list: In train/val mode a list of ``(image_path, mask_paths)`` tuples,
            where ``mask_paths`` may be empty; in test mode a list of image paths.
        img_size: Side length images and masks are resized to (square output).
        transform: In test mode, a callable applied to the PIL image. In train/val
            mode the callable itself is never invoked: a truthy value only switches
            the paired random flips on, a falsy value gives a deterministic pipeline.
        is_test: Selects test mode.

    Items:
        Test mode: ``(image, image_id, (original_h, original_w))``.
        Train/val mode: ``(image, mask)`` with ``image`` a normalized tensor and
        ``mask`` a float tensor holding exactly 0.0 / 1.0.
    """

    # Normalization statistics applied to every train/val image (ImageNet stats).
    _NORM_MEAN = [0.485, 0.456, 0.406]
    _NORM_STD = [0.229, 0.224, 0.225]

    def __init__(self, data_list, img_size, transform=None, is_test=False):
        self.data_list = data_list
        self.img_size = img_size
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.data_list)

    @staticmethod
    def _open_rgb(img_path):
        """Load an image as RGB and release the underlying file handle right away."""
        with Image.open(img_path) as raw_image:
            return raw_image.convert("RGB")

    @staticmethod
    def _load_combined_mask(image_size, mask_paths):
        """Return one mode-'L' PIL mask: the union of all mask files, or all zeros if none."""
        width, height = image_size # PIL gives (width, height)
        if not mask_paths:
            return Image.fromarray(np.zeros((height, width), dtype=np.uint8))

        mask_arrays = []
        for m_path in mask_paths:
            with Image.open(m_path) as mask_img:
                # Any non-zero grayscale value counts as a forged pixel.
                mask_arrays.append(np.array(mask_img.convert("L")) > 0)
        return Image.fromarray(np.logical_or.reduce(mask_arrays).astype(np.uint8))

    def __getitem__(self, idx):
        if self.is_test:
            img_path = self.data_list[idx]
            image = self._open_rgb(img_path)

            original_w, original_h = image.size # PIL gives (width, height)

            if self.transform:
                image = self.transform(image)

            return image, os.path.basename(img_path).split('.')[0], (original_h, original_w)

        # Training/Validation
        img_path, mask_paths = self.data_list[idx]
        image = self._open_rgb(img_path)
        combined_mask = self._load_combined_mask(image.size, mask_paths)

        # Resize both; NEAREST keeps the mask binary instead of blending labels.
        target_size = (self.img_size, self.img_size)
        image = transforms.Resize(target_size)(image)
        mask = transforms.Resize(
            target_size, interpolation=transforms.InterpolationMode.NEAREST
        )(combined_mask)

        if self.transform:
            # Paired augmentation: the same flip must hit image and mask.
            # torch's RNG is used so the draws follow torch's (per-worker) seeding.
            if torch.rand(1).item() < 0.5: # Random horizontal flip
                image = transforms.functional.hflip(image)
                mask = transforms.functional.hflip(mask)
            if torch.rand(1).item() < 0.5: # Random vertical flip
                image = transforms.functional.vflip(image)
                mask = transforms.functional.vflip(mask)

        image = transforms.ToTensor()(image)
        image = transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)(image)

        # ToTensor divides uint8 pixel values by 255, which would turn the 0/1 mask
        # into 0 / 0.0039; thresholding restores a true 0.0 / 1.0 training target.
        mask = (transforms.ToTensor()(mask) > 0).float()

        return image, mask

print("ForgeryDataset class defined (using PIL and torchvision).")

print("ForgeryDataset class defined (using PIL and torchvision).")

In [None]:
# --- Augmentations (Simplified torchvision style) ---
# Basic transforms including normalization and conversion to tensor
def get_train_transforms(img_size):
    """Build the training preprocessing pipeline for a square ``img_size`` input.

    Random augmentations like flips are not part of this pipeline; they are applied
    in the Dataset's __getitem__ so that image and mask receive the same flip.
    """
    target_hw = (img_size, img_size)
    pipeline_steps = [
        transforms.Resize(target_hw),
        # Converts image to CxHxW and scales pixels to [0, 1]
        transforms.ToTensor(),
        # ImageNet stats
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(pipeline_steps)

def get_val_test_transforms(img_size):
    """Build the validation/test preprocessing: resize to a square, to tensor, normalize."""
    resize_step = transforms.Resize((img_size, img_size))
    to_tensor_step = transforms.ToTensor()
    normalize_step = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return transforms.Compose([resize_step, to_tensor_step, normalize_step])

print("Augmentation transforms defined (using torchvision).")

In [None]:
# --- Data Split ---
train_data, val_data = train_test_split(train_data_list, test_size=0.2, random_state=42)

# In train/val mode ForgeryDataset treats a truthy `transform` purely as the switch for its
# random flips (resize/tensor/normalize always happen inside the dataset).
# - train: transform given  -> random flips enabled
# - val:   transform=None   -> deterministic preprocessing, so the validation loss used for
#          checkpoint selection and LR scheduling is comparable from epoch to epoch
# - test:  the transform is applied to the image as-is
train_dataset = ForgeryDataset(train_data, IMG_SIZE, get_train_transforms(IMG_SIZE), is_test=False)
val_dataset = ForgeryDataset(val_data, IMG_SIZE, transform=None, is_test=False)
test_dataset = ForgeryDataset(test_image_paths, IMG_SIZE, get_val_test_transforms(IMG_SIZE), is_test=True)


# Only the training data is shuffled; validation and test keep their order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}, Test samples: {len(test_dataset)}")
print("DataLoaders created.")

# --- Model ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# U-Net with a ResNet-34 encoder; one output channel = per-pixel forgery logit.
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
)
model.to(DEVICE)

# --- Loss Function ---
class CombinedLoss(nn.Module):
    """Weighted sum of a binary Dice loss and a soft BCE-with-logits loss.

    Args:
        dice_weight: Factor applied to the Dice term.
        bce_weight: Factor applied to the BCE term.
    """

    def __init__(self, dice_weight=0.5, bce_weight=0.5):
        super().__init__()
        # The Dice loss is configured with from_logits=True, i.e. it receives raw model outputs.
        self.dice_loss = smp.losses.DiceLoss(mode='binary', from_logits=True)
        self.bce_loss = smp.losses.SoftBCEWithLogitsLoss()
        self.dice_weight, self.bce_weight = dice_weight, bce_weight

    def forward(self, pred, target):
        """Return ``dice_weight * dice(pred, target) + bce_weight * bce(pred, target)``."""
        weighted_dice = self.dice_weight * self.dice_loss(pred, target)
        weighted_bce = self.bce_weight * self.bce_loss(pred, target)
        return weighted_dice + weighted_bce

# Equal weighting of the Dice and BCE terms.
loss_fn = CombinedLoss(dice_weight=0.5, bce_weight=0.5)

# --- Optimizer ---
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# --- Learning Rate Scheduler ---
# Halves the learning rate once the monitored value (stepped with the validation loss)
# has not improved for 5 epochs.
# `verbose=True` is intentionally not passed: the argument is deprecated in recent PyTorch
# releases and no longer accepted by the newest ones. The current learning rate can be read
# from optimizer.param_groups[0]['lr'] whenever it needs to be logged.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

print("Model, Loss, Optimizer, and Scheduler initialized.")

In [None]:
# --- Training Loop ---
MODEL_SAVE_PATH = 'best_model.pth'
best_val_loss = float('inf')  # lowest average validation loss seen so far

print(f"Starting training for {NUM_EPOCHS} epochs...")
for epoch in range(NUM_EPOCHS):
    # Training pass: one optimizer step per batch.
    model.train()
    running_loss = 0.0
    for batch_idx, (images, masks) in enumerate(train_loader):
        images = images.to(DEVICE)
        masks = masks.to(DEVICE)

        optimizer.zero_grad()
        loss = loss_fn(model(images), masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Progress line every 50 batches with the running mean loss of this epoch.
        if (batch_idx + 1) % 50 == 0:
            print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Batch {batch_idx+1}/{len(train_loader)}, Train Loss: {running_loss / (batch_idx+1):.4f}")

    avg_train_loss = running_loss / len(train_loader)

    # --- Validation Loop --- (eval mode, gradients disabled)
    model.eval()
    batch_val_losses = []
    with torch.no_grad():
        for images, masks in val_loader:
            images = images.to(DEVICE)
            masks = masks.to(DEVICE)
            batch_val_losses.append(loss_fn(model(images), masks).item())

    avg_val_loss = sum(batch_val_losses) / len(val_loader)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Avg Train Loss: {avg_train_loss:.4f}, Avg Val Loss: {avg_val_loss:.4f}")

    # The plateau scheduler monitors the average validation loss.
    scheduler.step(avg_val_loss)

    # Checkpoint only when the validation loss improves on the best one so far.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f"Saved best model with validation loss: {best_val_loss:.4f}")

print("\nTraining complete!")

In [None]:
# --- Load Best Model for Prediction ---
# map_location lets a checkpoint written on a GPU session load on a CPU-only one as well.
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=DEVICE))
model.eval()
print(f"Loaded best model from {MODEL_SAVE_PATH} for prediction.")

# --- Prediction and Submission ---
submission_data = []
PREDICTION_THRESHOLD = 0.5  # probability above which a pixel counts as forged

print("Generating predictions for test images...")
with torch.no_grad():
    for images, image_ids, original_sizes in test_loader:
        images = images.to(DEVICE)
        outputs = model(images)
        probabilities = torch.sigmoid(outputs).cpu().numpy()

        for i in range(probabilities.shape[0]):
            img_id = image_ids[i]
            # original_sizes is a tuple of (H_tensor, W_tensor) for the batch
            original_h, original_w = original_sizes[0][i].item(), original_sizes[1][i].item()

            pred_mask = probabilities[i, 0] # Get the single channel mask (HxW)

            # Resize the probability map back to the original image size.
            # It is resized as a 32-bit float image (PIL mode 'F') so the values stay in [0, 1];
            # going through ToPILImage would rescale them to 0..255 integers and make the
            # 0.5 threshold below fire on almost every pixel.
            # Bilinear interpolation is the suitable choice for probabilities.
            pred_mask_resized = Image.fromarray(pred_mask.astype(np.float32)).resize(
                (original_w, original_h), Image.BILINEAR
            )
            pred_mask_resized = np.array(pred_mask_resized) # Convert back to numpy array

            binary_mask = (pred_mask_resized > PREDICTION_THRESHOLD).astype(np.uint8)

            # Images without any predicted forged pixel are reported as 'authentic'.
            if np.sum(binary_mask) == 0:
                submission_data.append({'case_id': img_id, 'annotation': 'authentic'})
            else:
                rle_encoded_mask = rle_encode(binary_mask)
                submission_data.append({'case_id': img_id, 'annotation': rle_encoded_mask})

# Explicit columns keep the CSV header intact even when there are no test images.
submission_df = pd.DataFrame(submission_data, columns=['case_id', 'annotation'])

SUBMISSION_FILE_PATH = 'submission.csv'
submission_df.to_csv(SUBMISSION_FILE_PATH, index=False)

print(f"Submission file created successfully at {SUBMISSION_FILE_PATH}")
print("\nFirst 5 rows of submission.csv:")
print(submission_df.head())