# Recod.ai/LUC - Scientific Image Forgery Detection
- Develop methods that can accurately detect and segment copy-move forgeries within biomedical research images.
- Link: https://www.kaggle.com/competitions/recodai-luc-scientific-image-forgery-detection/

## Imports

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
import json
from matplotlib import pyplot as plt
import random
random.seed(42)

## Basic Config

In [None]:
# ==================== CONFIGURATION ====================
# Side length (in pixels) that images and masks are resized to before entering the model.
IMG_SIZE = 256
# Number of samples per mini-batch for the train and validation loaders.
BATCH_SIZE = 8
# Number of full passes over the training loader.
EPOCHS = 10
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.0001
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Dataset root; sub-paths are appended by plain string concatenation, so the trailing slash is required.
DATA_PATH="/kaggle/input/recodai-luc-scientific-image-forgery-detection/"
print(f"Using device: {DEVICE}")


## Dataset Class - The Data Handler

**What it does:**
- Loads images and masks from disk
- Resizes to 256×256 (faster training)
- Normalizes pixels to 0-1 range

In [None]:
# ==================== DATASET ====================
class ForgeryDataset(Dataset):
    """Serve (image, mask) tensor pairs for forgery segmentation.

    Every image is read with OpenCV, converted from BGR to RGB, resized to
    ``IMG_SIZE x IMG_SIZE`` and scaled to the 0-1 range. Samples flagged as
    authentic get an all-zero mask; all other samples load their mask from a
    ``.npy`` file.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of ``.npy`` mask paths aligned with
            ``image_paths``; defaults to ``None`` for every sample.
        is_authentic: Sequence of flags aligned with ``image_paths``;
            defaults to ``False`` for every sample.
    """

    def __init__(self, image_paths, mask_paths=None, is_authentic=None):
        sample_count = len(image_paths)
        if mask_paths is None:
            mask_paths = [None] * sample_count
        if is_authentic is None:
            is_authentic = [False] * sample_count

        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.is_authentic = is_authentic

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` as float tensors shaped (3, S, S) and (1, S, S)."""
        target_size = (IMG_SIZE, IMG_SIZE)

        # Image: BGR -> RGB, resize, scale to 0-1, then HWC -> CHW.
        bgr = cv2.imread(self.image_paths[idx])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        scaled = cv2.resize(rgb, target_size) / 255.0
        img = torch.from_numpy(scaled).float().permute(2, 0, 1)

        # Mask: zeros for authentic samples, otherwise the stored mask binarised.
        if not self.is_authentic[idx]:
            raw_mask = np.load(self.mask_paths[idx])
            if raw_mask.ndim == 3:
                # Keep only the first slice along the leading axis.
                raw_mask = raw_mask[0, :, :]
            resized_mask = cv2.resize(raw_mask.astype(np.uint8), target_size)
            mask = (resized_mask > 0).astype(np.float32)
        else:
            mask = np.zeros(target_size, dtype=np.float32)

        return img, torch.from_numpy(mask).float().unsqueeze(0)

# Load data: collect forged images, their masks, and authentic images from disk.
forged_images = sorted(glob(DATA_PATH + 'train_images/forged/*.png'))
train_masks = sorted(glob(DATA_PATH + 'train_masks/*.npy'))
authentic_images = sorted(glob(DATA_PATH + 'train_images/authentic/*.png'))

# Match forged images with masks.
# A set gives O(1) membership tests; scanning the list for every image was O(n^2).
available_masks = set(train_masks)
matched_forged_images = []
matched_masks = []
for img_path in forged_images:
    img_name = img_path.split('/')[-1].replace('.png', '')
    mask_path = DATA_PATH + f'train_masks/{img_name}.npy'
    # Forged images without a mask file are silently dropped.
    if mask_path in available_masks:
        matched_forged_images.append(img_path)
        matched_masks.append(mask_path)

# Combine datasets: forged samples first, then authentic ones (which have no mask file).
all_images = matched_forged_images + authentic_images
all_masks = matched_masks + [None] * len(authentic_images)
all_is_authentic = [False] * len(matched_forged_images) + [True] * len(authentic_images)

print(f"Total images: {len(all_images)}")
print(f"  Forged: {len(matched_forged_images)}")
print(f"  Authentic: {len(authentic_images)}")

# Train/val split: 80/20, stratified so both splits keep the authentic/forged ratio.
train_imgs, val_imgs, train_masks_split, val_masks_split, train_auth, val_auth = train_test_split(
    all_images, all_masks, all_is_authentic, test_size=0.2, random_state=42, stratify=all_is_authentic
)

train_dataset = ForgeryDataset(train_imgs, train_masks_split, train_auth)
val_dataset = ForgeryDataset(val_imgs, val_masks_split, val_auth)

# Only the training loader is shuffled; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"\nTrain: {len(train_dataset)} images ({sum(train_auth)} authentic, {len(train_auth) - sum(train_auth)} forged)")
print(f"Val: {len(val_dataset)} images ({sum(val_auth)} authentic, {len(val_auth) - sum(val_auth)} forged)")

## Visualization: Check Actual Masks

Let's first visualize some forged training images with their ground truth masks to understand the data.

In [None]:
import os
def visualize_ground_truth(image_paths, mask_paths, is_authentic, num_authentic=1, num_forged=5):
    """Plot randomly chosen authentic and forged images beside their ground-truth masks.

    Args:
        image_paths: Image file paths.
        mask_paths: ``.npy`` mask paths aligned with ``image_paths``.
        is_authentic: Flags aligned with ``image_paths``.
        num_authentic: Maximum number of authentic samples to draw.
        num_forged: Maximum number of forged samples to draw.

    Each row shows the full-resolution image and its binary mask, and the
    share of forged pixels is printed per sample.
    """
    # Partition sample positions by class.
    authentic_pool, forged_pool = [], []
    for position, flag in enumerate(is_authentic):
        (authentic_pool if flag else forged_pool).append(position)

    # Sample without replacement, capped at what each class can provide.
    picked_authentic = np.random.choice(
        authentic_pool, min(num_authentic, len(authentic_pool)), replace=False
    )
    picked_forged = np.random.choice(
        forged_pool, min(num_forged, len(forged_pool)), replace=False
    )
    chosen = [*picked_authentic, *picked_forged]
    row_count = len(chosen)

    fig, axes = plt.subplots(row_count, 2, figsize=(10, 5 * row_count))
    if row_count == 1:
        # A single row comes back as a 1-D axes array; make it 2-D for uniform indexing.
        axes = axes.reshape(1, -1)

    for row, sample_idx in enumerate(chosen):
        path = image_paths[sample_idx]
        rgb = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

        if is_authentic[sample_idx]:
            label = "AUTHENTIC"
            mask = np.zeros((rgb.shape[0], rgb.shape[1]), dtype=np.uint8)
        else:
            label = "FORGED"
            mask = np.load(mask_paths[sample_idx])
            if mask.ndim == 3:
                # Keep only the first slice along the leading axis.
                mask = mask[0, :, :]
            mask = (mask > 0).astype(np.uint8)

        # Share of pixels marked as forged.
        forged_pixels = mask.sum()
        forged_percent = (forged_pixels / mask.size) * 100

        image_ax = axes[row, 0]
        image_ax.imshow(rgb)
        image_ax.set_title(f"{label}: {os.path.basename(path)}", fontsize=10)
        image_ax.axis('off')

        mask_ax = axes[row, 1]
        mask_ax.imshow(mask, cmap='gray')
        mask_ax.set_title(
            f"Ground Truth Mask\n{forged_pixels:,} forged pixels ({forged_percent:.2f}%)",
            fontsize=10
        )
        mask_ax.axis('off')

        print(f"Image {row+1} [{label}]: {forged_percent:.2f}% forged ({forged_pixels:,} pixels)")

    plt.tight_layout()
    plt.show()

# Show one authentic and five forged training samples next to their ground-truth masks.
print("Ground Truth Analysis:")
print("="*60)
visualize_ground_truth(train_imgs, train_masks_split, train_auth, num_authentic=1, num_forged=5)

## SimpleCNN - Encoder-Decoder Structure

**Encoder (Compress):**
- 256×256×3 → 128×128×32 → 64×64×64 → 64×64×128
- Extracts features: edges, textures, forgery patterns

**Decoder (Expand):**
- 64×64×128 → 128×128×64 → 256×256×32 → 256×256×1
- Reconstructs spatial info, outputs pixel-wise predictions

**Output:** Sigmoid gives 0-1 probability per pixel

In [None]:
# ==================== SIMPLE CNN MODEL ====================
class SimpleCNN(nn.Module):
    """Small convolutional encoder-decoder producing a per-pixel probability map.

    The encoder halves the spatial resolution twice (two max-pools) while
    widening channels 3 -> 32 -> 64 -> 128. The decoder doubles the resolution
    twice with transposed convolutions (128 -> 64 -> 32), and a 1x1
    convolution followed by a sigmoid yields one channel in the 0-1 range.
    No skip connections are used.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool):
        """3x3 conv + ReLU, optionally followed by a 2x2 max-pool."""
        layers = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
        ]
        if pool:
            layers.append(nn.MaxPool2d(2))
        return nn.Sequential(*layers)

    @staticmethod
    def _upsample_stage(in_channels, out_channels):
        """Stride-2 transposed conv (doubles height and width) + ReLU."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True),
        )

    def __init__(self):
        super().__init__()

        # Encoder (downsampling)
        self.enc1 = self._conv_stage(3, 32, pool=True)
        self.enc2 = self._conv_stage(32, 64, pool=True)
        self.enc3 = self._conv_stage(64, 128, pool=False)

        # Decoder (upsampling)
        self.dec1 = self._upsample_stage(128, 64)
        self.dec2 = self._upsample_stage(64, 32)

        # Output head
        self.out = nn.Conv2d(32, 1, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a (N, 3, H, W) batch to (N, 1, H, W) probabilities."""
        encoded = self.enc3(self.enc2(self.enc1(x)))
        decoded = self.dec2(self.dec1(encoded))
        return self.sigmoid(self.out(decoded))

# Instantiate the network on the selected device and print its layer structure.
model = SimpleCNN().to(DEVICE)
print(model)

## Training - The Learning Process

**Each iteration:**
1. **Forward**: Images → Model → Predictions
2. **Loss**: How wrong? (Binary Cross Entropy compares pixel-by-pixel)
3. **Backward**: Calculate gradients (how to improve)
4. **Update**: Adjust model weights

**Validation**: Test on unseen data to check performance

In [None]:
# ==================== TRAINING SETUP ====================
# BCELoss expects probabilities, which matches the sigmoid applied at the end of SimpleCNN.forward.
criterion = nn.BCELoss()
# Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# ==================== TRAIN ====================
def train_epoch(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable yielding ``(images, masks)`` batches.
        criterion: Loss callable taking ``(outputs, masks)``.
        optimizer: Optimizer updating the model parameters.

    Returns:
        Mean of the per-batch loss values (each batch counts equally).
    """
    model.train()
    running_loss = 0

    for batch_images, batch_masks in loader:
        batch_images = batch_images.to(DEVICE)
        batch_masks = batch_masks.to(DEVICE)

        # Forward pass and loss
        batch_loss = criterion(model(batch_images), batch_masks)

        # Clear stale gradients, backpropagate, apply the update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(loader)

In [None]:
def validate(model, loader, criterion):
    """Compute the mean per-batch loss of ``model`` over ``loader`` without gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(images, masks)`` batches.
        criterion: Loss callable taking ``(outputs, masks)``.

    Returns:
        Mean of the per-batch loss values (each batch counts equally).
    """
    model.eval()
    running_loss = 0

    with torch.no_grad():
        for batch_images, batch_masks in loader:
            batch_images = batch_images.to(DEVICE)
            batch_masks = batch_masks.to(DEVICE)
            running_loss += criterion(model(batch_images), batch_masks).item()

    return running_loss / len(loader)

In [None]:
# Run the full schedule: one training pass and one validation pass per epoch,
# printing the mean per-batch loss of each.
print("Starting training...")
for epoch in range(EPOCHS):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)
    print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

## Calculate Competition F1 Score on Validation Data

This function generates predictions and calculates the exact competition score (oF1) on your validation data.

In [None]:
import scipy.optimize
import os
import json
import numba

class ParticipantVisibleError(Exception):
    """Raised by the scoring code when submitted run-length data is invalid."""


@numba.jit(nopython=True)
def _rle_encode_jit(x: np.ndarray, fg_val: int = 1) -> list:
    """Numba-jitted RLE encoder.

    Scans ``x`` in column-major order (the array is transposed before being
    flattened) and returns a flat list ``[start, length, start, length, ...]``
    with one pair per run of consecutive pixels equal to ``fg_val``.
    Start positions are 1-indexed.
    """
    # Flat, column-major positions of every foreground pixel.
    dots = np.where(x.T.flatten() == fg_val)[0]
    run_lengths = []
    prev = -2  # sentinel: guarantees the first foreground pixel opens a new run
    for b in dots:
        if b > prev + 1:
            # Not adjacent to the previous pixel: open a run (1-indexed start, length 0).
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1  # extend the current run by one pixel
        prev = b
    return run_lengths


def rle_encode(masks: list, fg_val: int = 1) -> str:
    """
    Run-length encode a list of masks into a single string.

    Adapted from contrails RLE https://www.kaggle.com/code/inversion/contrails-rle-submission
    Args:
        masks: list of numpy arrays of shape (height, width); pixels equal to fg_val are foreground
        fg_val: pixel value treated as foreground
    Returns: one JSON-encoded run-length list per mask, joined with semicolons.
    """
    encoded_masks = []
    for mask in masks:
        run_pairs = _rle_encode_jit(mask, fg_val)
        encoded_masks.append(json.dumps(run_pairs))
    return ';'.join(encoded_masks)


@numba.njit
def _rle_decode_jit(mask_rle: np.ndarray, height: int, width: int) -> np.ndarray:
    """
    Decode flat run-length pairs into a flat boolean mask.

    mask_rle: numpy array of run-length encoding pairs (start, length); starts are 1-indexed
    height, width: mask dimensions; the result has height * width elements
    Returns a flat numpy bool array, True - mask, False - background (the caller reshapes it)
    Raises ValueError if the array has odd length or consecutive runs overlap.

    NOTE(review): ``starts`` is a slice of ``mask_rle`` and is decremented in place,
    so the caller's array appears to be modified — pass a copy if it must be reused.
    """
    if len(mask_rle) % 2 != 0:
        # Numba requires raising a standard exception.
        raise ValueError('One or more rows has an odd number of values.')

    starts, lengths = mask_rle[0::2], mask_rle[1::2]
    starts -= 1  # convert 1-indexed starts to 0-indexed positions
    ends = starts + lengths  # exclusive end of each run
    # Only neighbouring runs are compared; ordering of starts is not checked here.
    for i in range(len(starts) - 1):
        if ends[i] > starts[i + 1]:
            raise ValueError('Pixels must not be overlapping.')
    img = np.zeros(height * width, dtype=np.bool_)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img


def rle_decode(mask_rle: str, shape: tuple) -> np.ndarray:
    """
    Decode one JSON-encoded run-length list into a 2-D mask.

    mask_rle: JSON list of alternating (start, length) values
    shape: (height, width) of array to return
    Returns numpy bool array of the given shape, filled in column-major order
    Raises ParticipantVisibleError if starts are not in ascending order, or if
    the jitted decoder rejects the data with a ValueError.
    """
    run_pairs = np.asarray(json.loads(mask_rle), dtype=np.int32)

    run_starts = run_pairs[0::2]
    if list(run_starts) != sorted(run_starts):
        raise ParticipantVisibleError('Submitted values must be in ascending order.')

    height, width = shape[0], shape[1]
    try:
        flat_mask = _rle_decode_jit(run_pairs, height, width)
        return flat_mask.reshape(shape, order='F')
    except ValueError as e:
        raise ParticipantVisibleError(str(e)) from e


def calculate_f1_score(pred_mask: np.ndarray, gt_mask: np.ndarray):
    """Return the pixel-wise F1 score between a predicted and a ground-truth mask.

    Only pixels equal to 1 count as positive and only pixels equal to 0 count
    as negative. Returns 0 when precision and recall are both zero.
    """
    pred_flat = pred_mask.flatten()
    gt_flat = gt_mask.flatten()

    pred_positive = pred_flat == 1
    gt_positive = gt_flat == 1

    tp = np.sum(pred_positive & gt_positive)
    fp = np.sum(pred_positive & (gt_flat == 0))
    fn = np.sum((pred_flat == 0) & gt_positive)

    if (tp + fp) > 0:
        precision = tp / (tp + fp)
    else:
        precision = 0

    if (tp + fn) > 0:
        recall = tp / (tp + fn)
    else:
        recall = 0

    if not (precision + recall) > 0:
        return 0
    return 2 * (precision * recall) / (precision + recall)


def calculate_f1_matrix(pred_masks: list, gt_masks: list):
    """
    Build the matrix of pairwise F1 scores between predicted and ground truth instances.

    Parameters:
    pred_masks (np.ndarray):
            First dimension is the number of predicted instances.
            Each instance is a binary mask of shape (height, width).
    gt_masks (np.ndarray):
            First dimension is the number of ground truth instances.
            Each instance is a binary mask of shape (height, width).

    Returns:
    np.ndarray with one row per prediction and one column per ground truth
    instance; zero rows are appended when there are fewer predictions than
    ground truth instances.
    """
    pred_count = len(pred_masks)
    gt_count = len(gt_masks)
    f1_matrix = np.zeros((pred_count, gt_count))

    # Score every (prediction, ground truth) pair
    for row, pred in enumerate(pred_masks):
        pred_flat = pred.flatten()
        for col, gt in enumerate(gt_masks):
            f1_matrix[row, col] = calculate_f1_score(pred_mask=pred_flat, gt_mask=gt.flatten())

    missing_rows = gt_count - f1_matrix.shape[0]
    if missing_rows > 0:
        # Pad with zero rows so every ground truth instance can still be assigned
        padding = np.zeros((missing_rows, gt_count))
        f1_matrix = np.vstack((f1_matrix, padding))

    return f1_matrix


def oF1_score(pred_masks: list, gt_masks: list):
    """
    Calculate the optimal F1 score of predicted masks against ground truth masks.

    Predictions are matched one-to-one to ground truth masks with the
    Hungarian algorithm so that the total F1 over matched pairs is maximal.
    The mean F1 of the matched pairs is then scaled down when there are more
    predictions than ground truth masks, because the assignment itself simply
    drops the surplus predictions.

    Parameters:
    pred_masks (list of np.ndarray): List of predicted binary masks.
    gt_masks (np.ndarray): Array of ground truth binary masks.
    Returns:
    float: Optimal F1 score.
    """
    f1_matrix = calculate_f1_matrix(pred_masks, gt_masks)

    # linear_sum_assignment minimises cost, so negate the scores to maximise F1
    matched_rows, matched_cols = scipy.optimize.linear_sum_assignment(-f1_matrix)
    mean_matched_f1 = np.mean(f1_matrix[matched_rows, matched_cols])

    # Penalty factor is 1 unless there are more predictions than ground truth masks
    gt_count = len(gt_masks)
    excess_predictions_penalty = gt_count / max(len(pred_masks), gt_count)
    return mean_matched_f1 * excess_predictions_penalty


def evaluate_single_image(label_rles: str, prediction_rles: str, shape_str: str) -> float:
    """Score one image: decode both semicolon-separated RLE strings and return their oF1.

    ``shape_str`` is the JSON-encoded (height, width) used to decode every mask.
    """
    shape = json.loads(shape_str)

    def _decode_all(joined_rles):
        # One mask per semicolon-separated RLE entry
        return [rle_decode(single, shape=shape) for single in joined_rles.split(';')]

    gt_instances = _decode_all(label_rles)
    pred_instances = _decode_all(prediction_rles)
    return oF1_score(pred_instances, gt_instances)


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Compute the mean per-image competition score.

    Rows where either the label or the prediction is the string 'authentic'
    score 1.0 only when both are 'authentic', otherwise 0.0. All remaining
    rows are scored with evaluate_single_image on their RLE strings.

    Predictions are attached by DataFrame index, so ``solution`` and
    ``submission`` must share the same index. ``solution`` is not modified.

    Args:
        solution (pd.DataFrame): The ground truth DataFrame with 'annotation' and 'shape' columns.
        submission (pd.DataFrame): The submission DataFrame with an 'annotation' column.
        row_id_column_name (str): The name of the column containing row IDs (not used here).
    Returns:
        float
    """
    df = solution.copy()
    df = df.rename(columns={'annotation': 'label'})

    df['prediction'] = submission['annotation']
    # Check for correct 'authentic' label
    authentic_indices = (df['label'] == 'authentic') | (df['prediction'] == 'authentic')
    df['image_score'] = ((df['label'] == df['prediction']) & authentic_indices).astype(float)

    # Only run mask-level scoring when some row actually needs it: applying over
    # an empty selection would hand an empty DataFrame back to the assignment.
    needs_mask_scoring = ~authentic_indices
    if needs_mask_scoring.any():
        df.loc[needs_mask_scoring, 'image_score'] = df.loc[needs_mask_scoring].apply(
            lambda row: evaluate_single_image(row['label'], row['prediction'], row['shape']), axis=1
        )
    return float(np.mean(df['image_score']))

print("Competition scoring functions loaded successfully!")

In [None]:
def calculate_competition_score_with_split(model, image_paths, mask_paths, is_authentic, device=DEVICE, img_size=IMG_SIZE):
    """
    Calculate competition F1 score on a dataset (train or val) with split by class.

    For every image the model predicts at ``img_size`` resolution; the
    probability map is resized back to the original resolution and thresholded
    at 0.5. An empty binary mask is submitted as "authentic", anything else as
    an RLE string. Ground truth for forged images uses the first slice of a
    3-D mask array, the same convention the training dataset uses.
    NOTE(review): any further slices of a 3-D mask are ignored — confirm this
    is intended for multi-instance masks.

    Args:
        model: Trained PyTorch model
        image_paths: List of image file paths
        mask_paths: List of mask file paths (None for authentic images)
        is_authentic: List of booleans indicating if image is authentic
        device: Device to run inference on
        img_size: Size to resize images for inference

    Returns:
        dict: 'overall' (score over all images), 'authentic_f1' (fraction of
        authentic images predicted "authentic"), 'forged_f1' (score over
        forged images only), 'authentic_count' and 'forged_count'.
    """
    model.eval()

    # Prepare ground truth and predictions
    solution_data = []
    submission_data = []

    # Track authentic and forged separately
    authentic_correct = 0
    authentic_total = 0
    forged_solution_data = []
    forged_submission_data = []

    print(f"Generating predictions for {len(image_paths)} images...")

    with torch.no_grad():
        for idx, img_path in enumerate(image_paths):
            # Load image
            img = cv2.imread(img_path)
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_shape = img_rgb.shape[:2]

            # Get image ID
            image_id = os.path.basename(img_path).replace('.png', '')

            # Prepare ground truth
            if is_authentic[idx]:
                # Authentic image
                gt_annotation = "authentic"
                gt_shape = "authentic"
            else:
                # Forged image - load mask and encode to RLE
                gt_mask = np.load(mask_paths[idx])
                if gt_mask.ndim == 3:
                    # Take the first slice along the leading axis, matching the
                    # dataset and visualisation code. The previous [:, :, 0]
                    # sliced the wrong axis and did not yield an image-shaped mask.
                    gt_mask = gt_mask[0, :, :]
                gt_mask = (gt_mask > 0).astype(np.uint8)

                # RLE encode ground truth
                gt_annotation = rle_encode([gt_mask], fg_val=1)
                gt_shape = json.dumps(list(original_shape))

            solution_data.append({
                'case_id': image_id,
                'annotation': gt_annotation,
                'shape': gt_shape
            })

            # Generate prediction (same preprocessing as training)
            img_resized = cv2.resize(img_rgb, (img_size, img_size)) / 255.0
            img_tensor = torch.from_numpy(img_resized).float().permute(2, 0, 1).unsqueeze(0)
            img_tensor = img_tensor.to(device)

            # Predict
            pred = model(img_tensor).cpu().numpy()[0, 0]

            # Resize back to original (cv2.resize takes (width, height)), then threshold
            pred = cv2.resize(pred, (original_shape[1], original_shape[0]))
            pred_binary = (pred > 0.5).astype(np.uint8)

            # Check if authentic or forged
            if pred_binary.sum() == 0:
                pred_annotation = "authentic"
            else:
                # RLE encode prediction
                pred_annotation = rle_encode([pred_binary], fg_val=1)

            submission_data.append({
                'case_id': image_id,
                'annotation': pred_annotation
            })

            # Track by class
            if is_authentic[idx]:
                authentic_total += 1
                if pred_annotation == "authentic":
                    authentic_correct += 1
            else:
                # Store forged images separately for oF1 calculation
                forged_solution_data.append({
                    'case_id': image_id,
                    'annotation': gt_annotation,
                    'shape': gt_shape
                })
                forged_submission_data.append({
                    'case_id': image_id,
                    'annotation': pred_annotation
                })

    # Create dataframes
    solution_df = pd.DataFrame(solution_data)
    submission_df = pd.DataFrame(submission_data)

    # Calculate overall score
    overall_score = score(solution_df, submission_df, row_id_column_name='case_id')

    # Calculate authentic F1 (accuracy for authentic class)
    authentic_f1 = authentic_correct / authentic_total if authentic_total > 0 else 0.0

    # Calculate forged oF1 score
    if len(forged_solution_data) > 0:
        forged_solution_df = pd.DataFrame(forged_solution_data)
        forged_submission_df = pd.DataFrame(forged_submission_data)
        forged_f1 = score(forged_solution_df, forged_submission_df, row_id_column_name='case_id')
    else:
        forged_f1 = 0.0

    return {
        'overall': overall_score,
        'authentic_f1': authentic_f1,
        'forged_f1': forged_f1,
        'authentic_count': authentic_total,
        'forged_count': len(forged_solution_data)
    }

print("Competition scoring validation function with class split ready!")

In [None]:
# Calculate competition F1 score on validation data
print("\n" + "="*50)
print("CALCULATING COMPETITION F1 SCORE ON VALIDATION DATA")
print("="*50)

# Score the trained model on the held-out validation split, overall and per class.
val_results = calculate_competition_score_with_split(
    model=model,
    image_paths=val_imgs,
    mask_paths=val_masks_split,
    is_authentic=val_auth,
    device=DEVICE,
    img_size=IMG_SIZE
)

# Report the overall score, then the authentic and forged results with their image counts.
print(f"\n{'='*50}")
print(f"VALIDATION RESULTS")
print(f"{'='*50}")
print(f"Overall F1 Score:     {val_results['overall']:.4f}")
print(f"\nPer-Class Performance:")
print(f"  Authentic F1:       {val_results['authentic_f1']:.4f}  ({val_results['authentic_count']} images)")
print(f"  Forged (oF1):       {val_results['forged_f1']:.4f}  ({val_results['forged_count']} images)")
print(f"{'='*50}\n")

## Making Predictions & Submission

**Steps:**
1. Load test image → Preprocess (same as training)
2. Model predicts → Get probability mask
3. Resize the probability mask to the original dimensions
4. Threshold at 0.5 → Binary mask (0 or 1)
5. **Check mask:**
   - If `sum == 0` (no forged pixels) → Label as `"authentic"`
   - If `sum > 0` (has forged pixels) → RLE encode mask

**Output:**
- Authentic images: `"authentic"` string
- Forged images: RLE compressed mask


In [None]:
# ==================== PREDICTION (AUTHENTIC vs RLE) ====================
# Using the official competition RLE encoder defined earlier

model.eval()
test_image_paths = sorted(glob(DATA_PATH+'test_images/*.png'))
test_predictions = {}

print(f"Predicting {len(test_image_paths)} test images...")
with torch.no_grad():
    for img_path in test_image_paths:
        # Load and preprocess (same steps as training: RGB, resize, scale to 0-1)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        original_shape = img.shape[:2]

        img_resized = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) / 255.0
        img_tensor = torch.from_numpy(img_resized).float().permute(2, 0, 1).unsqueeze(0)
        img_tensor = img_tensor.to(DEVICE)

        # Predict: single-channel probability map for the one image in the batch
        pred = model(img_tensor).cpu().numpy()[0, 0]

        # Resize back to original (cv2.resize takes (width, height)), then threshold
        pred = cv2.resize(pred, (original_shape[1], original_shape[0]))
        pred_binary = (pred > 0.5).astype(np.uint8)

        # Get image_id (file name without extension)
        image_id = os.path.splitext(os.path.basename(img_path))[0]

        # Check if authentic or forged
        if pred_binary.sum() == 0:
            test_predictions[image_id] = "authentic"
        else:
            # Use official competition RLE encoder
            rle = rle_encode([pred_binary], fg_val=1)
            test_predictions[image_id] = rle

print(f"Processed {len(test_predictions)} test images")

# ==================== CREATE SUBMISSION ====================
submission_data = [{'case_id': k, 'annotation': v} for k, v in test_predictions.items()]
# Pin the columns so the CSV header and the summary below stay valid even
# when no test images were found (an empty list would give a column-less frame).
submission_df = pd.DataFrame(submission_data, columns=['case_id', 'annotation'])
submission_df.to_csv('submission.csv', index=False)

print("Submission created!")
print(f"\nSample predictions:")
print(submission_df.head(10))
print(f"\nAuthentic images: {(submission_df['annotation'] == 'authentic').sum()}")
print(f"Forged images: {(submission_df['annotation'] != 'authentic').sum()}")

In [None]:
# Preview the first rows of the submission as the notebook cell output.
submission_df[["case_id","annotation"]].head()

## Summary

**The score does not improve significantly beyond 0.303. This is likely due to the model's limited ability to predict forged pixels accurately. Because non-forged pixels vastly outnumber forged pixels, standard loss functions are biased toward the majority class and effectively ignore the forged regions in order to minimize the overall loss. As a result, the model is only correct on the authentic portion of the test set, which yields the score of 0.303.**

1. **Problem Identified:** Baseline model has 0.0000 F1 in CV on forged images due to:
   - Severe pixel-level class imbalance (majority of the pixels are non-forged)
   - BCE loss may not be suitable for imbalanced data
   - Too simple CNN architecture 

2. **Solutions to be explored:**
   - Better loss function (handles imbalance)
   - Better, Deeper architecture (Ex: U-Net)