In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

In [None]:
import numpy as np

def rle_encode(mask, fg_val=1):
    """
    Convert a binary mask to RLE (Run-Length Encoding) using a competition-style format.

    The mask is traversed in column-major (Fortran) order and positions are
    1-indexed.

    Args:
        mask (np.ndarray): 2D binary mask where foreground pixels are fg_val.
        fg_val (int, optional): Value considered as foreground. Default is 1.

    Returns:
        list[int]: Flat list ``[start_1, length_1, start_2, length_2, ...]`` of
        plain Python ints (so the result can be passed straight to
        ``json.dumps``). An empty list is returned when the mask has no
        foreground pixel.
    """
    # Column-major flattening: transpose first, then flatten row by row.
    flat = np.asarray(mask).T.flatten()
    fg_idx = np.flatnonzero(flat == fg_val)
    if fg_idx.size == 0:
        return []

    # A new run begins wherever two consecutive foreground indices are not adjacent.
    gap_after = np.flatnonzero(np.diff(fg_idx) > 1)
    run_starts = fg_idx[np.concatenate(([0], gap_after + 1))]
    run_ends = fg_idx[np.concatenate((gap_after, [fg_idx.size - 1]))]

    rle = np.empty(2 * run_starts.size, dtype=np.int64)
    rle[0::2] = run_starts + 1  # RLE positions are 1-indexed
    rle[1::2] = run_ends - run_starts + 1
    # .tolist() converts NumPy scalars to built-in ints.
    return rle.tolist()


In [None]:
import matplotlib.pyplot as plt

def visualize_mask(mask, title="Mask"):
    """
    Visualize a binary mask with grid and pixel values.

    Args:
        mask (np.ndarray): 2D array representing the mask. Values are shown on a
            gray colormap clipped to the range [0, 1].
        title (str): Title for the plot.
    """
    n_rows, n_cols = mask.shape

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.imshow(mask, cmap='gray', vmin=0, vmax=1)
    ax.set_title(title)
    ax.axis('off')

    # Add grid for clarity. Rows and columns are handled separately so that
    # non-square masks get the right number of lines on each axis.
    for r in range(n_rows + 1):
        ax.axhline(r - 0.5, color='red', alpha=0.3, linewidth=0.5)
    for c in range(n_cols + 1):
        ax.axvline(c - 0.5, color='red', alpha=0.3, linewidth=0.5)

    # Show pixel values; text colour contrasts with the pixel underneath.
    for r in range(n_rows):
        for c in range(n_cols):
            value = mask[r, c]
            ax.text(
                c, r, str(value),
                ha='center', va='center',
                color='blue' if value == 0 else 'white',
                fontweight='bold'
            )

    plt.show()


In [None]:
# Smallest useful demo: a 2x2 mask with three foreground pixels.
our_example = np.array([[1, 0], [1, 1]])

# Show the raw array, its RLE, and then the annotated picture.
print(f'Our example: {our_example}')
print(f"\nRLE encoding: {rle_encode(our_example)}")
visualize_mask(mask=our_example, title="Our mask")

In [None]:
# PLUS mask: 9x9 canvas with two 5-pixel bars crossing at the centre pixel (4, 4)
plus_mask = np.zeros(shape=(9, 9), dtype=np.uint8)
plus_mask[4, 2:7] = 1  # horizontal bar on the middle row
plus_mask[2:7, 4] = 1  # vertical bar on the middle column

# Raw array, RLE, then the annotated picture.
print(plus_mask)
print(f"\nRLE encoding: {rle_encode(plus_mask)}")
visualize_mask(mask=plus_mask, title="Plus - a mask for segmentation")

In [None]:
# MINUS mask: 9x9 canvas with a single 5-pixel horizontal bar on the middle row
minus_mask = np.zeros(shape=(9, 9), dtype=np.uint8)
minus_mask[4, slice(2, 7)] = 1

# Raw array, RLE, then the annotated picture.
print(minus_mask)
print(f"\nRLE encoding: {rle_encode(minus_mask)}")
visualize_mask(mask=minus_mask, title="Minus - segmentation mask")

In [None]:
# Detailed RLE explanation for plus
print("Plus mask (9x9):")
for mask_row in plus_mask:
    # Each value is followed by a single space (including the last one).
    print(''.join(f"{value} " for value in mask_row))

print("\n1. Flatten to string:")
# Flatten in column-major (Fortran) order, the same traversal rle_encode
# performs with mask.T.flatten(), so the intermediate steps shown here
# describe exactly what the encoder does.
flat_plus = plus_mask.flatten(order='F')
print(' '.join(map(str, flat_plus)))

print("\n2. Split into sequences:")
# Add zeros at borders for correct boundary detection
padded = np.concatenate([[0], flat_plus, [0]])
# 1-indexed positions where the value flips: even slots are run starts,
# odd slots are the position just past the end of that run.
changes = np.where(padded[1:] != padded[:-1])[0] + 1
runs = changes.copy()
# Turn every "end" position into a run length (end - start).
runs[1::2] -= runs[::2]

print(f"Change positions: {changes}")
print(f"Sequence lengths: {runs}")

print(f"\n3. Final RLE: '{rle_encode(plus_mask)}'")

In [None]:
import numpy as np

# Example 9x9 mask: one full row (row 2) crossing two full columns (2 and 5).
# Unlike a centred plus it is NOT symmetric, so the flattening order matters.
plus_mask = np.array([
    [0,0,1,0,0,1,0,0,0],
    [0,0,1,0,0,1,0,0,0],
    [1,1,1,1,1,1,1,1,1],
    [0,0,1,0,0,1,0,0,0],
    [0,0,1,0,0,1,0,0,0],
    [0,0,1,0,0,1,0,0,0],
    [0,0,1,0,0,1,0,0,0],
    [0,0,1,0,0,1,0,0,0],
    [0,0,1,0,0,1,0,0,0]
])

print("Step 0: Plus mask (9x9):")
for row in plus_mask:
    print(' '.join(map(str, row)))

# Step 1: Flatten mask
# Column-major (Fortran) order is the traversal rle_encode uses
# (mask.T.flatten()); flattening row-major here would make the runs printed in
# Step 2 disagree with the final RLE for this asymmetric mask.
flat_plus = plus_mask.flatten(order='F')
print("\nStep 1: Flattened mask (column-major order):")
print(' '.join(map(str, flat_plus)))

# Step 2: Detect sequences for RLE
# Add zeros at boundaries to detect changes
padded = np.concatenate([[0], flat_plus, [0]])
changes = np.where(padded[1:] != padded[:-1])[0] + 1  # 1-indexed positions where value changes
runs = changes.copy()
runs[1::2] -= runs[::2]  # turn each run's end position into its length

print("\nStep 2: Sequence detection")
print(f"Change positions: {changes}")
print(f"Run lengths: {runs}")

# Step 3: Use your rle_encode function
def rle_encode(mask, fg_val=1):
    """
    Convert binary mask to RLE (1-indexed, column-major order).

    This re-defines the ``rle_encode`` from the earlier cell so that this cell
    can be run on its own; the contract is the same.

    Args:
        mask (np.ndarray): 2D binary mask where foreground pixels are fg_val.
        fg_val (int, optional): Value considered as foreground. Default is 1.

    Returns:
        list[int]: Flat list ``[start_1, length_1, start_2, length_2, ...]`` of
        plain Python ints; empty when the mask has no foreground pixel.
    """
    # Column-major flattening: transpose first, then flatten row by row.
    flat = np.asarray(mask).T.flatten()
    fg_idx = np.flatnonzero(flat == fg_val)
    if fg_idx.size == 0:
        return []

    # A new run begins wherever two consecutive foreground indices are not adjacent.
    gap_after = np.flatnonzero(np.diff(fg_idx) > 1)
    run_starts = fg_idx[np.concatenate(([0], gap_after + 1))]
    run_ends = fg_idx[np.concatenate((gap_after, [fg_idx.size - 1]))]

    rle = np.empty(2 * run_starts.size, dtype=np.int64)
    rle[0::2] = run_starts + 1  # RLE positions are 1-indexed
    rle[1::2] = run_ends - run_starts + 1
    # .tolist() converts NumPy scalars to built-in ints.
    return rle.tolist()

# Heading and encoded mask on consecutive lines, emitted by a single print call.
print("\nStep 3: Final RLE (competition format):", rle_encode(plus_mask), sep="\n")


In [None]:
# Seed NumPy's global RNG: the np.random.random() draws in the submission loop
# further down this cell read from it, so the seed fixes which cases get a mask.
np.random.seed(61)

def _as_2d_mask(mask):
    """Reduce a loaded mask array to 2-D, or return None when that is not possible."""
    if mask.ndim == 3:
        if mask.shape[0] == 1:
            mask = mask[0]
        elif mask.shape[2] == 1:
            mask = mask[:, :, 0]
        else:
            # NOTE(review): for any other 3-D shape only index 0 of the LAST axis
            # is kept, after binarising on "== 1". Confirm this matches the
            # dataset's mask layout.
            mask = (mask == 1).astype(np.uint8)[:, :, 0]
    return mask if mask.ndim == 2 else None


def mask_distribution(train_masks_dir=None, heatmap_size=(100, 100)):
    """
    Accumulate a normalised location heatmap of foreground pixels over all
    ``.npy`` masks in a directory and locate its hottest bin.

    Every foreground pixel (value > 0) is mapped to a bin by its position
    relative to the mask's own height/width, so masks of different sizes
    contribute to the same grid.

    Args:
        train_masks_dir (str, optional): Directory containing ``.npy`` masks.
            Defaults to the competition's ``train_masks`` folder.
        heatmap_size (tuple[int, int], optional): ``(rows, cols)`` of the
            heatmap grid. Default is ``(100, 100)``.

    Returns:
        tuple: ``((norm_x, norm_y), heatmap)``.
            * ``(norm_x, norm_y)`` is the hottest bin's index divided by the
              grid size, or ``(0.5, 0.5)`` when no foreground pixel was found.
            * ``heatmap`` is a float32 array of per-bin pixel counts, or
              ``None`` when ``train_masks_dir`` does not exist.

    Files that cannot be loaded or processed are skipped (best effort); one
    ``RuntimeWarning`` summarising them is emitted at the end.
    """
    import warnings

    if train_masks_dir is None:
        train_masks_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/train_masks'

    if not os.path.exists(train_masks_dir):
        return (0.5, 0.5), None

    n_rows, n_cols = heatmap_size
    # Count in int64: a float32 accumulator silently stops increasing once a
    # bin reaches 2**24.
    counts = np.zeros(n_rows * n_cols, dtype=np.int64)
    skipped = []

    for mask_file in sorted(os.listdir(train_masks_dir)):
        if not mask_file.endswith('.npy'):
            continue
        try:
            mask = _as_2d_mask(np.load(os.path.join(train_masks_dir, mask_file)))
            if mask is None:
                continue

            y_coords, x_coords = np.where(mask > 0)
            if y_coords.size == 0:
                continue

            height, width = mask.shape
            # Normalise to [0, 1), scale to the grid, truncate, and clamp to the last bin.
            bin_y = np.minimum((y_coords / height * n_rows).astype(np.intp), n_rows - 1)
            bin_x = np.minimum((x_coords / width * n_cols).astype(np.intp), n_cols - 1)
            counts += np.bincount(bin_y * n_cols + bin_x, minlength=counts.size)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole scan.
            skipped.append(f"{mask_file}: {exc}")

    if skipped:
        warnings.warn(
            f"mask_distribution skipped {len(skipped)} file(s); first: {skipped[0]}",
            RuntimeWarning,
        )

    heatmap = counts.reshape(n_rows, n_cols).astype(np.float32)

    if counts.any():
        hot_row, hot_col = np.unravel_index(np.argmax(counts), (n_rows, n_cols))
        max_norm_y = hot_row / n_rows
        max_norm_x = hot_col / n_cols
        return (max_norm_x, max_norm_y), heatmap

    return (0.5, 0.5), heatmap

hottest_norm_pos, heatmap = mask_distribution()

# heatmap is None when the training-mask directory is not available.
if heatmap is not None:
    fig, ax = plt.subplots(figsize=(10, 8))
    # extent maps the bin grid onto [0, 1] so the tick values really are the
    # normalised coordinates named in the axis labels; y runs top-to-bottom
    # like image rows.
    im = ax.imshow(heatmap, cmap='hot', interpolation='nearest', extent=(0, 1, 1, 0))
    fig.colorbar(im, ax=ax)
    ax.set_title('Forgery Location Heatmap')
    ax.set_xlabel('Normalized X')
    ax.set_ylabel('Normalized Y')
    plt.show()

test_images_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/test_images'
sample_submission = pd.read_csv('/kaggle/input/recodai-luc-scientific-image-forgery-detection/sample_submission.csv')

# Fraction of test cases that receive a small box mask instead of 'authentic'.
FORGERY_PROBABILITY = 0.01


def make_box_annotation(width, height, norm_center):
    """
    Build the annotation for one image flagged as forged.

    A square box (at most 8 px, and at most 1/20 of the shorter image side) is
    placed around ``norm_center`` and RLE-encoded with ``rle_encode``.

    Args:
        width (int): Image width in pixels.
        height (int): Image height in pixels.
        norm_center (tuple[float, float]): ``(x, y)`` centre as fractions of the
            image width/height.

    Returns:
        str: JSON list of RLE integers, or ``'authentic'`` when the image is too
        small for the box to contain any pixel.
    """
    mask = np.zeros((height, width), dtype=np.uint8)

    center_x = int(norm_center[0] * width)
    center_y = int(norm_center[1] * height)

    # Keep the centre far enough from the border for an 8 px box to fit.
    center_x = max(4, min(center_x, width - 5))
    center_y = max(4, min(center_y, height - 5))

    side = min(8, min(width, height) // 20)
    y0 = center_y - side // 2
    x0 = center_x - side // 2
    mask[y0:y0 + side, x0:x0 + side] = 1

    # int() keeps the list JSON-serialisable even if rle_encode yields NumPy scalars.
    rle = [int(v) for v in rle_encode(mask)]
    # Shorter side < 20 px gives a zero-size box; report that as 'authentic'
    # rather than writing an empty "[]" mask.
    return json.dumps(rle) if rle else 'authentic'


submission_data = []
for case_id in sample_submission['case_id']:
    img_path = os.path.join(test_images_dir, f"{case_id}.png")

    with Image.open(img_path) as img:
        width, height = img.size

    # Exactly one draw from the global RNG per case, in file order.
    if np.random.random() < FORGERY_PROBABILITY:
        annotation = make_box_annotation(width, height, hottest_norm_pos)
    else:
        annotation = 'authentic'

    submission_data.append({
        'case_id': case_id,
        'annotation': annotation
    })

# Explicit columns keep the CSV header even when there are no rows.
submission = pd.DataFrame(submission_data, columns=['case_id', 'annotation'])
submission.to_csv('submission.csv', index=False)