<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    📊 Baseline Strategy: Understanding RLE &amp; Simple Submission
</h1>

<div style="
    background: rgba(99, 179, 237, 0.1);
    border-left: 4px solid #63b3ed;
    border-radius: 8px;
    padding: 20px;
    margin: 20px 0;
    position: relative;
    z-index: 1;
">
    <h3 style="
        color: #63b3ed;
        margin-top: 0;
        font-size: 1.3em;
        display: flex;
        align-items: center;
        gap: 10px;
    ">
        🎯 What we'll do in this notebook:
    </h3>
    <ul style="
        color: #f1f5f9;
        font-size: 1.1em;
        line-height: 1.6;
        margin-bottom: 0;
    ">
        <li>üß™ Understand RLE metric with practical examples</li>
        <li>üöÄ Create a simple "authentic-only" submission</li>
        <li>üìà Learn why this strategy works in some competitions</li>
        <li>üé≤ Test our baseline on the leaderboard(score 0.30 or 30% f1-score)</li>
    </ul>
</div>

<div style="
    background: rgba(255, 255, 255, 0.05);
    border-radius: 10px;
    padding: 20px;
    position: relative;
    z-index: 1;
">
    <h3 style="
        color: #63b3ed;
        margin-top: 0;
        font-size: 1.3em;
        display: flex;
        align-items: center;
        gap: 10px;
    ">
        💡 Why "authentic-only" submission?
    </h3>
    <p style="color: #f1f5f9; font-size: 1.1em; line-height: 1.6;">
        <strong>Experienced competitors often start with this approach!</strong> In many datasets, 
        the majority of images don't contain any objects/forgeries. By submitting "authentic" for all images, 
        we get a baseline score that helps us understand the data distribution.
    </p>
    
<div style="
        background: rgba(99, 179, 237, 0.15);
        border-radius: 8px;
        padding: 15px;
        margin: 15px 0;
    ">
        <h4 style="color: #63b3ed; margin-top: 0;">When this strategy works well:</h4>
        <ul style="color: #f1f5f9; line-height: 1.5;">
            <li>üìä <strong>Imbalanced datasets</strong> - when most images are truly "authentic"</li>
            <li>‚ö° <strong>Quick baseline</strong> - to test submission pipeline</li>
            <li>üìà <strong>Metric understanding</strong> - see how the scoring system works</li>
            <li>üîç <strong>Data exploration</strong> - understand the competition dynamics</li>
        </ul>
</div>
    
<div style="
        background: rgba(247, 127, 127, 0.15);
        border-radius: 8px;
        padding: 15px;
        margin: 15px 0;
    ">
        <h4 style="color: #f77f7f; margin-top: 0;">⚠️ Important note:</h4>
        <p style="color: #f1f5f9; margin: 0;">
            This is just a <strong>starting point</strong>! While it gives us a quick baseline, 
            to actually compete we'll need to build proper segmentation models. But first, 
            let's make sure our submission pipeline works correctly!
        </p>
</div>
</div>
</div>

In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

In [None]:
def rle_encode(mask, fg_val=1):
    """
    Convert binary mask to RLE using the competition metric format.

    The mask is scanned column by column (it is transposed before being
    flattened) and run start positions are 1-based.

    Args:
        mask: Array-like mask (typically 2-D); elements equal to ``fg_val``
            are treated as foreground.
        fg_val: Value that marks a foreground pixel.

    Returns:
        Flat list ``[start_1, length_1, start_2, length_2, ...]`` of built-in
        Python ints, so it can be printed cleanly or passed to ``json.dumps``
        directly. An empty list is returned when there is no foreground.
    """
    # Transposing first makes the flat index advance down each column.
    pixels = np.flatnonzero(np.asarray(mask).T.ravel() == fg_val)
    if pixels.size == 0:
        return []

    # A new run starts wherever two consecutive foreground indices are not adjacent.
    run_first = np.concatenate(([0], np.flatnonzero(np.diff(pixels) > 1) + 1))
    run_stop = np.append(run_first[1:], pixels.size)

    encoded = np.empty(2 * run_first.size, dtype=np.int64)
    encoded[0::2] = pixels[run_first] + 1  # convert to 1-based start positions
    encoded[1::2] = run_stop - run_first   # indices inside a run are contiguous
    # tolist() converts NumPy scalars to built-in ints.
    return encoded.tolist()

In [None]:
def visualize_mask(mask, title):
    """
    Visualize mask as a grayscale image with a pixel grid and value labels.

    Args:
        mask: 2-D array indexable as ``mask[row, col]``; drawn with a gray
            colormap fixed to the 0..1 range.
        title: Text shown above the plot.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    n_rows, n_cols = mask.shape

    plt.figure(figsize=(6, 6))
    plt.imshow(mask, cmap='gray', vmin=0, vmax=1)
    plt.title(title)
    plt.axis('off')

    # Add grid for clarity. Rows and columns are counted separately so that
    # non-square masks get the right number of lines in each direction.
    for r in range(n_rows + 1):
        plt.axhline(r - 0.5, color='red', alpha=0.3, linewidth=0.5)
    for c in range(n_cols + 1):
        plt.axvline(c - 0.5, color='red', alpha=0.3, linewidth=0.5)

    # Show pixel values. With the gray colormap 0 is drawn black and 1 white,
    # so use white text on 0-cells and blue text on the bright cells; the
    # label must never share the colour of the cell underneath it.
    for i in range(n_rows):
        for j in range(n_cols):
            value = mask[i, j]
            plt.text(j, i, str(value), ha='center', va='center',
                     color='white' if value == 0 else 'blue', fontweight='bold')

    plt.show()

In [None]:
# Smallest useful demo: a 2x2 mask with three foreground pixels.
our_example = np.array([[1, 0],
                        [1, 1]])

print(f'Our example: {our_example}')
example_rle = rle_encode(our_example)
print(f"\nRLE encoding: {example_rle}")
visualize_mask(our_example, "Our mask")

In [None]:
# 9x9 "plus" sign: two 5-pixel bars crossing at the centre pixel (4, 4).
plus_arm = slice(2, 7)
plus_mask = np.zeros((9, 9), dtype=np.uint8)
plus_mask[4, plus_arm] = 1  # horizontal bar
plus_mask[plus_arm, 4] = 1  # vertical bar

print(plus_mask)
plus_rle = rle_encode(plus_mask)
print(f"\nRLE encoding: {plus_rle}")
visualize_mask(plus_mask, "Plus - a mask for segmentation")

In [None]:
# 9x9 "minus" sign: a single 5-pixel horizontal bar on the middle row.
minus_arm = slice(2, 7)
minus_mask = np.zeros((9, 9), dtype=np.uint8)
minus_mask[4, minus_arm] = 1

print(minus_mask)
minus_rle = rle_encode(minus_mask)
print(f"\nRLE encoding: {minus_rle}")
visualize_mask(minus_mask, "Minus - segmentation mask")

In [None]:
# Detailed RLE explanation for plus
print("Plus mask (9x9):")
for mask_row in plus_mask:
    # Each value is followed by a single space, one mask row per output line.
    print(''.join(f"{value} " for value in mask_row))

print("\n1. Flatten to string:")
# rle_encode() transposes the mask before flattening (column-by-column scan),
# so flatten the same way here; otherwise the walkthrough would only match the
# encoder for masks that happen to be symmetric.
flat_plus = plus_mask.T.flatten()
print(' '.join(map(str, flat_plus)))

print("\n2. Split into sequences:")
# Add zeros at borders for correct boundary detection
padded = np.concatenate([[0], flat_plus, [0]])
# 1-based positions where the value flips: even slots are run starts,
# odd slots are the position just past the end of that run.
changes = np.where(padded[1:] != padded[:-1])[0] + 1
runs = changes.copy()
# Turn every "end" slot into a length (end - start); start slots are kept.
runs[1::2] -= runs[::2]

print(f"Change positions: {changes}")
# `runs` interleaves starts and lengths, so label it accordingly.
print(f"Run starts and lengths: {runs}")

print(f"\n3. Final RLE: '{rle_encode(plus_mask)}'")

<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    Create a submission distributed around the most frequent position in the masks
</h1>
</div>

In [None]:
# Seed NumPy's global RNG so the np.random draws used later when building the
# submission are reproducible from run to run.
np.random.seed(73)

def _as_2d_mask(mask):
    """Reduce a loaded mask array to 2-D; return None when that is not possible."""
    if mask.ndim == 3:
        if mask.shape[0] == 1:
            mask = mask[0]
        elif mask.shape[2] == 1:
            mask = mask[:, :, 0]
        else:
            # NOTE(review): for a stack with several planes this keeps only
            # index 0 of the LAST axis after thresholding on == 1. That is the
            # behaviour this notebook always had, but if masks are stored as
            # (n_regions, H, W) a union over axis 0 is probably what is
            # wanted -- confirm against the dataset before changing it.
            mask = (mask == 1).astype(np.uint8)[:, :, 0]
    return mask if mask.ndim == 2 else None


def mask_distribution(
    train_masks_dir='/kaggle/input/recodai-luc-scientific-image-forgery-detection/train_masks',
    heatmap_size=(100, 100),
):
    """
    Build a heatmap of where foreground pixels fall inside the training masks.

    Every ``.npy`` file in ``train_masks_dir`` is loaded, reduced to 2-D, and
    each pixel with value > 0 is binned by its position normalized to the
    mask's own height/width, so masks of different sizes share one grid.

    Args:
        train_masks_dir: Directory containing the ``.npy`` mask files.
        heatmap_size: ``(rows, cols)`` of the heatmap grid.

    Returns:
        ``((norm_x, norm_y), heatmap)`` where ``(norm_x, norm_y)`` is the
        normalized top-left corner of the fullest bin and ``heatmap`` is a
        float32 array of per-bin pixel counts.
        ``((0.5, 0.5), None)`` when the directory does not exist, and
        ``((0.5, 0.5), heatmap)`` with an all-zero heatmap when no foreground
        pixel was found.
    """
    n_rows, n_cols = heatmap_size

    # isdir (rather than exists) so a stray file at that path cannot reach os.listdir.
    if not os.path.isdir(train_masks_dir):
        return (0.5, 0.5), None

    # Integer accumulator: exact for any count, unlike float32 which stops
    # incrementing by 1 above 2**24.
    counts = np.zeros(n_rows * n_cols, dtype=np.int64)

    for mask_file in sorted(os.listdir(train_masks_dir)):
        if not mask_file.endswith('.npy'):
            continue
        try:
            mask = _as_2d_mask(np.load(os.path.join(train_masks_dir, mask_file)))
            if mask is None:
                continue

            y_coords, x_coords = np.where(mask > 0)
            if y_coords.size == 0:
                continue

            height, width = mask.shape
            # Normalize to [0, 1), scale to the grid, and clamp to the last bin.
            bin_y = np.minimum((y_coords / height * n_rows).astype(np.int64), n_rows - 1)
            bin_x = np.minimum((x_coords / width * n_cols).astype(np.int64), n_cols - 1)
            # One vectorized histogram per mask instead of a Python loop per pixel.
            counts += np.bincount(bin_y * n_cols + bin_x, minlength=n_rows * n_cols)
        except Exception as exc:
            # Best effort: a bad file must not abort the scan, but say so
            # instead of dropping it silently.
            print(f"Skipping mask {mask_file}: {exc}")
            continue

    grid = counts.reshape(n_rows, n_cols)
    heatmap = grid.astype(np.float32)

    if counts.any():
        peak_y, peak_x = np.unravel_index(np.argmax(grid), grid.shape)
        return (float(peak_x / n_cols), float(peak_y / n_rows)), heatmap

    return (0.5, 0.5), heatmap

# Normalized (x, y) of the fullest heatmap bin, plus the heatmap itself
# (None when the training-mask directory is not available).
hottest_norm_pos, heatmap = mask_distribution()

# Only plot when a heatmap was actually produced.
if heatmap is not None:
    plt.figure(figsize=(10, 8))
    plt.imshow(heatmap, cmap='hot', interpolation='nearest')
    plt.colorbar()
    plt.title('Forgery Location Heatmap')
    plt.xlabel('Normalized X')
    plt.ylabel('Normalized Y')
    plt.show()

# Inputs for building the submission: the test image folder and the sample
# submission file (its 'case_id' column is iterated in the next cell).
test_images_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/test_images'
sample_submission = pd.read_csv('/kaggle/input/recodai-luc-scientific-image-forgery-detection/sample_submission.csv')

In [None]:
# Fraction of cases that get a small "forged" box instead of 'authentic',
# and the size of that box in pixels.
forged_prob = 0.01
box_h, box_w = 4, 4

submission_data = []
for case_id in sample_submission['case_id']:
    img_path = os.path.join(test_images_dir, f"{case_id}.png")

    # Only the image dimensions are needed; the pixels are never read.
    with Image.open(img_path) as img:
        width, height = img.size

    # Default answer; replaced below only when a non-empty box was encoded.
    annotation = 'authentic'

    if np.random.random() < forged_prob:
        # Jitter the box around the hottest training location by up to
        # +/-30% of the image size along each axis.
        offset_x = np.random.uniform(-0.3, 0.3) * width
        offset_y = np.random.uniform(-0.3, 0.3) * height

        center_x = int(hottest_norm_pos[0] * width + offset_x)
        center_y = int(hottest_norm_pos[1] * height + offset_y)

        # Clamp the box origin on BOTH sides. The jitter can push the centre
        # past the right/bottom edge; clamping only at 0 then yields an empty
        # slice, an all-zero mask and an annotation of "[]".
        y0 = min(max(0, center_y - box_h // 2), max(0, height - box_h))
        x0 = min(max(0, center_x - box_w // 2), max(0, width - box_w))
        y1 = min(height, y0 + box_h)
        x1 = min(width, x0 + box_w)

        mask = np.zeros((height, width), dtype=np.uint8)
        mask[y0:y1, x0:x1] = 1

        # Cast to built-in ints so json.dumps never sees NumPy scalars.
        rle = [int(v) for v in rle_encode(mask)]
        if rle:
            annotation = json.dumps(rle)

    submission_data.append({
        'case_id': case_id,
        'annotation': annotation
    })

submission = pd.DataFrame(submission_data)
submission.to_csv('submission.csv', index=False)

<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    If I have made a mistake, please let me know in the comments
</h1>
</div>