<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    📊 Baseline Strategy: Understanding RLE &amp; Simple Submission
</h1>

<div style="
    background: rgba(99, 179, 237, 0.1);
    border-left: 4px solid #63b3ed;
    border-radius: 8px;
    padding: 20px;
    margin: 20px 0;
    position: relative;
    z-index: 1;
">
    <h3 style="
        color: #63b3ed;
        margin-top: 0;
        font-size: 1.3em;
        display: flex;
        align-items: center;
        gap: 10px;
    ">
        🎯 What we'll do in this notebook:
    </h3>
    <ul style="
        color: #f1f5f9;
        font-size: 1.1em;
        line-height: 1.6;
        margin-bottom: 0;
    ">
        <li>üß™ Understand RLE metric with practical examples</li>
        <li>üöÄ Create a simple "authentic-only" submission</li>
        <li>üìà Learn why this strategy works in some competitions</li>
        <li>üé≤ Test our baseline on the leaderboard(score 0.30 or 30% f1-score)</li>
    </ul>
</div>

<div style="
    background: rgba(255, 255, 255, 0.05);
    border-radius: 10px;
    padding: 20px;
    position: relative;
    z-index: 1;
">
    <h3 style="
        color: #63b3ed;
        margin-top: 0;
        font-size: 1.3em;
        display: flex;
        align-items: center;
        gap: 10px;
    ">
        💡 Why "authentic-only" submission?
    </h3>
    <p style="color: #f1f5f9; font-size: 1.1em; line-height: 1.6;">
        <strong>Experienced competitors often start with this approach!</strong> In many datasets, 
        the majority of images don't contain any objects/forgeries. By submitting "authentic" for all images, 
        we get a baseline score that helps us understand the data distribution.
    </p>
    
<div style="
        background: rgba(99, 179, 237, 0.15);
        border-radius: 8px;
        padding: 15px;
        margin: 15px 0;
    ">
        <h4 style="color: #63b3ed; margin-top: 0;">When this strategy works well:</h4>
        <ul style="color: #f1f5f9; line-height: 1.5;">
            <li>üìä <strong>Imbalanced datasets</strong> - when most images are truly "authentic"</li>
            <li>‚ö° <strong>Quick baseline</strong> - to test submission pipeline</li>
            <li>üìà <strong>Metric understanding</strong> - see how the scoring system works</li>
            <li>üîç <strong>Data exploration</strong> - understand the competition dynamics</li>
        </ul>
</div>
    
<div style="
        background: rgba(247, 127, 127, 0.15);
        border-radius: 8px;
        padding: 15px;
        margin: 15px 0;
    ">
        <h4 style="color: #f77f7f; margin-top: 0;">⚠️ Important note:</h4>
        <p style="color: #f1f5f9; margin: 0;">
            This is just a <strong>starting point</strong>! While it gives us a quick baseline, 
            to actually compete we'll need to build proper segmentation models. But first, 
            let's make sure our submission pipeline works correctly!
        </p>
</div>
</div>
</div>

In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

In [None]:
def rle_encode(mask):
    """
    Convert a binary mask to an RLE string of start/length pairs.

    The mask is flattened with ``mask.flatten()`` and every non-zero pixel is
    treated as foreground. Each run of consecutive foreground pixels is
    written as two numbers: the 1-based position where the run starts in the
    flattened mask, followed by the run length.

    Example: ``[1, 1, 0, 0, 1, 0]`` -> ``"1 2 5 1"``
    (2 pixels starting at position 1, then 1 pixel starting at position 5).

    Returns: space-separated string of integers; an empty string when the
    mask has no foreground pixel.
    """
    # Binarize first: with more than two distinct values (e.g. label maps)
    # the transition detection below would see extra value changes and the
    # start/length pairing would break.
    pixels = (mask.flatten() != 0).astype(np.uint8)
    # Zero-pad both ends so runs touching the first/last pixel still produce
    # a transition on each side.
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions where the value changes: start, end, start, end, ...
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Turn every "end" into a length by subtracting the preceding "start".
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

In [None]:
def visualize_mask(mask, title):
    """Show a 2-D mask as a grayscale image with a pixel grid and pixel values.

    Args:
        mask: 2-D array indexed as ``mask[row, col]``; values are displayed
            on a 0..1 gray scale (``vmin=0, vmax=1``).
        title: Text placed above the figure.
    """
    n_rows, n_cols = mask.shape[0], mask.shape[1]

    plt.figure(figsize=(6, 6))
    plt.imshow(mask, cmap='gray', vmin=0, vmax=1)
    plt.title(title)
    plt.axis('off')

    # Add grid. Rows and columns are counted separately so that non-square
    # masks get one line per pixel boundary in each direction.
    for row_edge in range(n_rows + 1):
        plt.axhline(row_edge - 0.5, color='red', alpha=0.3, linewidth=0.5)
    for col_edge in range(n_cols + 1):
        plt.axvline(col_edge - 0.5, color='red', alpha=0.3, linewidth=0.5)

    # Show pixel values: blue text on background (0) pixels, white otherwise.
    for i in range(n_rows):
        for j in range(n_cols):
            plt.text(j, i, str(mask[i, j]), ha='center', va='center',
                    color='blue' if mask[i, j] == 0 else 'white', fontweight='bold')

    plt.show()

In [None]:
# Smallest worked example: a 2x2 mask with a single background pixel.
simple_mask = np.array([[1, 0],
                        [1, 1]])

# Print the mask, its flattened form and the resulting RLE string, in that order.
for line in (
    f"Mask:\n{simple_mask}",
    f"Flattened: {simple_mask.flatten()}",
    f"RLE: '{rle_encode(simple_mask)}'",
):
    print(line)

visualize_mask(simple_mask, "Simple 2x2 Mask")

In [None]:
# 9x9 canvas with a plus sign: two 5-pixel bars crossing at row 4, column 4.
plus_mask = np.zeros((9, 9), dtype=np.uint8)
plus_mask[4, 2:7] = 1  # Horizontal bar
plus_mask[2:7, 4] = 1  # Vertical bar

print("Mask visualization:")
for row in plus_mask:
    print(' '.join(str(value) for value in row))

print(f"Flattened (first 20): {' '.join(map(str, plus_mask.flatten()[:20]))}...")
print(f"RLE: '{rle_encode(plus_mask)}'")
visualize_mask(plus_mask, "Plus Shape Mask")

In [None]:
# 9x9 canvas with a single 5-pixel horizontal bar in the middle row.
minus_mask = np.zeros((9, 9), dtype=np.uint8)
minus_mask[4, 2:7] = 1  # Horizontal line

print("Mask visualization:")
for row in minus_mask:
    print(' '.join(str(value) for value in row))

print(f"RLE: '{rle_encode(minus_mask)}'")
visualize_mask(minus_mask, "Minus Shape Mask")

In [None]:
# Single-row mask used to trace the encoder by hand: foreground pixels sit at
# flattened positions 1-2 and 5 (counting from 1).
test_mask = np.array([[1, 1, 0, 0, 1, 0]])
print(f"Test mask: {test_mask.flatten()}")

# Step by step explanation: the statements below repeat, one at a time, the
# operations that rle_encode performs.
pixels = test_mask.flatten()
print(f"1. Flatten: {pixels}")

# Pad with a zero on each side so a run touching the first or last pixel
# still shows up as a value change.
padded = np.concatenate([[0], pixels, [0]])
print(f"2. Add borders: {padded}")

# Positions (counted from 1) where the value changes. They alternate between
# the start of a foreground run and the first position after it.
changes = np.where(padded[1:] != padded[:-1])[0] + 1
print(f"3. Find changes: {changes}")

# Subtract each run start from the position that follows it, turning the
# sequence into (start, length) pairs. Work on a copy so `changes` stays intact.
runs = changes.copy()
runs[1::2] -= runs[::2]
print(f"4. Calculate lengths: {runs}")

# Join the numbers with spaces to obtain the final RLE string.
result = ' '.join(str(x) for x in runs)
print(f"5. Final RLE: '{result}'")

<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    Create a submission with masks sampled from the position distribution of the training masks
</h1>
</div>

In [None]:
import os
import cv2
import json
import numpy as np
import pandas as pd

from PIL import Image
from scipy.stats import gaussian_kde

# Seed NumPy's global random generator so the random masks drawn below are
# the same on every run.
np.random.seed(83)

# Locations of the competition files under the Kaggle input directory.
sample_submission_path = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/sample_submission.csv'
test_images_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/test_images'
train_masks_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/train_masks'

# Sample submission file; its 'case_id' column drives the submission loop.
sample_submission = pd.read_csv(sample_submission_path)

def analyze_mask_distribution():
    """Collect position, size and shape statistics from the training masks.

    Every ``.npy`` file in ``train_masks_dir`` is loaded and reduced to a 2-D
    mask. Its external contours are extracted with ``cv2.findContours`` and an
    ellipse is fitted (``cv2.fitEllipse``) to each contour that has at least
    five points. For each fitted ellipse the function records:

    * the centre, divided by the mask width / height;
    * the larger and smaller fitted axis, divided by ``max(width, height)``;
    * the ratio smaller axis / larger axis.

    Files that fail to load or process are skipped.

    Returns:
        tuple: ``(kde, (size_mean, size_std), (aspect_mean, aspect_std))``

        * ``kde`` - ``gaussian_kde`` built from the normalized centres, or
          ``None`` when fewer than two centres were found or the estimator
          could not be built.
        * ``size_mean`` / ``size_std`` - per-column mean and standard
          deviation of the (larger axis, smaller axis) pairs.
        * ``aspect_mean`` / ``aspect_std`` - mean and standard deviation of
          the axis ratios.

        When the directory is missing or no contour could be fitted, ``kde``
        is ``None`` and the statistics fall back to fixed defaults, so the
        result can always be unpacked by the mask generators.
    """
    # Fallback statistics for the "no usable training data" case.
    default_size_stats = ((0.02, 0.01), (0.01, 0.005))
    default_aspect_stats = (0.7, 0.2)

    if not os.path.exists(train_masks_dir):
        return None, default_size_stats, default_aspect_stats

    all_positions = []
    all_sizes = []
    all_aspect_ratios = []

    for mask_file in os.listdir(train_masks_dir):
        if not mask_file.endswith('.npy'):
            continue

        mask_path = os.path.join(train_masks_dir, mask_file)
        try:
            mask = np.load(mask_path)

            if mask.ndim == 3:
                if mask.shape[0] == 1:
                    mask = mask[0]
                elif mask.shape[2] == 1:
                    mask = mask[:, :, 0]
                else:
                    # NOTE(review): only index 0 of the last axis is kept. If
                    # the files store several regions stacked along axis 0,
                    # this is a slice across them rather than their union -
                    # confirm the on-disk layout.
                    mask = (mask > 0).astype(np.uint8)[:, :, 0]

            if mask.ndim != 2 or np.sum(mask) == 0:
                continue

            height, width = mask.shape
            longest_side = max(width, height)

            contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                # Contours with fewer than five points are not fitted.
                if len(contour) < 5:
                    continue

                center, axes, _angle = cv2.fitEllipse(contour)

                norm_center_x = center[0] / width
                norm_center_y = center[1] / height

                norm_major_axis = max(axes) / longest_side
                norm_minor_axis = min(axes) / longest_side

                aspect_ratio = min(axes) / max(axes) if max(axes) > 0 else 1.0

                all_positions.append((norm_center_x, norm_center_y))
                all_sizes.append((norm_major_axis, norm_minor_axis))
                all_aspect_ratios.append(aspect_ratio)

        except Exception:
            # Best effort: a mask that cannot be read or processed is skipped
            # so one bad file does not abort the whole scan.
            continue

    if not all_positions:
        return None, default_size_stats, default_aspect_stats

    positions_array = np.array(all_positions)
    kde = None
    if len(positions_array) > 1:
        try:
            kde = gaussian_kde(positions_array.T)
        except (np.linalg.LinAlgError, ValueError):
            # Degenerate centre sets (e.g. all points on one line) cannot be
            # turned into a density estimate; continue without one.
            kde = None

    # The three lists are filled together, so they are non-empty here.
    sizes_array = np.array(all_sizes)
    aspect_ratios_array = np.array(all_aspect_ratios)

    size_mean = np.mean(sizes_array, axis=0)
    size_std = np.std(sizes_array, axis=0)
    aspect_mean = np.mean(aspect_ratios_array)
    aspect_std = np.std(aspect_ratios_array)

    return kde, (size_mean, size_std), (aspect_mean, aspect_std)

def generate_ellipse_mask(height, width, kde, size_stats, aspect_stats):
    """Draw one random filled ellipse on an empty ``height`` x ``width`` mask.

    Args:
        height: Mask height in pixels.
        width: Mask width in pixels.
        kde: Object exposing ``dataset`` (a 2 x N array of normalized centres),
            or ``None``. When available, the centre is one of those points
            picked uniformly at random; otherwise it is drawn from a normal
            distribution around the image centre and clipped to [0.1, 0.9].
        size_stats: ``(size_mean, size_std)``; index 0 of each describes the
            larger axis, index 1 the smaller axis (fractions of the longer
            image side).
        aspect_stats: ``(aspect_mean, aspect_std)`` of the axis ratio.

    Returns:
        ``np.uint8`` array of shape ``(height, width)`` with 1 inside the
        ellipse and 0 elsewhere.

    The order of the random draws is kept fixed so that runs with a seeded
    global NumPy generator stay reproducible.
    """
    size_mean, size_std = size_stats
    aspect_mean, aspect_std = aspect_stats

    if kde is not None and len(kde.dataset.T) > 1:
        random_idx = np.random.randint(0, len(kde.dataset.T))
        center_x, center_y = kde.dataset.T[random_idx]
    else:
        center_x = np.clip(np.random.normal(0.5, 0.2), 0.1, 0.9)
        center_y = np.clip(np.random.normal(0.5, 0.2), 0.1, 0.9)

    major_axis = np.clip(np.random.normal(size_mean[0], size_std[0]), 0.005, 0.1)

    # The smaller axis is derived from the aspect ratio below, so this sample
    # is not used. The draw itself is kept so the random sequence (and thus
    # every later mask) is unchanged for a given seed.
    np.random.normal(size_mean[1], size_std[1])

    aspect_ratio = np.clip(np.random.normal(aspect_mean, aspect_std), 0.3, 0.95)
    minor_axis = major_axis * aspect_ratio

    mask = np.zeros((height, width), dtype=np.uint8)

    longest_side = max(height, width)
    abs_center_x = int(center_x * width)
    abs_center_y = int(center_y * height)
    # Never let an axis collapse below 2 pixels.
    abs_major = max(int(major_axis * longest_side), 2)
    abs_minor = max(int(minor_axis * longest_side), 2)

    # NOTE(review): cv2.ellipse interprets `axes` as half-lengths, while the
    # statistics produced by analyze_mask_distribution come from
    # cv2.fitEllipse axis sizes - confirm whether the values should be halved.
    cv2.ellipse(mask,
                (abs_center_x, abs_center_y),
                (abs_minor, abs_major),
                angle=np.random.uniform(0, 180),
                startAngle=0,
                endAngle=360,
                color=1,
                thickness=-1)

    # In 30% of the cases pick a kernel size; only size 3 actually blurs and
    # re-thresholds the mask.
    if np.random.random() < 0.3:
        # Plain int so the ksize tuple does not carry a NumPy scalar.
        kernel_size = int(np.random.choice([1, 3]))
        if kernel_size > 1:
            mask = cv2.GaussianBlur(mask.astype(np.float32), (kernel_size, kernel_size), 0)
            mask = (mask > 0.3).astype(np.uint8)

    return mask

def generate_irregular_mask(height, width, kde, size_stats):
    """Create an ellipse mask and, half of the time, erode or dilate it.

    The base shape comes from ``generate_ellipse_mask`` with fixed aspect
    statistics ``(0.7, 0.2)``. With probability 0.5 the mask is returned
    unchanged; otherwise one pass of a 2x2 erosion or dilation (chosen with
    equal probability) is applied.
    """
    base = generate_ellipse_mask(height, width, kde, size_stats, (0.7, 0.2))

    # First draw decides whether the shape is modified at all.
    if np.random.random() >= 0.5:
        return base

    structuring = np.ones((2, 2), np.uint8)
    # Second draw selects the morphological operation.
    morph = cv2.erode if np.random.random() < 0.5 else cv2.dilate
    return morph(base, structuring, iterations=1)

def generate_multiple_masks(height, width, kde, size_stats, aspect_stats):
    """Combine one to three random ellipse masks into a single mask.

    The number of ellipses is 1, 2 or 3 with probabilities 0.7, 0.2 and 0.1.
    Each ellipse comes from ``generate_ellipse_mask``; the result is their
    pixel-wise union as a ``np.uint8`` array of shape ``(height, width)``.
    """
    region_count = np.random.choice([1, 2, 3], p=[0.7, 0.2, 0.1])

    layers = [
        generate_ellipse_mask(height, width, kde, size_stats, aspect_stats)
        for _ in range(region_count)
    ]

    # At least one layer always exists, so the reduction is well defined.
    return np.logical_or.reduce(layers).astype(np.uint8)

def rle_encode(mask):
    """
    Convert a binary mask to an RLE string of start/length pairs.

    The mask is flattened with ``mask.flatten()`` and every non-zero pixel is
    treated as foreground. Each run of consecutive foreground pixels is
    written as its 1-based start position in the flattened mask followed by
    its length, e.g. ``[1, 1, 0, 0, 1, 0]`` -> ``"1 2 5 1"``.

    Returns: space-separated string of integers; an empty string when the
    mask has no foreground pixel.
    """
    # Binarize first so masks holding more than two distinct values cannot
    # introduce extra transitions and break the start/length pairing.
    pixels = (mask.flatten() != 0).astype(np.uint8)
    # Zero-pad so runs touching either end still yield a transition.
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions of value changes: start, end, start, end, ...
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Replace every "end" by the run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# Statistics of the training masks, used to parameterize the random masks.
kde, size_stats, aspect_stats = analyze_mask_distribution()

submission_data = []

for case_id in sample_submission['case_id']:
    # Default answer; replaced only when a non-empty random mask is drawn.
    annotation = 'authentic'
    img_path = os.path.join(test_images_dir, f"{case_id}.png")

    if os.path.exists(img_path):
        # Only the image dimensions are needed to size the mask.
        with Image.open(img_path) as img:
            width, height = img.size

        # A small fraction of the images receives a synthetic mask.
        if np.random.random() < 0.01:
            mask_type = np.random.choice(['ellipse', 'irregular', 'multiple'], p=[0.6, 0.3, 0.1])

            if mask_type == 'irregular':
                mask = generate_irregular_mask(height, width, kde, size_stats)
            elif mask_type == 'multiple':
                mask = generate_multiple_masks(height, width, kde, size_stats, aspect_stats)
            else:
                mask = generate_ellipse_mask(height, width, kde, size_stats, aspect_stats)

            if np.sum(mask) > 0:
                # The RLE string is stored as a JSON list of integers.
                annotation = json.dumps([int(token) for token in rle_encode(mask).split()])

    submission_data.append({'case_id': case_id, 'annotation': annotation})

submission = pd.DataFrame(submission_data)
submission.to_csv('submission.csv', index=False)
submission.head()

<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    If I have made a mistake, please let me know in the comments
</h1>
</div>