In [None]:
from pathlib import Path
import pandas as pd

# Dataset root and the sub-directory that holds the test images.
PATH_DATASET = "/kaggle/input/recodai-luc-scientific-image-forgery-detection"
TEST_IMAGES_DIR = f'{PATH_DATASET}/test_images'
test_img_dir = Path(TEST_IMAGES_DIR)  # Path object so the directory can be globbed

# Filenames without extension for every PNG in the test directory, in sorted order.
test_image_stems = [png_file.stem for png_file in test_img_dir.glob('*.png')]
test_image_stems.sort()
print(f"Found images: {len(test_image_stems)}")

In [None]:
def rle_encode(mask, fg_val=1):
    """Run-length encode the foreground of a mask using the competition metric format.

    The mask is transposed and flattened, so pixels are visited in
    column-major order, and run start positions are 1-based.

    Args:
        mask: Array-like mask (anything ``np.asarray`` accepts); intended for
            2-D masks.
        fg_val: Value that marks a foreground pixel. Defaults to 1.

    Returns:
        A flat list ``[start, length, start, length, ...]`` of built-in ``int``
        values, one pair per run of consecutive foreground pixels. The list is
        empty when no element of ``mask`` equals ``fg_val``.
    """
    # .tolist() converts the indices to built-in ints. Keeping NumPy scalars here
    # would leak them into the result, where repr() renders them as
    # "np.int64(...)" on NumPy >= 2 and json.dumps() refuses them outright.
    dots = np.where(np.asarray(mask).T.flatten() == fg_val)[0].tolist()
    run_lengths = []
    prev = -2  # sentinel: guarantees the first foreground pixel opens a new run

    for b in dots:
        if b > prev + 1:
            # Not adjacent to the previous pixel: open a new (1-based start, 0) pair.
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1  # grow the length of the current run
        prev = b

    return run_lengths

In [None]:
import json

import numpy as np
from tqdm.auto import tqdm

# Fixed seed so the placeholder masks, and therefore the submission rows, are
# identical on every run of this cell.
PLACEHOLDER_SEED = 42
rng = np.random.default_rng(PLACEHOLDER_SEED)

submissions = []

for img_stem in tqdm(test_image_stems, desc="Creating submission"):
    # TODO: replace the placeholder mask below with real predictions, e.g.
    # mask_path = masks_dir / f"{img_stem}.npy"
    # mask = np.load(mask_path)

    mask = np.zeros((50, 50), dtype=int)
    # Placeholder: mark a fixed rectangle as forged for roughly half of the images.
    if rng.random() > 0.5:
        mask[10:15, 20:30] = 1

    rle_annotation = rle_encode(mask)
    if rle_annotation:  # If any pixels are marked as forgery
        # Cast to built-in ints and serialise with json: repr() of NumPy >= 2
        # scalars is "np.int64(5)", which would corrupt the annotation text.
        rle_text = json.dumps([int(value) for value in rle_annotation])
        # The surrounding double quotes are part of the value: the list contains
        # commas and the CSV writer below writes the annotation as-is.
        submissions.append({'case_id': img_stem, 'annotation': f'"{rle_text}"'})
    else:  # For authentic images, the annotation should be 'authentic' without quotes
        submissions.append({'case_id': img_stem, 'annotation': 'authentic'})


submission_df = pd.DataFrame(submissions)

In [None]:
from pprint import pprint

def _quote_csv_field(value):
    """Return ``value`` as CSV field text, double-quoting it only when needed."""
    text = str(value)
    # Already wrapped in double quotes by the caller: leave untouched so the
    # field is not quoted twice.
    if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
        return text
    # A comma, quote or line break would break the row unless the field is quoted.
    if any(ch in text for ch in ',"\r\n'):
        return '"' + text.replace('"', '""') + '"'
    return text


def write_submission_csv(data, filename):
    """Write submission rows to a two-column CSV file.

    Args:
        data: Iterable of mappings, each with a ``'case_id'`` and an
            ``'annotation'`` entry.
        filename: Path of the file to create or overwrite.

    The file starts with the header ``case_id,annotation``. An annotation that
    contains a comma, a double quote or a line break (e.g. an RLE list) is
    wrapped in double quotes unless it is already quoted; plain values such as
    ``authentic`` are written unchanged.
    """
    lines = ["case_id,annotation\n"]  # Header line
    lines.extend(
        f"{row['case_id']},{_quote_csv_field(row['annotation'])}\n" for row in data
    )
    with open(filename, 'w') as f:
        f.writelines(lines)


# Persist the rows collected in `submissions` (built by the submission loop in an
# earlier cell; re-run that cell first if the list is missing) to submission.csv
# in the current working directory.
write_submission_csv(data=submissions, filename='submission.csv')

In [None]:
# Show the first lines of submission.csv as a quick sanity check of its format.
!head submission.csv