In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image, ImageSequence
from pathlib import Path
import zipfile
from io import BytesIO
from tqdm import tqdm
from skimage.filters import threshold_otsu
from skimage.morphology import remove_small_objects
from skimage.measure import label

# Kaggle competition input layout and the location of the submission archive.
base_dir = Path("/kaggle/input/vesuvius-challenge-surface-detection/")
test_img_dir = base_dir / "test_images"
test_csv_path = base_dir / "test.csv"
submission_zip_path = Path("/kaggle/working/submission.zip")

# Load the list of test volumes. The success message lives in `else` so the
# `try` body contains only the call that can raise FileNotFoundError.
try:
    test_meta = pd.read_csv(test_csv_path)
except FileNotFoundError:
    print(f" Error: Test CSV not found at {test_csv_path}")
    # Keep `test_meta` bound so later code that iterates it sees zero volumes
    # instead of failing with a NameError unrelated to the real problem.
    test_meta = pd.DataFrame(columns=["id"])
else:
    print(f" Found {len(test_meta)} test volumes in CSV.")


def load_volume(path: Path) -> np.ndarray:
    """
    Load a multi-page TIFF into a 3D NumPy array: (slices, H, W).

    Each page of the image at ``path`` is converted with ``np.array`` and the
    pages are stacked along a new leading axis. The (slices, H, W) layout
    assumes every page is a 2D single-channel image of the same size.

    Args:
        path: Location of the TIFF file to read.

    Returns:
        The stacked pages as a single array.

    Raises:
        RuntimeError: If the file cannot be opened, decoded or stacked. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        with Image.open(path) as img:
            frames = [np.array(frame) for frame in ImageSequence.Iterator(img)]
        return np.stack(frames)
    except Exception as e:
        # Chain explicitly so the traceback shows the real failure as the
        # direct cause rather than as an incidental "during handling" context.
        raise RuntimeError(f"Error loading TIFF {path}: {e}") from e


def smarter_predict(volume: np.ndarray, *, min_size: int = 5000) -> np.ndarray:
    """
    A non-ML baseline using Otsu's threshold
    followed by morphological cleanup.

    The volume is binarised with a single global Otsu threshold, connected
    components are labelled, and components smaller than ``min_size`` are
    discarded.

    Args:
        volume: Intensity array to segment.
        min_size: Size passed to ``remove_small_objects``; components smaller
            than this are removed. Defaults to 5000, the value this baseline
            was written with.

    Returns:
        A ``uint8`` array with the same shape as ``volume`` holding 1 where a
        surviving component is present and 0 elsewhere.
    """
    try:
        mask = volume > threshold_otsu(volume)
    except ValueError:
        # Fall back to the mean intensity when Otsu cannot produce a threshold.
        print("     Otsu thresholding failed, falling back to mean.")
        mask = volume > volume.mean()

    # Label first so that remove_small_objects works on the components found
    # by `label` (with its default connectivity) rather than labelling the
    # boolean mask itself.
    labeled_mask = label(mask)
    cleaned_mask = remove_small_objects(labeled_mask, min_size=min_size)

    # Collapse the surviving labels back to a binary 0/1 mask.
    return (cleaned_mask > 0).astype(np.uint8)


def save_volume_to_zip(volume: np.ndarray, zip_file, filename: str):
    """
    Save 3D volume as a multi-page TIFF into an already-open ZIP.

    Each item along the first axis of ``volume`` becomes one TIFF page. The
    TIFF is built in memory and written to the archive in one call.

    Args:
        volume: Array whose first axis indexes the pages to write. The caller
            is expected to pass data ``Image.fromarray`` accepts (the
            pipeline passes ``uint8`` masks).
        zip_file: Open, writable archive object providing ``writestr``.
        filename: Name of the entry to create inside the archive.

    Raises:
        RuntimeError: If the volume is empty or encoding/writing fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        slices = [Image.fromarray(v) for v in volume]  # Already uint8
        if not slices:
            # Without this guard an empty volume surfaces as an opaque
            # "list index out of range" from slices[0].
            raise ValueError("volume contains no slices")

        with BytesIO() as buffer:
            slices[0].save(
                buffer,
                format="TIFF",
                save_all=True,
                append_images=slices[1:],
            )
            zip_file.writestr(filename, buffer.getvalue())
    except Exception as e:
        raise RuntimeError(f"Error saving {filename} to ZIP: {e}") from e


# Archive settings: DEFLATE at the maximum compression level.
ZIP_COMPRESSION = zipfile.ZIP_DEFLATED
ZIP_COMPRESS_LEVEL = 9

print(f"\n Starting processing... Writing to {submission_zip_path}")
print(f"   Compression: {ZIP_COMPRESSION}, Level: {ZIP_COMPRESS_LEVEL}")

# Process every test volume listed in the CSV: load -> predict -> write the
# mask into the submission archive. A failure on one volume is reported and
# the loop moves on to the next one.
with zipfile.ZipFile(
    submission_zip_path,
    "w",
    compression=ZIP_COMPRESSION,
    compresslevel=ZIP_COMPRESS_LEVEL,
) as zf:

    # Iterate the "id" column directly instead of using iterrows(): iterrows
    # builds one Series per row with a single common dtype, so an integer id
    # next to a float column would be upcast to float and yield a wrong
    # filename such as "123.0.tif".
    for image_id in tqdm(test_meta["id"], total=len(test_meta), desc="Processing volumes"):
        filename = f"{image_id}.tif"
        img_path = test_img_dir / filename

        if not img_path.exists():
            print(f" File missing, skipping: {filename}")
            continue

        try:
            # 1. Load
            volume = load_volume(img_path)

            if volume.ndim != 3:
                print(f" Invalid volume shape {volume.shape}, skipping {filename}")
                continue

            # 2. Predict
            mask = smarter_predict(volume)

            # 3. Save
            save_volume_to_zip(mask, zf, filename)

        except Exception as e:
            # Top-level boundary: report the failure and keep going with the
            # remaining volumes.
            print(f" Error processing {filename}: {e}")

print(f"\n : {submission_zip_path}")

 Found 1 test volumes in CSV.

 Starting processing... Writing to /kaggle/working/submission.zip
   Compression: 8, Level: 9


Processing volumes: 100%|██████████| 1/1 [00:11<00:00, 11.91s/it]


 : /kaggle/working/submission.zip



