In [None]:
import tifffile
from PIL import Image
import numpy as np
import pandas as pd
import os
from pathlib import Path
from tqdm import tqdm

# Configuration

# Inputs
TIFF_FILE = "../data/images/bGaslniO4a_a.tif"  # slide image that gets tiled
ANNOTATION_CSV = "../data/train.csv"  # annotation table, filtered by file name later

# Tiling parameters
TILE_SIZE = 224  # tile edge length in pixels
VARIABILITY_THRESHOLD = 10  # tiles whose pixel std-dev is below this are skipped

# Outputs
OUTPUT_TILES_DIR = "tiles"  # directory the tile images are written to
TILE_EXTENSION = ".png"  # file extension used for the saved tiles
OUTPUT_CSV = "tiles_labels.csv"  # per-tile label table
RECTANGLE_OUTPUT_FILE = "rectangles_overlay.png"  # not referenced by the cells shown here

In [None]:
# Make sure the tile output directory exists. exist_ok=True replaces the
# check-then-create pair with a single call, so there is no window between
# the existence check and the creation, and re-running the cell is harmless.
os.makedirs(OUTPUT_TILES_DIR, exist_ok=True)

# Read the whole slide into memory as an array; the context manager closes
# the TIFF file once the pixel data has been loaded.
with tifffile.TiffFile(TIFF_FILE) as tif:
    slide = tif.asarray()

# Load the annotation table (narrowed to this slide's rows in a later cell).
annotations = pd.read_csv(ANNOTATION_CSV)

In [None]:
# Restrict the annotation table to the rows of the slide being tiled,
# matching on the bare file name (no directory part) of TIFF_FILE.
slide_name = Path(TIFF_FILE).name
belongs_to_slide = annotations["filename"] == slide_name
annotations = annotations.loc[belongs_to_slide]
annotations

In [None]:
def is_tile_positive(tile_x, tile_y, tile_size, annotations):
    """Return True if any annotation box overlaps the given tile.

    The tile covers the half-open pixel range
    ``[tile_x, tile_x + tile_size)`` x ``[tile_y, tile_y + tile_size)``,
    matching how tiles are sliced out of the slide array. Annotation boxes
    are read from the ``x1``, ``y1``, ``x2``, ``y2`` columns; ``x2``/``y2``
    are treated as inclusive, as in the original comparison.

    Args:
        tile_x: Left pixel coordinate of the tile.
        tile_y: Top pixel coordinate of the tile.
        tile_size: Edge length of the (square) tile in pixels.
        annotations: DataFrame with one row per annotation box.

    Returns:
        bool: True when at least one box shares pixels with the tile,
        False otherwise (including when there are no annotation rows).
    """
    if len(annotations) == 0:
        return False

    x1 = annotations["x1"].to_numpy()
    y1 = annotations["y1"].to_numpy()
    x2 = annotations["x2"].to_numpy()
    y2 = annotations["y2"].to_numpy()

    # A box overlaps the tile when it starts before the tile's exclusive
    # right/bottom edge and ends at or after the tile's left/top edge.
    # Using strict "<" against the exclusive edge means a box that starts
    # exactly where the tile ends is no longer counted as overlapping.
    # Written as positive conditions so rows with missing (NaN) coordinates
    # evaluate to False instead of marking every tile positive.
    overlaps = (
        (x1 < tile_x + tile_size)
        & (x2 >= tile_x)
        & (y1 < tile_y + tile_size)
        & (y2 >= tile_y)
    )
    return bool(overlaps.any())


def calculate_tile_variability(tile):
    """Return the standard deviation of a tile's pixel intensities.

    A 3-dimensional tile is first collapsed to a single channel by averaging
    over its last axis; any other tile is used as it is.
    """
    intensities = np.mean(tile, axis=2) if tile.ndim == 3 else tile
    return np.std(intensities)

In [None]:
# Slide dimensions in pixels; the first two array axes are (rows, columns).
slide_height, slide_width = slide.shape[:2]

# One record per saved tile; converted to a DataFrame after the loop.
tile_labels = []


# Walk the slide column by column (outer progress bar) and top to bottom
# within each column (inner progress bar, cleared when the column is done).
for x in tqdm(range(0, slide_width, TILE_SIZE), desc="Processing Tiles", unit="col"):
    for y in tqdm(
        range(0, slide_height, TILE_SIZE),
        desc="Processing Rows",
        unit="row",
        leave=False,
    ):
        # Tile bounding box as (left, top, right, bottom), clipped to the
        # slide so tiles on the right/bottom border may be smaller.
        box = (x, y, min(x + TILE_SIZE, slide_width), min(y + TILE_SIZE, slide_height))

        # Extract the tile; the array is indexed [rows, columns].
        tile = slide[box[1] : box[3], box[0] : box[2]]

        # Skip tiles whose pixel values barely vary.
        variability = calculate_tile_variability(tile)

        if variability < VARIABILITY_THRESHOLD:
            continue

        # Save the tile; its file name encodes the top-left pixel offset.
        tile_image = Image.fromarray(tile)
        tile_filename = f"tile_{x}_{y}{TILE_EXTENSION}"
        tile_path = os.path.join(OUTPUT_TILES_DIR, tile_filename)
        tile_image.save(tile_path)

        # Label the tile by overlap with the annotation boxes. The overlap
        # test uses the nominal TILE_SIZE, also for clipped border tiles.
        is_positive = is_tile_positive(x, y, TILE_SIZE, annotations)
        tile_labels.append(
            {
                "filename": tile_filename,
                "label": "positive" if is_positive else "negative",
                "x": x,
                "y": y,
            }
        )

# Build the label table. The columns are given explicitly so that the CSV
# still gets a header row when no tile passed the variability filter;
# a header-less file cannot be read back with pd.read_csv.
labels_df = pd.DataFrame(tile_labels, columns=["filename", "label", "x", "y"])
labels_df.to_csv(OUTPUT_CSV, index=False)

print(f"Tiles and labels have been saved to {OUTPUT_TILES_DIR} and {OUTPUT_CSV}")

In [None]:
# Reload the tile labels from the CSV on disk. Note that this rebinds
# `tile_labels` from the list built during tiling to a DataFrame.
tile_labels = pd.read_csv(OUTPUT_CSV)
tile_labels

In [None]:
# Size of the tile grid: number of tile rows (H) and tile columns (W).
# Only the first two axes are read, so a slide without a channel axis works too.
slide_rows, slide_cols = slide.shape[:2]

# Ceiling division: tiling starts a tile every TILE_SIZE pixels, so a slide
# whose size is not a multiple of TILE_SIZE has a partial last row/column
# that still needs a cell in the grid.
H = (slide_rows + TILE_SIZE - 1) // TILE_SIZE
W = (slide_cols + TILE_SIZE - 1) // TILE_SIZE
print(H, W)

In [None]:
# Tile-level mask with H rows and W columns, initialised to zero.
mask_shape = (H, W)
image_mask = np.zeros(mask_shape, dtype=np.float64)
image_mask.shape

In [None]:
# Mark every positive tile in the mask. The mask is indexed [row, column],
# so a tile's y offset selects the row and its x offset selects the column.
positive_tiles = tile_labels[tile_labels["label"] == "positive"]
tile_rows = positive_tiles["y"].to_numpy(dtype=int) // TILE_SIZE
tile_cols = positive_tiles["x"].to_numpy(dtype=int) // TILE_SIZE

# Skip tiles whose grid position lies outside the mask (possible for a
# partial border tile when the mask was sized with floor division) instead
# of failing with an IndexError.
inside_mask = (tile_rows < image_mask.shape[0]) & (tile_cols < image_mask.shape[1])
image_mask[tile_rows[inside_mask], tile_cols[inside_mask]] = 255

In [None]:
import matplotlib.pyplot as plt

# Show the tile-level mask in its own figure. Positive tiles are the only
# non-zero cells (255); with the "Greys" colormap larger values are drawn
# darker, so they appear as dark cells on a light background.
plt.figure()
plt.imshow(image_mask, cmap="Greys")
plt.figure()