In [2]:
# notebooks/preprocess_tiles.ipynb

# --- Standard library / third-party imports ---
import os
import random
import sys

import torch
from torchvision import transforms

# --- Fix path to scripts ---
# Must run before the local import below so `preprocessing` can be resolved.
sys.path.append("../scripts")

# --- Import preprocessing function ---
from preprocessing import preprocess_tiles

# --- Define Paths ---
# Relative paths: they resolve against the kernel's current working directory.
tiles_folder = "../data/tiles/"
processed_tiles_folder = "../data/processed_tiles/"

# --- Set Threshold for Filtering ---
# Forwarded to preprocess_tiles as `threshold`.
# NOTE(review): presumably a lower value keeps more tiles -- confirm against
# scripts/preprocessing.py.
info_threshold = 3  # Keep more tiles

# --- Seed Random Number Generators ---
# The augmentation pipeline is stochastic. Seeding makes a fresh
# "Restart Kernel -> Run All" draw the same random numbers each time.
# torchvision's random transforms draw from torch's global generator; Python's
# `random` is seeded as well in case older torchvision releases or the
# preprocessing script rely on it.
# NOTE(review): full reproducibility also assumes preprocess_tiles visits files
# in a deterministic order and applies the transform in this process -- confirm.
random_seed = 42
random.seed(random_seed)
torch.manual_seed(random_seed)

# --- Define Strong Augmentation Transform ---
# Colour jitter, affine warp and blur are bundled in one RandomApply, so with
# probability 0.7 all three run in sequence and otherwise all three are skipped.
random_distortions = transforms.RandomApply(
    [
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
        transforms.RandomAffine(degrees=30, scale=(0.9, 1.1), shear=10),
        transforms.GaussianBlur(kernel_size=5),
    ],
    p=0.7,
)

# Full pipeline: the image-level steps run on a PIL image; the final erasing
# step runs on a tensor.
augmentation_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(224, 224)),
        random_distortions,
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=30),
        # RandomErasing works on tensors, so the conversion has to come first.
        transforms.ToTensor(),
        transforms.RandomErasing(p=0.5),
    ]
)

# --- Preprocess and Augment Tiles ---
# Gather the keyword arguments in one place so the run configuration can be
# inspected before (or after) the call.
preprocess_options = dict(
    input_dir=tiles_folder,
    output_dir=processed_tiles_folder,
    threshold=info_threshold,
    n_augments=5,
    transform=augmentation_transform,
)

# Run the project-local preprocessing routine with the options above.
preprocess_tiles(**preprocess_options)

print("✅ Preprocessing complete. Augmented tiles are ready for training.")


Preprocessing classes:   0%|          | 0/3 [00:00<?, ?it/s]

Preprocessing classes: 100%|██████████| 3/3 [00:08<00:00,  2.98s/it]


✅ Preprocessing complete. Kept 1476/246 tiles (including augmentations)
✅ Preprocessing complete. Augmented tiles are ready for training.



