# Notebook overview
Detects foreground objects in resized high-resolution images using DINOv2 patch features and PCA, removes or recolors background, and saves object-focused images.

- Loads a pretrained DINOv2 model and preprocessing pipeline
- Extracts patch features, computes the first PCA component, and selects foreground patches by sign and connected-component filtering
- Performs a second PCA for visualization and creates a mask that is resized to image resolution
- Produces images with background removed or recolored and saves results for the dataset

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

from PIL import Image, ImageFilter
import random
import timm
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF

import matplotlib.patches as patches
from scipy.ndimage import label, binary_fill_holes

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [None]:
# --- Directory configuration ---
# Absolute locations used by this notebook:
#   SOURCE_DIR_PATH  - folder to load images from
#   RESULT_DIR_PATH  - folder to save images to
#   DATASET_DIR_PATH - folder holding the datasets with the image paths
SOURCE_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/images/download/high'
RESULT_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/high'
DATASET_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/datasets_created'

# Fail fast on the first missing folder (checked in source -> result -> dataset
# order); nothing is created here.
for _raw_dir in (SOURCE_DIR_PATH, RESULT_DIR_PATH, DATASET_DIR_PATH):
    if not Path(_raw_dir).exists():
        raise FileNotFoundError(f"Folder does not exist: {_raw_dir}")
del _raw_dir

# Path objects for the three validated folders.
source_dir_path = Path(SOURCE_DIR_PATH)
result_dir_path = Path(RESULT_DIR_PATH)
dataset_dir_path = Path(DATASET_DIR_PATH)

# Folder and Dataset Path

In [1]:
# Source Folder and Result Folder
# FOLDER_PATH_SOURCE: root folder the processing loop reads the input images from.
# FOLDER_PATH_RESULT: root folder the processing loop writes the processed images to.
FOLDER_PATH_SOURCE = '/home/jleick/masterArbeitProjekt/data/ami_images_adapted/ami_traps_resized'
FOLDER_PATH_RESULT = '/home/jleick/masterArbeitProjekt/data/ami_images_adapted/ami_traps_resized_and_object_detected_pca'

# CSV file read with pandas further down; its 'image_path' column supplies the
# relative image paths that are joined onto the two folders above.
DATASET_PATH = '/home/jleick/masterArbeitProjekt/data/ami_dataset_created/traps_fine_grain_klassification/traps_fine-grained_embeddings_species_10_adapted_for_datasetClass.csv'

# Functions

### Function - load_model

In [None]:
def load_model():
    """Create the pretrained DINOv2 ViT-S/14 backbone with timm, in eval mode.

    Prints the model's patch size and default input size for reference.

    Returns:
        The timm model instance, already switched to evaluation mode.
    """
    backbone = timm.create_model("vit_small_patch14_dinov2.lvd142m", pretrained=True)
    backbone.eval()

    # With a 518 x 518 input and 14 x 14 patches the model sees
    # 37 * 37 = 1369 patches per image.
    patch_size = backbone.patch_embed.patch_size
    print("Patch size:", patch_size)
    input_size = backbone.default_cfg['input_size']
    print("Default input size:", input_size)
    return backbone

# Module-level model instance; it is passed to the preprocessing-pipeline and
# patch-extraction helpers further down.
model = load_model()

Patch size: (14, 14)
Default input size: (3, 518, 518)


In [None]:
### Set device (GPU path NOT TESTED)
# Running on the GPU has not been verified, so the notebook stays on the CPU
# unless USE_CUDA_IF_AVAILABLE is switched on deliberately. Previously the
# CUDA check was computed and then unconditionally overwritten with "cpu".
USE_CUDA_IF_AVAILABLE = False
device = torch.device("cuda" if USE_CUDA_IF_AVAILABLE and torch.cuda.is_available() else "cpu")

# Keep the model on the same device as the inputs that are moved with `.to(device)`.
model = model.to(device)
print(f"Training on device {device}.")

Training on device cpu.


### Function - load_preprocessing_pipeline

In [4]:
def load_preprocessing_pipeline(model):
    """Build the preprocessing transform that matches the model's pretrained config.

    Args:
        model: timm model exposing ``pretrained_cfg``.

    Returns:
        The transform created by ``timm.data.create_transform``; it is also
        printed for inspection.
    """
    # Resolve the data settings from the pretrained config, then build the
    # transform from exactly those settings.
    resolved_cfg = timm.data.resolve_data_config(model.pretrained_cfg)
    pipeline = timm.data.create_transform(**resolved_cfg)
    print(f"transform pypline: {pipeline}")
    # TODO: decide whether normalisation is useful here and, if so, whether the
    # default values from the resolved config are the right ones.
    return pipeline

# Module-level preprocessing transform; crop_on_original reads it as a global
# and remove_background_of_img passes it on to load_patches.
transform = load_preprocessing_pipeline(model)

transform pypline: Compose(
    Resize(size=518, interpolation=bicubic, max_size=None, antialias=True)
    CenterCrop(size=(518, 518))
    MaybeToTensor()
    Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
)


### Function - load_patches

In [5]:
def load_patches(image, transform, model):
    """Run the model on a single image and return its patch-token features.

    Args:
        image: Input accepted by ``transform`` (the notebook passes a PIL image).
        transform: Callable that turns ``image`` into a tensor without a batch
            dimension.
        model: ``torch.nn.Module`` exposing ``forward_features``. If it has a
            ``num_prefix_tokens`` attribute (timm ViTs: class + register
            tokens), that many leading tokens are dropped; otherwise one token
            (the class token) is dropped, as before.

    Returns:
        torch.Tensor: 2-D tensor with one row per patch token. It is computed
        without autograd tracking, so it never requires grad.
    """
    # Send the input to wherever the model's weights live instead of relying
    # on a module-level `device` that may not match the model.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    image_transformed = transform(image).unsqueeze(0).to(model_device)

    # Pure inference: no_grad avoids building an autograd graph per image.
    with torch.no_grad():
        output = model.forward_features(image_transformed)

    # Drop the non-patch prefix tokens (class token, optional register tokens).
    num_prefix_tokens = getattr(model, "num_prefix_tokens", 1)
    image_patches = output[0, num_prefix_tokens:, :]
    return image_patches

### Function - crop_on_original

In [None]:
# Apply the preprocessing transform, then convert the normalized tensor back into a regular PIL image

def crop_on_original(image):
    """Apply the model preprocessing to ``image`` and return the result as a PIL image.

    The module-level ``transform`` is applied, its normalization is undone with
    the mean/std constants below, the values are clipped to [0, 1] and the
    tensor is converted back to a PIL image.

    Args:
        image: Input accepted by the module-level ``transform``.

    Returns:
        PIL image showing what the model sees after preprocessing.
    """
    # Per-channel statistics used to invert the normalization, shaped
    # (3, 1, 1) so they broadcast over height and width.
    channel_mean = torch.tensor([0.4850, 0.4560, 0.4060]).view(3, 1, 1)
    channel_std = torch.tensor([0.2290, 0.2240, 0.2250]).view(3, 1, 1)

    # Preprocess with a batch dimension, then drop it again and move to the CPU.
    batch = transform(image).unsqueeze(0).to(device)
    normalized = batch.squeeze(0).cpu()

    # Undo the normalization; clip because some values may leave [0, 1].
    denormalized = (normalized * channel_std + channel_mean).clamp(0, 1)

    return T.ToPILImage()(denormalized)

### Function - choose_pca_sign

In [None]:
def choose_pca_sign(pca_map: np.ndarray, center_ratio: float = 0.3,
                    patch_size: int = 14) -> "tuple[str, patches.Rectangle]":
    """
    Decide whether the positive or the negative side of the first PCA
    component is the foreground, based on the central region of the patch grid.

    The mean of the positive values in the central window is compared with the
    absolute mean of the negative values there; the larger one wins, and ties
    go to 'positive'.

    Args:
        pca_map (np.ndarray): 2D array of the first PCA component in patch grid format.
        center_ratio (float): Side length of the central window as a fraction
            of the grid (e.g. 0.4 for 40%). The window is clamped to at least
            one patch and at most the whole grid.
        patch_size (int): Edge length of one patch in pixels, used only to
            scale the returned rectangle to image coordinates.

    Returns:
        tuple: ``(sign_prediction, rect)`` where ``sign_prediction`` is
        'positive' or 'negative' and ``rect`` is a red, unfilled
        ``matplotlib.patches.Rectangle`` outlining the central window in pixel
        coordinates.
    """
    h, w = pca_map.shape
    # Clamp so that a tiny ratio cannot produce an empty window (which would
    # silently default to 'positive') and a ratio > 1 cannot produce negative
    # slice starts.
    ch = min(h, max(1, int(h * center_ratio)))
    cw = min(w, max(1, int(w * center_ratio)))
    start_h, start_w = (h - ch) // 2, (w - cw) // 2

    center = pca_map[start_h:start_h + ch, start_w:start_w + cw]

    # Calculate positive and negative mean values in the center
    positive_values = center[center > 0]
    negative_values = center[center < 0]

    mean_positive = positive_values.mean() if positive_values.size > 0 else 0
    mean_negative = abs(negative_values.mean()) if negative_values.size > 0 else 0

    sign_prediction = 'positive' if mean_positive >= mean_negative else 'negative'

    # The -0.5 offset aligns the outline with pixel edges rather than centres.
    rect = patches.Rectangle((start_w*patch_size - 0.5, start_h*patch_size - 0.5), cw*patch_size, ch*patch_size,
                            linewidth=2, edgecolor='red', facecolor='none')

    return sign_prediction, rect

### Function - filter_largest_connected_component_and_fill

In [None]:
def filter_largest_connected_component_and_fill(binary_mask: np.ndarray) -> np.ndarray:
    """
    Reduce a binary mask to its largest connected region and fill that region's holes.

    1. Keep only the largest contiguous region (scipy's default connectivity).
    2. Fill holes (e.g., pixels that are completely surrounded by foreground patches).

    Args:
        binary_mask (np.ndarray): Array whose non-zero entries mark foreground.

    Returns:
        np.ndarray: New uint8 array of the same shape with 1 for kept
        foreground and 0 elsewhere. An input without any foreground yields an
        all-zero uint8 array; the input itself is never returned.
    """
    # Step 1: Largest connected component
    labeled_mask, num_features = label(binary_mask)
    if num_features == 0:
        # Same dtype and "fresh array" contract as the regular path below.
        return np.zeros(labeled_mask.shape, dtype=np.uint8)

    sizes = np.bincount(labeled_mask.ravel())
    sizes[0] = 0  # label 0 is the background and must never win
    largest_region = labeled_mask == sizes.argmax()

    # Step 2: Fill holes within the region
    return binary_fill_holes(largest_region).astype(np.uint8)

### Function - object_detection

In [9]:
# Object detection
def object_detection(image_patches, center_ratio):
    """Segment the foreground patches and build an RGBA patch-grid visualisation.

    Steps:
      1. Project all patch features onto their first PCA component.
      2. Pick the foreground sign with ``choose_pca_sign``, threshold at 0 and
         keep the largest connected region with its holes filled.
      3. Run a second, 3-component PCA on the foreground patches only and
         scale each component to [0, 1] so it can be shown as RGB.

    Args:
        image_patches: 2-D torch tensor with one row per patch. The number of
            patches must be a perfect square (square patch grid).
        center_ratio: Fraction of the grid used as central window by
            ``choose_pca_sign``.

    Returns:
        tuple: ``(rgba_image, rec)``. ``rgba_image`` is a
        ``(side, side, 4)`` float array: RGB from the second PCA for foreground
        patches and 1.0 (white) elsewhere, alpha 0.8 for foreground and 0 for
        background. ``rec`` is the rectangle returned by ``choose_pca_sign``.

    Raises:
        ValueError: If the number of patches is not a perfect square.
    """
    patch_features = image_patches.detach().cpu().numpy()
    num_patches = patch_features.shape[0]

    # Derive the grid size from the data instead of hard-coding 37 x 37.
    grid_side = int(round(np.sqrt(num_patches)))
    if grid_side * grid_side != num_patches:
        raise ValueError(f"Expected a square patch grid, got {num_patches} patches")
    grid_shape = (grid_side, grid_side)

    # 1. First PCA on all patches
    pca1 = PCA(n_components=1)
    first_component = pca1.fit_transform(patch_features).reshape(grid_shape)

    # 2. Threshold on the first component: keep only the side (positive or
    #    negative) that dominates the image centre.
    sign_prediction, rec = choose_pca_sign(first_component, center_ratio)
    if sign_prediction == 'positive':
        binary_mask = (first_component > 0).astype(np.uint8)
    else:
        binary_mask = (first_component < 0).astype(np.uint8)

    # Filter only the largest contiguous region
    filtered_mask = filter_largest_connected_component_and_fill(binary_mask)

    # Convert mask back to vector form (for patch indexing)
    mask = filtered_mask.flatten().astype(bool)

    foreground_patches = patch_features[mask]
    num_foreground = foreground_patches.shape[0]
    print(f"Behalte {num_foreground} von {num_patches} Patches (Foreground)")

    # Start from an all-white image; only foreground patches get a colour.
    rgb_image = np.ones((num_patches, 3))

    # 3. Second PCA on the filtered patches. PCA needs at least as many
    #    samples and features as components; with fewer, the foreground
    #    patches stay white rather than raising.
    if min(foreground_patches.shape) >= 3:
        pca2 = PCA(n_components=3)
        colours = pca2.fit_transform(foreground_patches)

        # 4. Normalize for visualization (RGB); guard against a constant
        #    component, which would otherwise divide by zero and produce NaN.
        colours -= colours.min(axis=0)
        value_range = colours.max(axis=0)
        value_range[value_range == 0] = 1
        colours /= value_range

        # 5. Insert the colours at the foreground positions so the original
        #    patch order is preserved.
        rgb_image[mask] = colours

    # Return to 2D image format
    rgb_image = rgb_image.reshape(grid_side, grid_side, 3)

    # Create alpha channel: 0.8 for foreground, 0 for background
    alpha_channel = np.zeros(num_patches)
    alpha_channel[mask] = 0.8
    alpha_channel = alpha_channel.reshape(grid_shape)

    # Create RGBA image
    rgba_image = np.concatenate([rgb_image, alpha_channel[..., None]], axis=-1)

    return rgba_image, rec

### Function - remove_background_of_img

In [10]:

def remove_background_of_img(img_path, center_ratio=0.8):
    """Replace the background of an image with the object's average colour.

    The image is preprocessed with ``crop_on_original``, its patch features are
    segmented with ``object_detection``, and the resulting patch-level
    foreground mask is upsampled to image resolution. Foreground pixels are
    kept; background pixels are replaced by the mean colour of the foreground
    (white if no foreground was found). The bilinear upsampling gives the mask
    a soft edge, so pixels along the object border are blended.

    Args:
        img_path: Path of the image file to process.
        center_ratio: Fraction of the patch grid used as central window when
            deciding which PCA sign is the foreground. Defaults to 0.8.

    Returns:
        PIL.Image.Image: The preprocessed image with its background recoloured.
    """
    # Image.open is lazy; converting inside the context manager loads the
    # pixels and lets the file handle be closed right away.
    with Image.open(img_path) as source_img:
        img = source_img.convert("RGB")

    img_preprocessed = crop_on_original(img)
    img_patches = load_patches(img_preprocessed, transform, model)
    rgba_image, _ = object_detection(img_patches, center_ratio)

    # --- Prepare image ---
    img_preprocessed = img_preprocessed.convert("RGB")
    image_tensor = TF.to_tensor(img_preprocessed)  # (3, H, W)

    # --- Extract mask ---
    # The alpha channel is 0 for background and a visualisation opacity (> 0)
    # for foreground. Binarise it so the object is kept at full intensity
    # instead of being blended with the fill colour.
    mask_np = (rgba_image[..., -1] > 0).astype(np.float32)
    mask_tensor = torch.from_numpy(mask_np)[None, None]  # (1, 1, grid_h, grid_w)

    # --- Adjust mask to image size ---
    _, H, W = image_tensor.shape
    mask_resized = torch.nn.functional.interpolate(mask_tensor, size=(H, W), mode='bilinear', align_corners=False)
    mask_resized = mask_resized.squeeze(0)  # (1, H, W)

    # --- Calculate the average color of the object ---
    object_mask = mask_resized > 0.5
    if object_mask.any():
        object_color = image_tensor[:, object_mask.squeeze(0)].mean(dim=1).view(3, 1, 1)
    else:
        object_color = torch.tensor([1.0, 1.0, 1.0]).view(3, 1, 1)

    # --- Alternatively: Set a fixed background color ---
    # object_color = torch.tensor([1.0, 1.0, 1.0]).view(3, 1, 1)  # white
    # object_color = torch.tensor([0.8, 0.8, 0.8]).view(3, 1, 1)  # light grey

    # --- Combination: Keep object, color the rest ---
    # The single-channel mask broadcasts over the three colour channels.
    result = image_tensor * mask_resized + object_color * (1 - mask_resized)

    # --- Back to PIL.Image ---
    return TF.to_pil_image(result)

# Apply

### Load Image (img) Paths

In [None]:
# Read the dataset table and collect the image paths listed in its
# 'image_path' column; the processing loop iterates over this list.
data = pd.read_csv(DATASET_PATH)
file_names = data['image_path'].tolist()

### Apply functions in loop

In [None]:
folder_path_source = Path(FOLDER_PATH_SOURCE)
folder_path_result = Path(FOLDER_PATH_RESULT)

for file_name in file_names:
    # NOTE(review): this replaces EVERY occurrence of "ts" in the relative
    # path, not only a trailing one - confirm that no folder or file stem in
    # the dataset contains "ts".
    file_name = Path(str(file_name).replace("ts", "png")) # to rename trap file file_name
    file_path_img = folder_path_source / file_name
    file_path_result = folder_path_result / file_name

    # Check for an existing result BEFORE running the model, so images that
    # were already processed are skipped cheaply and never overwritten.
    if file_path_result.exists():
        print(f"The file was not saved. The file path already exists:'{file_path_result}'")
        continue

    img_without_background = remove_background_of_img(file_path_img)

    # Create the target folder (including parents) on first use.
    file_folder_path_result = file_path_result.parent
    if not file_folder_path_result.exists():
        file_folder_path_result.mkdir(parents=True, exist_ok=True)
        print(f'Created new Folder to save results: {file_folder_path_result}')

    img_without_background.save(file_path_result)

Behalte 392 von 1369 Patches (Foreground)
Created new Folder to save results: /home/jleick/masterArbeitProjekt/data/ami_images_adapted/ami_traps_resized_and_object_detected_pca
Behalte 507 von 1369 Patches (Foreground)
Behalte 596 von 1369 Patches (Foreground)
Behalte 400 von 1369 Patches (Foreground)
Behalte 606 von 1369 Patches (Foreground)
Behalte 388 von 1369 Patches (Foreground)
Behalte 1032 von 1369 Patches (Foreground)
Behalte 396 von 1369 Patches (Foreground)
Behalte 470 von 1369 Patches (Foreground)
Behalte 316 von 1369 Patches (Foreground)
Behalte 591 von 1369 Patches (Foreground)
Behalte 628 von 1369 Patches (Foreground)
Behalte 420 von 1369 Patches (Foreground)
Behalte 384 von 1369 Patches (Foreground)
Behalte 559 von 1369 Patches (Foreground)
Behalte 376 von 1369 Patches (Foreground)
Behalte 1072 von 1369 Patches (Foreground)
Behalte 416 von 1369 Patches (Foreground)
Behalte 535 von 1369 Patches (Foreground)
Behalte 134 von 1369 Patches (Foreground)
Behalte 544 von 1369 Pa