### Bildvorverarbeitung mit ROI und einem sliding window

In [None]:
import cv2
import numpy as np
from pathlib import Path
from dataclasses import dataclass
import random
import shutil
import sys

@dataclass
class QRROIConfig:
    """Tunable parameters for candidate detection and patch extraction."""

    # Edge length (px) of every patch that is returned; also the tile size
    # used when a candidate region is larger than one patch.
    patch_size: int = 256
    # Fraction by which neighbouring tiles inside a candidate overlap
    # (stride = patch_size * (1 - roi_overlap)).
    roi_overlap: float = 0.75
    # Same as roi_overlap, but for the sliding window over the whole image.
    global_overlap: float = 0.75
    # Contours whose cv2.contourArea is below this value are discarded.
    min_area: int = 256
    # At most this many candidates are kept, largest bounding box first.
    top_k: int = 50
    # When True, main() also runs the sliding window over the whole image.
    enable_global_search: bool = True
    # Global window size is min(width, height) // global_scale_divisor ...
    global_scale_divisor: int = 4
    # ... but never smaller than this many pixels.
    min_adaptive_size: int = 128
    # When True, main() shows a down-scaled preview window per image.
    debug_view: bool = True 

# --- Hilfsfunktionen (Unverändert) ---

def get_square_patch(img, cx, cy, size, target_size=256):
    """Crop a ``size`` x ``size`` window centred on (cx, cy), resized to ``target_size``.

    Parts of the window that fall outside ``img`` are filled with a constant
    black border. The crop is resampled with Lanczos interpolation unless it
    already measures ``target_size`` on both axes.
    """
    img_h, img_w = img.shape[:2]

    # Window bounds in image coordinates; they may reach beyond the image.
    left = cx - size // 2
    top = cy - size // 2
    right = left + size
    bottom = top + size

    # Overhang per side equals the amount of border that has to be added.
    grow_top = max(0, -top)
    grow_bottom = max(0, bottom - img_h)
    grow_left = max(0, -left)
    grow_right = max(0, right - img_w)

    source = img
    if grow_top or grow_bottom or grow_left or grow_right:
        source = cv2.copyMakeBorder(
            img, grow_top, grow_bottom, grow_left, grow_right,
            cv2.BORDER_CONSTANT, value=[0, 0, 0],
        )
        # Shift the bounds into the coordinate system of the padded image.
        left += grow_left
        right += grow_left
        top += grow_top
        bottom += grow_top

    patch = source[top:bottom, left:right]

    if tuple(patch.shape[:2]) == (target_size, target_size):
        return patch
    return cv2.resize(patch, (target_size, target_size), interpolation=cv2.INTER_LANCZOS4)

def _roi_window_starts(origin, extent, window, stride):
    """Return start offsets of ``window``-long tiles covering ``extent`` pixels from ``origin``.

    Tiles advance by ``stride`` and the last tile is aligned with the far edge,
    so the whole extent is covered. When the extent is not longer than the
    window, a single tile centred on the extent is returned (it may start
    before ``origin``).
    """
    if extent <= window:
        return [origin + (extent - window) // 2]
    last = origin + extent - window
    return [*range(origin, last, stride), last]


def generate_roi_patches(img, cand, cfg: QRROIConfig):
    """Cut patches of ``cfg.patch_size`` out of one candidate region.

    Args:
        img: Source image array (rows, columns[, channels]).
        cand: Candidate dict with the keys ``cx``, ``cy``, ``x``, ``y``,
            ``w`` and ``h`` as produced by ``detect_candidates``.
        cfg: Supplies ``patch_size`` and ``roi_overlap``.

    Returns:
        ``(patches, coords)``: the patch images and, in the same order, one
        ``(x, y, size)`` tuple per patch giving the top-left corner and edge
        length of the source window. Corners can be negative when a window
        reaches beyond the image; ``get_square_patch`` pads those areas.
    """
    patches, coords = [], []
    S = cfg.patch_size

    if cand["w"] <= S and cand["h"] <= S:
        # Region fits into one patch: a single square crop around its centre.
        crop_size = max(cand["w"], cand["h"])
        patches.append(get_square_patch(img, cand["cx"], cand["cy"], crop_size, S))
        coords.append((cand["cx"] - crop_size // 2, cand["cy"] - crop_size // 2, crop_size))
        return patches, coords

    # Region exceeds the patch size on at least one axis: tile it. Each axis
    # is handled on its own so that a region that is long on one axis but
    # short on the other still yields one row/column of tiles. The previous
    # range()-based loop was empty on the short axis in that case and
    # returned no patches at all.
    stride = max(1, int(S * (1 - cfg.roi_overlap)))
    x_starts = _roi_window_starts(cand["x"], cand["w"], S, stride)
    y_starts = _roi_window_starts(cand["y"], cand["h"], S, stride)
    for ay in y_starts:
        for ax in x_starts:
            patches.append(get_square_patch(img, ax + S // 2, ay + S // 2, S, S))
            coords.append((ax, ay, S))
    return patches, coords

def _global_window_starts(extent, window, stride):
    """Return start offsets of ``window``-long tiles covering ``extent`` pixels from 0.

    Tiles advance by ``stride`` and the last tile is aligned with the far edge.
    When the extent is not longer than the window, a single tile centred on
    the extent is returned; its start is then zero or negative.
    """
    if extent <= window:
        return [(extent - window) // 2]
    last = extent - window
    return [*range(0, last, stride), last]


def generate_global_patches(img, cfg: QRROIConfig):
    """Slide a square window over the whole image and return every crop.

    The window edge is ``min(width, height) // cfg.global_scale_divisor`` but
    at least ``cfg.min_adaptive_size``; neighbouring windows overlap by
    ``cfg.global_overlap``. Every crop is resized to ``cfg.patch_size``.

    Args:
        img: Source image array (rows, columns[, channels]).
        cfg: Supplies the window and overlap parameters named above.

    Returns:
        ``(patches, coords)``: the patch images and, in the same order, one
        ``(x, y, size)`` tuple per patch (top-left corner and edge length of
        the source window). Corners are negative when the image is smaller
        than the window; ``get_square_patch`` pads the missing area.
    """
    h, w = img.shape[:2]
    patches, coords = [], []
    win_size = max(cfg.min_adaptive_size, min(w, h) // cfg.global_scale_divisor)
    stride = max(1, int(win_size * (1 - cfg.global_overlap)))

    # Axis-wise start offsets. Images smaller than the window get exactly one
    # centred window per axis; the previous range()-based loop could be empty
    # there, so small images silently produced no global patches.
    x_starts = _global_window_starts(w, win_size, stride)
    y_starts = _global_window_starts(h, win_size, stride)
    for cy in y_starts:
        for cx in x_starts:
            patches.append(
                get_square_patch(img, cx + win_size // 2, cy + win_size // 2, win_size, cfg.patch_size)
            )
            coords.append((cx, cy, win_size))
    return patches, coords

def detect_candidates(img, cfg: QRROIConfig):
    """Find bright connected regions and return their bounding boxes.

    The image is converted from BGR to grayscale, binarised with Otsu's
    threshold and closed with a 25x25 rectangular kernel. External contours
    with an area below ``cfg.min_area`` are dropped.

    Returns:
        A list of dicts with the keys ``cx``, ``cy`` (box centre), ``x``,
        ``y`` (top-left corner), ``w`` and ``h``, sorted by bounding-box area
        in descending order and truncated to ``cfg.top_k`` entries.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _thresh, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Closing merges neighbouring bright fragments into one blob.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 25))
    merged = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)
    outlines, _hierarchy = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = [
        cv2.boundingRect(outline)
        for outline in outlines
        if cv2.contourArea(outline) >= cfg.min_area
    ]
    found = [
        {"cx": bx + bw // 2, "cy": by + bh // 2, "x": bx, "y": by, "w": bw, "h": bh}
        for bx, by, bw, bh in boxes
    ]

    # Largest bounding boxes first; the sort is stable, so ties keep contour order.
    found.sort(key=lambda box: box["w"] * box["h"], reverse=True)
    return found[:cfg.top_k]

# --- Hauptfunktion ---

def main():
    """Cut ROI-based and global sliding-window patches from randomly chosen images.

    Reads JPG/PNG files from a fixed input folder, asks on stdin how many of
    them to process and, for each selected image that can be loaded:

    * copies the original into ``test_picture/``,
    * writes one JPEG per patch into ``test_patches/patches/<image stem>/``,
    * records one ``;``-separated metadata line per written patch
      (source file; patch path; x; y; size; image width; image height).

    The metadata lines are written to ``test_patches/metadata.txt`` at the
    end. With ``cfg.debug_view`` a down-scaled preview is shown per image;
    pressing ``q`` in the preview stops the loop early.
    """
    # --- 1. Path configuration ---
    input_dir = Path("~/DatenUbuntu/Studium/1. Semester/KI-Projekt/modeltest/pictures").expanduser()  # source images
    test_picture_dir = Path("test_picture")  # copies of the processed originals
    output_root = Path("test_patches")  # patches and metadata

    cfg = QRROIConfig()

    # --- 2. Find images & choose how many ---
    # Validate the input first so that an aborted run leaves no empty output
    # folders behind.
    if not input_dir.exists():
        print(f"❌ Ordner '{input_dir}' existiert nicht.")
        return

    img_files = list(input_dir.glob("*.[jJ][pP][gG]")) + list(input_dir.glob("*.[pP][nN][gG]"))

    if not img_files:
        print(f"❌ Keine Bilder in '{input_dir}' gefunden.")
        return

    try:
        user_input = input(
            f"Wie viele Bilder aus '{len(img_files)}' zufällig wählen? (Zahl oder 'all'): "
        ).strip()
        if user_input.lower() == "all":
            num_to_process = len(img_files)
        else:
            # Clamp the request to the range 1..number of available images.
            num_to_process = max(1, min(int(user_input), len(img_files)))
    except (ValueError, EOFError):
        # EOFError: stdin is closed (non-interactive run); treat it like bad input.
        print("⚠️ Ungültige Eingabe. Verarbeite 1 Bild.")
        num_to_process = 1

    selected_files = random.sample(img_files, num_to_process)
    patch_metadata = []

    # Create the output folders.
    test_picture_dir.mkdir(parents=True, exist_ok=True)
    out_patches_root = output_root / "patches"
    out_patches_root.mkdir(parents=True, exist_ok=True)

    print(f"🚀 Starte Verarbeitung von {num_to_process} Bildern...")

    # --- 3. Processing loop ---
    for img_file in selected_files:
        # A) Load first; only images that can be decoded are copied and processed.
        img = cv2.imread(str(img_file))
        if img is None:
            print(f"⚠️ {img_file.name}: Bild konnte nicht geladen werden, übersprungen.")
            continue

        # B) Keep a copy of the original next to the results.
        shutil.copy2(img_file, test_picture_dir / img_file.name)

        # One sub-folder per image for its patches.
        img_patch_dir = out_patches_root / img_file.stem
        img_patch_dir.mkdir(parents=True, exist_ok=True)

        h_orig, w_orig = img.shape[:2]
        vis_img = img.copy()  # canvas for the preview rectangles

        # C) ROI patches (detected candidates)
        candidates = detect_candidates(img, cfg)
        for i, cand in enumerate(candidates):
            roi_ps, roi_coords = generate_roi_patches(img, cand, cfg)
            # Green: bounding box of the candidate itself.
            cv2.rectangle(
                vis_img,
                (cand["x"], cand["y"]),
                (cand["x"] + cand["w"], cand["y"] + cand["h"]),
                (0, 255, 0),
                4,
            )

            for p_idx, (p, (px, py, ps)) in enumerate(zip(roi_ps, roi_coords)):
                name = f"ROI_{i}_p{p_idx}.jpg"
                # imwrite reports failure through its return value; do not
                # record metadata for a patch that is not on disk.
                if not cv2.imwrite(str(img_patch_dir / name), p):
                    print(f"⚠️ Patch '{name}' konnte nicht gespeichert werden.")
                    continue

                patch_metadata.append(
                    f"{img_file.name};{img_file.stem}/{name};{px};{py};{ps};{w_orig};{h_orig}"
                )
                # Yellow: source window of this ROI patch.
                cv2.rectangle(vis_img, (px, py), (px + ps, py + ps), (0, 255, 255), 2)

        # D) Global patches (sliding window over the whole image)
        if cfg.enable_global_search:
            global_ps, global_coords = generate_global_patches(img, cfg)
            for g_idx, (gp, (gx, gy, gs)) in enumerate(zip(global_ps, global_coords)):
                name = f"GLOBAL_p{g_idx}.jpg"
                if not cv2.imwrite(str(img_patch_dir / name), gp):
                    print(f"⚠️ Patch '{name}' konnte nicht gespeichert werden.")
                    continue

                patch_metadata.append(
                    f"{img_file.name};{img_file.stem}/{name};{gx};{gy};{gs};{w_orig};{h_orig}"
                )
                # Red: source window of this global patch.
                cv2.rectangle(vis_img, (gx, gy), (gx + gs, gy + gs), (0, 0, 255), 1)

        print(f"-> {img_file.name}: Verarbeitet und Patches gespeichert.")

        # E) Preview (optional)
        if cfg.debug_view:
            h, w = vis_img.shape[:2]
            scale = 800 / max(h, w)  # longest side becomes 800 px
            res_small = cv2.resize(vis_img, (int(w * scale), int(h * scale)))
            cv2.imshow("Preprocessing Vorschau (Auto-Run)", res_small)
            # Wait 100 ms, then continue. Press 'q' to abort.
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break

    # Store the metadata in the output root. Explicit UTF-8 because file
    # names may contain non-ASCII characters.
    with open(output_root / "metadata.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(patch_metadata))

    # Windows are only ever opened in debug mode, so only then call into the
    # GUI layer (it is not available in headless OpenCV builds).
    if cfg.debug_view:
        cv2.destroyAllWindows()
    print(f"✅ Fertig! Originale in '{test_picture_dir}', Ergebnisse in '{output_root}'.")

# Run the pipeline only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

### Bildvorverarbeitung mit nur Sliding Windows und 256px Mindestpatchgröße

In [None]:
import cv2
import numpy as np
from pathlib import Path
from dataclasses import dataclass, field
import random
import shutil
import sys

@dataclass
class MultiScaleConfig:
    """Parameters for the multi-scale sliding-window patch generator."""

    # Edge length (px) every patch is resized to.
    patch_size: int = 256
    
    # --- Multi-scale configuration ---
    # One sliding-window pass is run per entry (here 5 levels, from huge to small).
    # Each number divides the shorter image side: window = min(w, h) / divisor.
    # 1.1 = almost the whole image
    # 4.0 = a quarter of the shorter side
    scale_divisors: list = field(default_factory=lambda: [1.1, 1.5, 2.0, 3.0, 4.0])
    
    # Fraction by which neighbouring windows overlap
    # (stride = window * (1 - overlap)).
    overlap: float = 0.65 
    
    # IMPORTANT: the smallest window that is allowed.
    # Scales whose window would be smaller than this value are skipped,
    # to avoid the upscaling problem (artifacts).
    min_window_size: int = 256
    
    # When True, main() shows a down-scaled preview window per image.
    debug_view: bool = True

# --- Hilfsfunktionen ---

def get_square_patch(img, cx, cy, size, target_size=256):
    """Cut a square window centred on (cx, cy) and scale it to ``target_size``.

    ``cx``, ``cy`` and ``size`` may be floats; the window bounds are truncated
    to integers. Parts of the window outside ``img`` are filled with a
    constant black border.
    """
    img_h, img_w = img.shape[:2]

    # Integer window bounds in image coordinates.
    reach = int(size // 2)
    left = int(cx - reach)
    top = int(cy - reach)
    right = int(left + size)
    bottom = int(top + size)

    # Border needed per side, in copyMakeBorder order: top, bottom, left, right.
    pads = (
        max(0, -top),
        max(0, bottom - img_h),
        max(0, -left),
        max(0, right - img_w),
    )

    source = img
    if any(pads):
        source = cv2.copyMakeBorder(img, *pads, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        # Move the bounds into the coordinate system of the padded image.
        top, bottom = top + pads[0], bottom + pads[0]
        left, right = left + pads[2], right + pads[2]

    patch = source[top:bottom, left:right]

    # Resize only when the crop does not already have the target size.
    needs_resize = patch.shape[0] != target_size or patch.shape[1] != target_size
    if needs_resize:
        patch = cv2.resize(patch, (target_size, target_size), interpolation=cv2.INTER_LANCZOS4)
    return patch

def generate_multiscale_patches(img, cfg: MultiScaleConfig):
    """Run one sliding-window pass per entry of ``cfg.scale_divisors``.

    For every divisor the window edge is ``int(min(h, w) / divisor)``. Scales
    whose window is smaller than ``cfg.min_window_size`` are skipped, as are
    non-positive divisors (a divisor of 0 used to raise ZeroDivisionError).
    Each window is cropped and resized to ``cfg.patch_size``. Progress is
    printed per scale.

    Args:
        img: Source image array (rows, columns[, channels]).
        cfg: Supplies ``scale_divisors``, ``overlap``, ``min_window_size``
            and ``patch_size``.

    Returns:
        ``(patches, coords, scale_ids)``, three lists in the same order:
        the patch images, ``(x, y, size)`` of each source window, and the
        index into ``cfg.scale_divisors`` the patch came from (used by the
        caller to pick a colour).
    """
    h, w = img.shape[:2]
    base_size = min(h, w)

    all_patches = []
    all_coords = []
    all_scale_ids = []

    print(f"   Bildgröße: {w}x{h}")

    for scale_idx, divisor in enumerate(cfg.scale_divisors):
        if divisor <= 0:
            print(f"   ⚠️ Scale {divisor} übersprungen (Teiler muss > 0 sein).")
            continue

        # 1. Window size for this scale.
        win_size = int(base_size / divisor)

        # 2. Guard against windows that are too small (prevents upscaling).
        if win_size < cfg.min_window_size:
            print(f"   ⚠️ Scale {divisor} (Size {win_size}px) übersprungen (zu klein, min ist {cfg.min_window_size}px).")
            continue

        stride = max(1, int(win_size * (1 - cfg.overlap)))
        half = win_size // 2

        # Top-left corners per axis. The range overshoots by up to one stride
        # and min() clamps the last window so that it ends on the image edge.
        tops = [min(y, h - win_size) for y in range(0, h - win_size + stride, stride)]
        lefts = [min(x, w - win_size) for x in range(0, w - win_size + stride, stride)]

        for cy_top in tops:
            for cx_top in lefts:
                patch = get_square_patch(img, cx_top + half, cy_top + half, win_size, cfg.patch_size)
                all_patches.append(patch)
                all_coords.append((cx_top, cy_top, win_size))
                all_scale_ids.append(scale_idx)

        count_for_scale = len(tops) * len(lefts)
        print(f"   ✅ Scale {divisor} (Size {win_size}px): {count_for_scale} Patches.")

    return all_patches, all_coords, all_scale_ids

# --- Hauptfunktion ---

def main():
    """Cut multi-scale sliding-window patches from randomly chosen images.

    Reads JPG/PNG files from a fixed input folder, asks on stdin how many of
    them to process, then deletes and recreates ``test_picture/`` and
    ``test_patches/``. For each selected image that can be loaded it:

    * copies the original into ``test_picture/``,
    * writes one JPEG per patch into ``test_patches/patches/<image stem>/``,
    * records one ``;``-separated metadata line per written patch
      (source file; patch path; x; y; size; image width; image height).

    The metadata lines are written to ``test_patches/metadata.txt`` at the
    end. With ``cfg.debug_view`` a down-scaled preview is shown per image;
    pressing ``q`` in the preview stops the loop early.
    """
    # --- 1. Path configuration ---
    input_dir = Path("~/DatenUbuntu/Studium/1. Semester/KI-Projekt/modeltest/pictures").expanduser()
    test_picture_dir = Path("test_picture")
    output_root = Path("test_patches")

    cfg = MultiScaleConfig()

    # Colours for the visualisation (BGR), indexed by scale:
    # blue (huge) -> cyan -> yellow -> orange -> red (small)
    colors = [
        (255, 0, 0),    # scale 0 (large) - blue
        (255, 255, 0),  # scale 1 - cyan
        (0, 255, 255),  # scale 2 - yellow
        (0, 165, 255),  # scale 3 - orange
        (0, 0, 255),    # scale 4 (small) - red
    ]

    # --- 2. Find images ---
    if not input_dir.exists():
        print(f"❌ Ordner '{input_dir}' existiert nicht.")
        return

    img_files = list(input_dir.glob("*.[jJ][pP][gG]")) + list(input_dir.glob("*.[pP][nN][gG]"))

    if not img_files:
        print("❌ Keine Bilder gefunden.")
        return

    # Ask how many images to process.
    try:
        user_input = input(
            f"Wie viele Bilder aus '{len(img_files)}' zufällig wählen? (Zahl oder 'all'): "
        ).strip()
        if user_input.lower() == "all":
            num_to_process = len(img_files)
        else:
            # Clamp the request to the range 1..number of available images.
            num_to_process = max(1, min(int(user_input), len(img_files)))
    except (ValueError, EOFError):
        # EOFError: stdin is closed (non-interactive run); treat it like bad input.
        print("⚠️ Ungültige Eingabe. Verarbeite 1 Bild.")
        num_to_process = 1

    selected_files = random.sample(img_files, num_to_process)
    patch_metadata = []

    # Clean and recreate the output folders. This happens only after the input
    # has been validated, so an aborted run does not delete earlier results.
    if test_picture_dir.exists():
        shutil.rmtree(test_picture_dir)
    if output_root.exists():
        shutil.rmtree(output_root)

    test_picture_dir.mkdir(parents=True, exist_ok=True)
    out_patches_root = output_root / "patches"
    out_patches_root.mkdir(parents=True, exist_ok=True)

    print(f"🚀 Starte Multi-Scale Sliding Window auf {num_to_process} Bildern...")

    # --- 3. Loop ---
    for img_file in selected_files:
        print(f"\nVerarbeite: {img_file.name}")

        # A) Load first; only images that can be decoded are copied and processed.
        img = cv2.imread(str(img_file))
        if img is None:
            print(f"⚠️ {img_file.name}: Bild konnte nicht geladen werden, übersprungen.")
            continue
        shutil.copy2(img_file, test_picture_dir / img_file.name)

        h_orig, w_orig = img.shape[:2]
        vis_img = img.copy()  # canvas for the preview rectangles

        # One sub-folder per image for its patches.
        img_patch_dir = out_patches_root / img_file.stem
        img_patch_dir.mkdir(parents=True, exist_ok=True)

        # B) Generate the patches (all scales)
        patches, coords, scale_ids = generate_multiscale_patches(img, cfg)

        # C) Save & draw
        for i, (p, (x, y, s), s_idx) in enumerate(zip(patches, coords, scale_ids)):
            # The file name carries the scale index.
            name = f"S{s_idx}_p{i}.jpg"
            # imwrite reports failure through its return value; do not record
            # metadata for a patch that is not on disk.
            if not cv2.imwrite(str(img_patch_dir / name), p):
                print(f"⚠️ Patch '{name}' konnte nicht gespeichert werden.")
                continue

            patch_metadata.append(f"{img_file.name};{img_file.stem}/{name};{x};{y};{s};{w_orig};{h_orig}")

            # Colour by scale index (modulo in case there are more scales than colours).
            color = colors[s_idx % len(colors)]

            # Line thickness: large windows thicker, small ones thinner.
            thickness = max(1, int(s / 150))
            cv2.rectangle(vis_img, (x, y), (x + s, y + s), color, thickness)

        # D) Preview
        if cfg.debug_view:
            h, w = vis_img.shape[:2]
            scale_factor = 800 / max(h, w)  # longest side becomes 800 px
            res_small = cv2.resize(vis_img, (int(w * scale_factor), int(h * scale_factor)))

            # Draw the legend text into the preview.
            cv2.putText(res_small, "Blau=Riesig -> Rot=Klein", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            cv2.imshow("Multi-Scale Sliding Window", res_small)
            # Wait 100 ms, then continue. Press 'q' to abort.
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break

    # Store the metadata. Explicit UTF-8 because file names may contain
    # non-ASCII characters.
    with open(output_root / "metadata.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(patch_metadata))

    # Windows are only ever opened in debug mode, so only then call into the
    # GUI layer (it is not available in headless OpenCV builds).
    if cfg.debug_view:
        cv2.destroyAllWindows()
    print("✅ Fertig!")

# Run the pipeline only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()