In [None]:
!pip3.12 install albumentations
!pip3.12 install ultralytics


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [7]:
#!/usr/bin/env python3
import os
import cv2
import numpy as np
import albumentations as A
from ultralytics import YOLO

# ──────────────────────────────────────────────────────────────────────────────
# 1. Custom preprocessing classes
# ──────────────────────────────────────────────────────────────────────────────

class HomomorphicFilter:
    """Homomorphic filter for single-channel images.

    The image is taken to the log domain, its spectrum is multiplied by
    ``a + b * H`` (``H`` is a centred high-pass mask with values in [0, 1]),
    and the result is mapped back with the exponential. Frequencies where
    ``H`` is 0 are scaled by ``a``; frequencies where ``H`` is 1 are scaled
    by ``a + b``.

    Args:
        a: Gain applied to every frequency (floor of the transfer function).
        b: Extra gain applied where the high-pass mask is 1.
    """

    def __init__(self, a=0.5, b=1.5):
        self.a, self.b = float(a), float(b)

    @staticmethod
    def __squared_distance(shape):
        """Squared distance of every frequency bin from the centre ``(rows//2, cols//2)``."""
        P, Q = shape[0]//2, shape[1]//2
        U, V = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
        return (U-P)**2 + (V-Q)**2

    def __butterworth_filter(self, shape, params):
        """Centred Butterworth high-pass mask; ``params = (cutoff, order)``."""
        D = self.__squared_distance(shape)
        H = 1.0 / (1.0 + (D/(params[0]**2))**params[1])
        return 1.0 - H

    def __gaussian_filter(self, shape, params):
        """Centred Gaussian high-pass mask; ``params = (sigma,)``."""
        D = self.__squared_distance(shape)
        H = np.exp(-D/(2*(params[0]**2)))
        return 1.0 - H

    def __apply_filter(self, I_fft, H):
        """Multiply the (unshifted) spectrum by ``a + b*H`` for a centred mask ``H``."""
        # The mask has its zero-frequency term at index N//2 while ``fft2``
        # puts it at index 0.  ``ifftshift`` is the transform that moves N//2
        # to 0 for every N; ``fftshift`` only coincides with it for even N and
        # misplaces the mask by one bin on odd-sized axes.
        Hs = np.fft.ifftshift(H)
        return (self.a + self.b*Hs) * I_fft

    def filter(self, I, filter_params, mode='butterworth', H_ext=None):
        """Apply the homomorphic filter to a 2-D image.

        Args:
            I: 2-D array of non-negative intensities.
            filter_params: ``[cutoff, order]`` for ``'butterworth'``,
                ``[sigma]`` for ``'gaussian'``; ignored for ``'external'``.
            mode: ``'butterworth'``, ``'gaussian'`` or ``'external'``.
            H_ext: Centred mask (DC term at ``(rows//2, cols//2)``) used when
                ``mode == 'external'``.

        Returns:
            ``uint8`` array of the same shape as ``I``; values are clipped to
            [0, 255] and truncated (not rounded) to integers.

        Raises:
            ValueError: If ``I`` is not 2-D, ``mode`` is unknown, or
                ``mode == 'external'`` is requested without ``H_ext``.
        """
        if I.ndim != 2:
            raise ValueError("Input must be single-channel")
        I_log = np.log1p(I.astype(float))
        I_fft = np.fft.fft2(I_log)
        if mode == 'butterworth':
            H = self.__butterworth_filter(I_fft.shape, filter_params)
        elif mode == 'gaussian':
            H = self.__gaussian_filter(I_fft.shape, filter_params)
        elif mode == 'external':
            if H_ext is None:
                # Previously reported as "Unknown filter mode external",
                # which hid the real problem (the missing mask).
                raise ValueError("mode='external' requires H_ext")
            H = H_ext
        else:
            raise ValueError(f"Unknown filter mode {mode}")
        I_filt = np.fft.ifft2(self.__apply_filter(I_fft, H))
        # Undo log1p; the imaginary part is numerical residue and is dropped.
        I_out = np.exp(np.real(I_filt)) - 1
        return np.uint8(np.clip(I_out, 0, 255))


def relief_transform(img: np.ndarray, bias: int = 128) -> np.ndarray:
    """Emboss a grayscale image along its main diagonal.

    Each interior pixel becomes ``upper-left neighbour - lower-right
    neighbour + bias``, clipped to [0, 255]. The one-pixel border is copied
    from the input unchanged.

    Raises:
        ValueError: If ``img`` is not a 2-D array.
    """
    if img.ndim != 2:
        raise ValueError("Expect grayscale")

    rows, cols = img.shape
    upper_left = img[:-2, :-2].astype(int)
    lower_right = img[2:, 2:].astype(int)

    # int16 working buffer so the signed difference does not wrap before clipping.
    embossed = np.zeros((rows, cols), dtype=np.int16)
    embossed[1:-1, 1:-1] = upper_left - lower_right + bias
    result = np.clip(embossed, 0, 255).astype(np.uint8)

    # Restore the original frame: top row, bottom row, left column, right column.
    result[0, :] = img[0, :]
    result[-1, :] = img[-1, :]
    result[:, 0] = img[:, 0]
    result[:, -1] = img[:, -1]
    return result


class HECRTransform(A.ImageOnlyTransform):
    """Albumentations image-only transform that builds a 3-channel stack from a BGR image.

    Output channels, in order:
      0. grayscale conversion of the input,
      1. homomorphic-filtered (Butterworth, params ``[30, 2]``) first channel
         of the 3x3-eroded input, followed by CLAHE,
      2. diagonal relief of the grayscale image, followed by CLAHE.

    Args:
        clip_limit: ``clipLimit`` passed to ``cv2.createCLAHE``.
        bias: Offset forwarded to ``relief_transform``.
        p: Probability of applying the transform.
    """

    def __init__(self, clip_limit=5.0, bias=128, p=1.0):
        super().__init__(p=p)
        self.homo = HomomorphicFilter(a=0.75, b=1.25)
        self.bias = bias
        self.clip_limit = clip_limit

    def apply(self, img, **kwargs):
        """Return the ``(H, W, 3)`` gray / homo+CLAHE / relief+CLAHE stack for ``img``."""
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Homomorphic branch works on channel 0 of the eroded colour image.
        kernel = np.ones((3, 3), np.uint8)
        eroded_first = cv2.erode(img, kernel, iterations=1)[:, :, 0]
        homo_out = self.homo.filter(eroded_first, filter_params=[30, 2])

        # A fresh CLAHE object per call, shared by both enhanced channels.
        equalizer = cv2.createCLAHE(clipLimit=self.clip_limit)
        homo_eq = equalizer.apply(homo_out)
        relief_eq = equalizer.apply(relief_transform(grayscale, self.bias))

        return np.dstack((grayscale, homo_eq, relief_eq))

# ──────────────────────────────────────────────────────────────────────────────
# 2. Configuration
# ──────────────────────────────────────────────────────────────────────────────
WEIGHTS     = "last-4.pt"             # checkpoint path handed to YOLO(...)
IMAGE_PATH  = "0-340-ls-14-d01.png"   # large input image read with cv2.imread
OUTPUT_DIR  = "outputs"               # created on demand; predictions.txt is written here
IMG_SIZE    = 1024                    # `imgsz` passed to model.predict for every tile
CONF_THRES  = 0.05                    # `conf` threshold passed to model.predict
TILE        = 1140                    # side length, in pixels, of each crop taken from the image
STRIDE      = int(TILE * 0.8)  # step between consecutive tile origins: 80% of TILE, so neighbours overlap by ~20%

# ──────────────────────────────────────────────────────────────────────────────
# 3. Prediction on tiled image
# ──────────────────────────────────────────────────────────────────────────────
def _tile_origins(length, tile, stride):
    """Return start offsets of ``tile``-sized windows that cover ``[0, length)``.

    Windows advance by ``stride``; a final window flush with the far edge is
    appended when the regular steps would leave a remainder uncovered. An
    axis no longer than ``tile`` gets the single origin 0.
    """
    if stride <= 0:
        raise ValueError("stride must be positive")
    if length <= tile:
        return [0]
    last = length - tile
    origins = list(range(0, last + 1, stride))
    if origins[-1] != last:
        origins.append(last)
    return origins


def predict_large_image():
    """Run tiled YOLO inference on ``IMAGE_PATH`` and save the detections.

    The image is cut into ``TILE`` x ``TILE`` crops (step ``STRIDE``) along
    both axes, each crop is preprocessed with ``HECRTransform`` and passed to
    the model, and the resulting boxes are shifted back into full-image
    coordinates. All detections are written to
    ``OUTPUT_DIR/predictions.txt`` with the header ``x1 y1 x2 y2 conf cls``.

    Detections from overlapping tiles are not merged, so an object lying in
    an overlap region can appear more than once in the output.

    Raises:
        FileNotFoundError: If ``IMAGE_PATH`` cannot be read by OpenCV.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Load model
    model = YOLO(WEIGHTS)
    model.fuse()

    # Read the large image
    img = cv2.imread(IMAGE_PATH)
    if img is None:
        raise FileNotFoundError(f"Image not found: {IMAGE_PATH}")
    h, w = img.shape[:2]

    transformer = A.Compose([HECRTransform(p=1.0)])
    tile_preds = []

    # Slide the window over the whole image.  When the height equals TILE the
    # vertical axis yields the single origin 0, matching a purely horizontal
    # sweep; edge tiles guarantee the right/bottom remainders are covered.
    for y0 in _tile_origins(h, TILE, STRIDE):
        for x0 in _tile_origins(w, TILE, STRIDE):
            crop = img[y0:y0 + TILE, x0:x0 + TILE]
            proc = transformer(image=crop)["image"]

            # Inference on crop
            res = model.predict(
                source=proc,
                imgsz=IMG_SIZE,
                conf=CONF_THRES,
                verbose=False
            )[0]

            # Extract detections
            boxes = res.boxes.xyxy.cpu().numpy()               # (N,4)
            confs = res.boxes.conf.cpu().numpy()               # (N,)
            classes = res.boxes.cls.cpu().numpy().astype(int)  # (N,)
            if len(boxes) == 0:
                continue

            # Offset boxes back to full-image coords
            offset = np.array([x0, y0, x0, y0], dtype=boxes.dtype)
            tile_preds.append(np.column_stack([boxes + offset, confs, classes]))

    # Always hand savetxt a 2-D (N, 6) array: an empty 1-D array would not
    # match the six-column format list and make savetxt raise.
    pred_arr = np.vstack(tile_preds) if tile_preds else np.empty((0, 6))

    # Save all predictions
    out_file = os.path.join(OUTPUT_DIR, "predictions.txt")
    np.savetxt(
        out_file,
        pred_arr,
        fmt=['%.2f','%.2f','%.2f','%.2f','%.4f','%d'],
        header='x1 y1 x2 y2 conf cls',
        comments=''
    )
    print(f"✅ Saved {len(pred_arr)} predictions to {out_file}")



In [8]:
# Run tiled inference on IMAGE_PATH; detections are written to OUTPUT_DIR/predictions.txt.
predict_large_image()

YOLO11n-seg summary (fused): 113 layers, 2,837,103 parameters, 0 gradients, 10.2 GFLOPs
✅ Saved 103 predictions to outputs/predictions.txt


In [9]:
def annotate_image_from_txt(image_path: str, preds_path: str, out_path: str):
    """Draw saved detections on an image and write the result to disk.

    Args:
        image_path: Image to annotate (read with ``cv2.imread``).
        preds_path: Text file with one detection per line in the order
            ``x1 y1 x2 y2 conf cls``; an optional first line starting with
            ``x1`` is treated as a header and skipped. Blank lines are ignored.
        out_path: Destination of the annotated image.

    Each box gets its own colour, drawn from a fixed-seed generator so that
    repeated runs produce the same image. If the predictions file is empty
    nothing is written.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read.
        ValueError: If a non-blank line does not contain exactly six fields.
        OSError: If OpenCV reports that the annotated image was not written.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Load predictions (skip header); the context manager closes the handle.
    with open(preds_path) as fh:
        lines = fh.read().strip().splitlines()
    if not lines:
        print("No predictions to annotate.")
        return
    if lines[0].startswith('x1'):
        lines = lines[1:]

    font = cv2.FONT_HERSHEY_SIMPLEX
    # Local, seeded generator: reproducible colours without touching the
    # global NumPy random state.
    rng = np.random.default_rng(0)

    for ln in lines:
        if not ln.strip():
            continue
        x1, y1, x2, y2, conf, cls = ln.split()
        x1, y1, x2, y2 = map(float, (x1, y1, x2, y2))
        conf = float(conf)
        cls = int(float(cls))  # also accepts a class id written as "3.0"
        left, top = int(x1), int(y1)

        # Draw rectangle
        color = tuple(int(c) for c in rng.integers(0, 255, size=3))
        cv2.rectangle(img,
                      (left, top),
                      (int(x2), int(y2)),
                      color=color,
                      thickness=2)
        label = f"{cls}:{conf:.2f}"
        # Text background; placed above the box, or shifted down to stay
        # inside the image when the box touches the top edge.
        (tw, th), _ = cv2.getTextSize(label, font,
                                     fontScale=0.5, thickness=1)
        label_top = max(top - th - 4, 0)
        label_bottom = label_top + th + 4
        cv2.rectangle(img,
                      (left, label_top),
                      (left + tw, label_bottom),
                      color, -1)
        cv2.putText(img, label,
                    (left, label_bottom - 2),
                    font,
                    0.5, (255,255,255), 1)

    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(out_path, img):
        raise OSError(f"Could not write annotated image to {out_path}")
    print(f"✅ Saved annotated image to {out_path}")

In [11]:
# Overlay the detections saved in OUTPUT_DIR/predictions.txt on the source image.
predictions_path = os.path.join(OUTPUT_DIR, "predictions.txt")
annotated_path = os.path.join(OUTPUT_DIR, "annotated_large_image.png")
annotate_image_from_txt(IMAGE_PATH, predictions_path, annotated_path)

✅ Saved annotated image to outputs/annotated_large_image.png
