In [1]:
!pip install ultralytics opencv-python-headless pyyaml



In [None]:
import cv2
import numpy as np
from functools import partial

class HomomorphicFilter:
    def __init__(self, a = 0.5, b = 1.5):
        self.a = float(a)
        self.b = float(b)

    # Filters
    def __butterworth_filter(self, I_shape, filter_params):
        P = I_shape[0]/2
        Q = I_shape[1]/2
        U, V = np.meshgrid(range(I_shape[0]), range(I_shape[1]), sparse=False, indexing='ij')
        Duv = (((U-P)**2+(V-Q)**2)).astype(float)
        H = 1/(1+(Duv/filter_params[0]**2)**filter_params[1])
        return (1 - H)

    def __gaussian_filter(self, I_shape, filter_params):
        P = I_shape[0]/2
        Q = I_shape[1]/2
        H = np.zeros(I_shape)
        U, V = np.meshgrid(range(I_shape[0]), range(I_shape[1]), sparse=False, indexing='ij')
        Duv = (((U-P)**2+(V-Q)**2)).astype(float)
        H = np.exp((-Duv/(2*(filter_params[0])**2)))
        return (1 - H)

    # Methods
    def __apply_filter(self, I, H):
        H = np.fft.fftshift(H)
        I_filtered = (self.a + self.b*H)*I
        return I_filtered

    def filter(self, I, filter_params, filter='butterworth', H = None):
        if len(I.shape) != 2:
            raise Exception('Improper image')

        I_log = np.log1p(np.array(I, dtype="float"))
        I_fft = np.fft.fft2(I_log)

        # Filters
        if filter=='butterworth':
            H = self.__butterworth_filter(I_shape = I_fft.shape, filter_params = filter_params)
        elif filter=='gaussian':
            H = self.__gaussian_filter(I_shape = I_fft.shape, filter_params = filter_params)
        elif filter=='external':
            print('external')
            if len(H.shape) != 2:
                raise Exception('Invalid external filter')
        else:
            raise Exception('Selected filter not implemented')
        

        I_fft_filt = self.__apply_filter(I = I_fft, H = H)
        I_filt = np.fft.ifft2(I_fft_filt)
        I = np.exp(np.real(I_filt))-1
        return np.uint8(I)


def relief_transform(img: np.ndarray, bias: int = 128) -> np.ndarray:
    if img.ndim != 2:
        raise ValueError("Expect a single-channel (grayscale) image")

    h, w = img.shape
    
    out = np.zeros_like(img, dtype=np.int16)


    out[1:-1, 1:-1] = (
        img[:-2, :-2].astype(np.int16)       # P(i-1, j-1)
        - img[2:, 2:].astype(np.int16)       # -P(i+1, j+1)
        + bias                               # + TH (128)
    )


    out = np.clip(out, 0, 255).astype(np.uint8)


    out[0, :], out[-1, :], out[:, 0], out[:, -1] = (
        img[0, :], img[-1, :], img[:, 0], img[:, -1]
    )

    return out

In [None]:

import cv2
import numpy as np
import torch
import albumentations as A
from functools import partial

from ultralytics import YOLO
from ultralytics.data.augment import Albumentations as UltralyticsAlbumentations
from ultralytics.models.yolo.segment.train import SegmentationTrainer  





class HECRTransform(A.ImageOnlyTransform):
    def __init__(self, clip_limit: float = 5.0, bias: int = 128,
                 p: float = 1.0):
        super().__init__(p=p)
        self.clip_limit = clip_limit
        self.bias = bias

    def apply(self, img: np.ndarray, **params) -> np.ndarray:

        gray_orig = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  


        kernel = np.ones((3, 3), np.uint8)
        eroded = cv2.erode(img, kernel, iterations=1)       

        homo = HomomorphicFilter(a=0.75, b=1.25)
        homo_out = homo.filter(I=eroded[:, :, 0], filter_params=[30, 2])
        clahe = cv2.createCLAHE(clipLimit=self.clip_limit)  
        ch1 = clahe.apply(homo_out)

        rel = relief_transform(gray_orig, self.bias)
        ch2 = clahe.apply(rel)

        out = np.stack((gray_orig, ch1, ch2), axis=2)      
        return out


class CustomAlbumentations(UltralyticsAlbumentations):
    def __init__(self, transform: A.Compose, contains_spatial: bool, p: float = 1.0):
        super().__init__(p)
        self.transform = transform
        self.transform.set_random_seed(torch.initial_seed())
        self.contains_spatial = contains_spatial 

    def __call__(self, labels):
        labels = super().__call__(labels)
        if "cls" in labels:
            labels["cls"] = labels["cls"].reshape(-1, 1)
        return labels

    def __repr__(self):
        return str(self.transform)



alb_transform = A.Compose([
        HECRTransform(p=1.0),         
        A.Blur(p=0.01, blur_limit=(3, 7)),
        A.MedianBlur(p=0.01, blur_limit=(3, 7)),
        A.ToGray(p=0.01, num_output_channels=3, method='weighted_average'),
        A.CLAHE(p=0.01, clip_limit=(1.0, 4.0))])

custom_albumentations = CustomAlbumentations(
    transform=alb_transform,
    contains_spatial=False      
)


class CustomSegmentationTrainer(SegmentationTrainer):
    def __init__(self, custom_albumentations, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._custom_alb = custom_albumentations

    def _close_dataloader_mosaic(self):
        super()._close_dataloader_mosaic()
        self._patch_dataset(self.train_loader.dataset)


    def build_dataset(self, img_path, mode="train", batch=None):
        ds = super().build_dataset(img_path, mode=mode, batch=batch)
        return self._patch_dataset(ds)

    def _patch_dataset(self, dataset):
        tfms = dataset.transforms.tolist()
        for i, t in enumerate(tfms):
            if isinstance(t, UltralyticsAlbumentations):
                tfms[i] = self._custom_alb
        return dataset

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


In [None]:
from ultralytics import YOLO

model = YOLO("yolo11n-seg.pt")
model.train(
    data="/kaggle/input/defect-detection-cropped/data.yaml",
    epochs=100,
    trainer=partial(CustomSegmentationTrainer,
                    custom_albumentations=custom_albumentations),
    imgsz=1024,
    batch=16,
    device=0,
    workers=4
)

Ultralytics 8.3.135 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/input/defect-detection-cropped/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.

100%|██████████| 5.35M/5.35M [00:00<00:00, 77.3MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 3.0±0.4 ms, read: 88.1±19.3 MB/s, size: 1081.1 KB)


[34m[1mtrain: [0mScanning /kaggle/input/defect-detection-cropped/labels... 25451 images, 9915 backgrounds, 0 corrupt: 100%|██████████| 25451/25451 [03:02<00:00, 139.68it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 1.1±0.3 ms, read: 116.6±13.8 MB/s, size: 1157.2 KB)


[34m[1mval: [0mScanning /kaggle/input/defect-detection-cropped/labels... 6363 images, 2487 backgrounds, 0 corrupt: 100%|██████████| 6363/6363 [00:43<00:00, 145.77it/s]


Plotting labels to runs/segment/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 90 weight(decay=0.0), 101 weight(decay=0.0005), 100 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 2 dataloader workers
Logging results to [1mruns/segment/train[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      1/100      6.87G      5.175      6.385      140.7      1.499         49       1024:   0%|          | 2/1591 [00:03<37:49,  1.43s/it]  

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


      1/100      6.87G      3.712      5.124      98.12      1.323         30       1024:   0%|          | 3/1591 [00:03<29:37,  1.12s/it]
100%|██████████| 755k/755k [00:00<00:00, 19.7MB/s]
      1/100      7.87G      1.167      1.779      11.11     0.9401         27       1024:  94%|█████████▍| 1497/1591 [40:19<02:16,  1.45s/it]