In [None]:
import os, cv2, numpy as np, shutil
from glob import glob
from collections import deque

In [None]:
# Mount Google Drive at /content/drive; every dataset path used in this
# notebook lives under that mount point.
from google.colab import drive
# force_remount=False: an existing mount is left in place rather than remounted.
drive.mount('/content/drive', force_remount=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# ------------------ FOLDER PATHS ------------------
# Every location sits under the mounted Drive; the shared prefixes are factored
# out so each one is spelled only once. The resulting strings are unchanged.
DRIVE_ROOT = "/content/drive/MyDrive"
DATASET_ROOT = f"{DRIVE_ROOT}/PCB_DATASET"

# Reference image that every input is differenced against.
template_path = f"{DATASET_ROOT}/PCB_USED/01.JPG"
# Images to be compared with the template.
input_folder = f"{DRIVE_ROOT}/MY_PCB-FOLDERS/01_MISSING HOLE"
# Destination for the per-image difference masks (and the debug sub-folder).
output_mask_folder = f"{DATASET_ROOT}/Output_mask"
# Destination for the cropped patches.
final_output_folder = f"{DATASET_ROOT}/PCB_USED/Missing_Hole_Defect"

In [None]:
# ---------------- PARAMETERS -------------------
# --- Mask generation ---------------------------------------------------------
# Template and inputs are both resized to this before being differenced.
TARGET_SIZE = (500, 500)
# (sic: "OSTU") True selects Otsu thresholding of the difference image;
# False falls back to the fixed level below.
USE_OSTU = True
FIXED_THRESH = 10
# Structuring-element size for the morphological opening of the mask.
MORPH_KERNEL = (3, 3)
# When True the opened mask is dilated once with the same kernel.
DILATE_AFTER = False

# --- Patch extraction --------------------------------------------------------
# Level used to re-binarise a saved mask before contour detection.
BINARY_THRESH_FOR_CONTOURS = 20
MIN_CONTOUR_AREA = 10            # Further lowered for testing
MEAN_BRIGHTNESS_THRESHOLD = 0    # Lowered to 0 for testing
# Side length, in pixels, of the square patches written to disk.
PATCH_SIZE = 64
MAX_PATCHES_PER_IMAGE = 3
MAX_TOTAL_PATCHES = 800

# --- Diagnostics -------------------------------------------------------------
# When True, binary/overlay images are written to a "debug" sub-folder.
SAVE_DEBUG = True

In [None]:
# Duplicate suppression
# A candidate box is dropped when its IoU with an already-saved box of the
# same image exceeds this value.
IOU_SUPPRESS_THRESHOLD = 0.8
# Patches are shrunk to this size before being hashed for duplicate detection.
HASH_SIZE = (16, 16)

# Create the output locations up front; exist_ok makes the cell re-runnable.
for _out_dir in (output_mask_folder, final_output_folder):
    os.makedirs(_out_dir, exist_ok=True)

# debug_folder is only defined (and created) when debugging output is enabled.
if SAVE_DEBUG:
    debug_folder = os.path.join(output_mask_folder, "debug")
    os.makedirs(debug_folder, exist_ok=True)

In [None]:
# Helper functions (unchanged)
def iou(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexed as (x1, y1, x2, y2). The result is 0.0 when the
    boxes do not overlap (including when they merely touch along an edge);
    otherwise it is intersection area divided by union area.
    """
    # Extent of the overlap along each axis, floored at zero.
    overlap_w = max(0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]))
    overlap_h = max(0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]))
    intersection = overlap_w * overlap_h
    if intersection == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - intersection
    return intersection / float(union)

def patch_hash(img):
    """Return a bytes key used to detect duplicate patches.

    A 3-dimensional input is converted from BGR to grayscale; any other input
    is used as-is. The image is resized to HASH_SIZE, standardised to zero
    mean / unit standard deviation, rescaled to be centred on 128 with 16
    levels per standard deviation, and serialised with ``tobytes()``.

    Two patches produce the same key only if their normalised thumbnails are
    identical byte for byte.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    small = cv2.resize(gray, HASH_SIZE, interpolation=cv2.INTER_AREA)
    # 1e-6 keeps the division finite for a perfectly flat patch.
    scaled = (small - small.mean()) / (small.std() + 1e-6) * 16 + 128
    # A pixel more than ~8 standard deviations from the mean lands outside
    # 0..255; casting such floats to uint8 wraps or is platform-dependent, so
    # saturate explicitly before the cast.
    return np.clip(scaled, 0, 255).astype(np.uint8).tobytes()

In [None]:
# Load template
# Read the reference image as single-channel; cv2.imread signals failure by
# returning None rather than raising, so check explicitly.
template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
if template is None:
    raise RuntimeError(f"Template not found: {template_path}")

# Bring the template to the common working size used for differencing.
template = cv2.resize(template, TARGET_SIZE, interpolation=cv2.INTER_AREA)
print("Template loaded:", template.shape)


Template loaded: (500, 500)


In [None]:
# Collect input files
# Names (not full paths) of the images in input_folder, in sorted order.
input_files = sorted(
    f for f in os.listdir(input_folder)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)
print("Input images found:", len(input_files))
if not input_files:
    # Raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, whereas an exception stops the run here with a readable
    # message (same style as the template-loading check).
    raise RuntimeError("No input images in " + input_folder)


Input images found: 20


In [None]:
# Generate masks
saved_masks = 0

# Loop-invariant settings: the structuring element and the threshold call
# arguments depend only on the configuration, not on the current image.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, MORPH_KERNEL)
if USE_OSTU:
    # With THRESH_OTSU the level passed in (0) is ignored by OpenCV.
    thresh_level, thresh_mode = 0, cv2.THRESH_BINARY + cv2.THRESH_OTSU
else:
    thresh_level, thresh_mode = FIXED_THRESH, cv2.THRESH_BINARY

for i, fname in enumerate(input_files, start=1):
    img = cv2.imread(os.path.join(input_folder, fname), cv2.IMREAD_GRAYSCALE)
    shape_text = 'None' if img is None else img.shape
    print(f"[{i}] Loaded img: {shape_text} for {fname}")
    if img is None:
        # Unreadable file: already reported above, move on.
        continue

    # Same working size as the template so the two can be differenced.
    img = cv2.resize(img, TARGET_SIZE, interpolation=cv2.INTER_AREA)

    # Absolute difference against the template, then Gaussian followed by
    # median smoothing before thresholding.
    diff = cv2.medianBlur(
        cv2.GaussianBlur(cv2.absdiff(template, img), (5, 5), 0),
        5,
    )
    _, thresh = cv2.threshold(diff, thresh_level, 255, thresh_mode)

    # Opening removes small isolated specks; optional dilation grows what is left.
    mask = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    if DILATE_AFTER:
        mask = cv2.dilate(mask, kernel, iterations=1)

    # The mask keeps the input's file name (and extension) behind a "mask_" prefix.
    out_mask_path = os.path.join(output_mask_folder, "mask_" + fname)
    if not cv2.imwrite(out_mask_path, mask):
        print(f"[{i}] FAILED to save mask -> {out_mask_path}")
        continue
    saved_masks += 1
    print(f"[{i}] Saved mask -> {out_mask_path}")

print("Masks saved:", saved_masks)

In [None]:
# Patch extraction
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _resolve_original_path(mask_name, folder):
    """Return the path in `folder` of the image a mask belongs to, or None.

    Lookup order:
      1. the mask name with a leading "mask_" removed (checked case-insensitively);
      2. the same stem with each of .jpg/.jpeg/.png in lower and upper case;
      3. the first image, in sorted order, whose name starts with that stem.
    Step 3 only considers image files and sorts them, so the result does not
    depend on directory listing order and cannot be a non-image file.
    """
    if mask_name.lower().startswith("mask_"):
        orig_name = mask_name[len("mask_"):]
    else:
        orig_name = mask_name

    direct = os.path.join(folder, orig_name)
    if os.path.exists(direct):
        return direct

    base = os.path.splitext(orig_name)[0]
    for ext in ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG'):
        cand = os.path.join(folder, base + ext)
        if os.path.exists(cand):
            return cand

    prefixed = sorted(
        p for p in os.listdir(folder)
        if p.startswith(base) and p.lower().endswith(IMAGE_EXTENSIONS)
    )
    return os.path.join(folder, prefixed[0]) if prefixed else None


def _cut_patch(image, x1, y1, x2, y2):
    """Return image[y1:y2, x1:x2], zero-padding any part outside the image.

    Only the clipped crop is padded (not the whole image), which yields the
    same pixels as padding the full image and slicing afterwards. An empty
    array is returned if the window does not intersect the image at all.
    """
    img_h, img_w = image.shape[:2]
    top = max(0, -y1)
    left = max(0, -x1)
    bottom = max(0, y2 - img_h)
    right = max(0, x2 - img_w)
    crop = image[max(0, y1):min(img_h, y2), max(0, x1):min(img_w, x2)]
    if crop.size and any((top, left, bottom, right)):
        crop = cv2.copyMakeBorder(crop, top, bottom, left, right,
                                  borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return crop


global_id = 0        # running index used in output file names
hash_set = set()     # hashes of every patch saved so far (across all images)
total_saved = 0

# Loop-invariant values.
half = PATCH_SIZE // 2
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

mask_files = sorted(f for f in os.listdir(output_mask_folder)
                    if f.lower().endswith(IMAGE_EXTENSIONS))
print("Mask files found:", len(mask_files))

for m_i, mask_name in enumerate(mask_files, start=1):
    if total_saved >= MAX_TOTAL_PATCHES:
        print("Reached MAX_TOTAL_PATCHES limit:", MAX_TOTAL_PATCHES)
        break

    mask_path = os.path.join(output_mask_folder, mask_name)
    mask_img = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask_img is None:
        print("Cannot read mask:", mask_name)
        continue

    # Masks are stored under the input's extension (possibly JPEG), so
    # re-binarise before looking for contours, then close small gaps.
    _, binary = cv2.threshold(mask_img, BINARY_THRESH_FOR_CONTOURS, 255, cv2.THRESH_BINARY)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, close_kernel, iterations=1)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    print(f"[{m_i}] Contours in {mask_name}: {len(contours)}")
    if len(contours) == 0:
        if SAVE_DEBUG:
            cv2.imwrite(os.path.join(debug_folder, "binary_" + mask_name), binary)
        continue

    # Find original image
    orig_path = _resolve_original_path(mask_name, input_folder)
    if orig_path is None:
        print(f"[{m_i}] Original not found for mask {mask_name}")
        continue

    print(f"[{m_i}] Original path: {orig_path}")
    orig_img = cv2.imread(orig_path)
    if orig_img is None:
        print("Failed read original:", orig_path)
        continue
    # Contour coordinates are in mask space, so the original must match its size.
    if orig_img.shape[:2] != mask_img.shape[:2]:
        orig_img = cv2.resize(orig_img, (mask_img.shape[1], mask_img.shape[0]),
                              interpolation=cv2.INTER_AREA)

    # Largest regions first, so the per-image cap keeps the biggest ones.
    contours_sorted = sorted(contours, key=cv2.contourArea, reverse=True)
    saved_this_image = 0
    saved_boxes = []

    for cnt_idx, cnt in enumerate(contours_sorted):
        if saved_this_image >= MAX_PATCHES_PER_IMAGE:
            break
        if total_saved >= MAX_TOTAL_PATCHES:
            break

        area = cv2.contourArea(cnt)
        print(f"  [{m_i}] Contour {cnt_idx+1} area: {area:.2f} (min: {MIN_CONTOUR_AREA})")
        if area < MIN_CONTOUR_AREA:
            print(f"    Skipped: Area too small")
            continue

        # Fixed-size window centred on the contour's bounding rectangle.
        bx, by, bw, bh = cv2.boundingRect(cnt)
        cx = bx + bw // 2
        cy = by + bh // 2
        x1 = cx - half; y1 = cy - half
        x2 = cx + half; y2 = cy + half

        # Overlap is judged on the part of the window that lies inside the image.
        box = (max(0, x1), max(0, y1),
               min(orig_img.shape[1], x2), min(orig_img.shape[0], y2))

        too_much_overlap = False
        for sb in saved_boxes:
            iou_val = iou(box, sb)
            if iou_val > IOU_SUPPRESS_THRESHOLD:
                too_much_overlap = True
                print(f"    Skipped: IoU {iou_val:.2f} > {IOU_SUPPRESS_THRESHOLD} with saved box")
                break
        if too_much_overlap:
            continue

        patch = _cut_patch(orig_img, x1, y1, x2, y2)
        if patch.size == 0:
            print(f"    Skipped: Patch is empty")
            continue
        # 2 * half differs from PATCH_SIZE when PATCH_SIZE is odd; normalise.
        if patch.shape[0] != PATCH_SIZE or patch.shape[1] != PATCH_SIZE:
            patch = cv2.resize(patch, (PATCH_SIZE, PATCH_SIZE), interpolation=cv2.INTER_AREA)

        patch_mean = patch.mean()
        print(f"    Patch shape: {patch.shape}, mean brightness: {patch_mean:.2f} (min: {MEAN_BRIGHTNESS_THRESHOLD})")
        if patch_mean < MEAN_BRIGHTNESS_THRESHOLD:
            print(f"    Skipped: Brightness too low")
            continue

        # Separate name from the bounding-rect height to avoid shadowing.
        patch_key = patch_hash(patch)
        if patch_key in hash_set:
            print(f"    Skipped: Duplicate hash")
            continue

        # Passed all checks - save
        base_no_ext = os.path.splitext(os.path.basename(orig_path))[0]
        out_name = f"{base_no_ext}_defect_{global_id:05d}.png"
        out_path = os.path.join(final_output_folder, out_name)
        if cv2.imwrite(out_path, patch):
            # Bookkeeping happens only after a successful write.
            hash_set.add(patch_key)
            saved_boxes.append(box)
            saved_this_image += 1
            total_saved += 1
            global_id += 1
            print(f"[{m_i}] Saved patch: {out_path} area={int(area)}")
        else:
            print("Failed to save patch:", out_path)

    if SAVE_DEBUG:
        # Mask with the 20 largest contours outlined in red, plus the binary image.
        overlay = cv2.cvtColor(mask_img, cv2.COLOR_GRAY2BGR)
        cv2.drawContours(overlay, contours_sorted[:20], -1, (0, 0, 255), 1)
        cv2.imwrite(os.path.join(debug_folder, "overlay_" + mask_name), overlay)
        cv2.imwrite(os.path.join(debug_folder, "binary_" + mask_name), binary)

    if saved_this_image == 0:
        print(f"[{m_i}] No distinct patches saved for {mask_name}")

print("\nALL DONE")
print("Total patches saved:", total_saved)
print("Patches folder:", final_output_folder)
