In [1]:
# If needed
# %pip install --upgrade pip
%pip install "ultralytics>=8.3.0" opencv-python pillow scikit-learn albumentations


Defaulting to user installation because normal site-packages is not writeable
Collecting albumentations
  Using cached albumentations-2.0.8-py3-none-any.whl.metadata (43 kB)
Collecting albucore==0.0.24 (from albumentations)
  Using cached albucore-0.0.24-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Using cached opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached albumentations-2.0.8-py3-none-any.whl (369 kB)
Using cached albucore-0.0.24-py3-none-any.whl (15 kB)
Using cached opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl (38.9 MB)
Installing collected packages: opencv-python-headless, albucore, albumentations

   ---------------------------------------- 0/3 [opencv-python-headless]
   ---------------------------------------- 0/3 [opencv-python-headless]
   ------------- -------------------------- 1/3 [albucore]
   -------------------------- ------------- 2/3 [albumentations]
   -------

In [2]:
import os, json, shutil, random, math, time
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, UnidentifiedImageError
import albumentations as A
from sklearn.model_selection import StratifiedShuffleSplit

ROOT = Path.cwd()
RAW_DATA = ROOT / "data"             # 4 class folders live here
YOLO_ROOT = ROOT / "yolo_data"
IM_TRAIN = YOLO_ROOT / "images" / "train"
IM_VAL   = YOLO_ROOT / "images" / "val"
LB_TRAIN = YOLO_ROOT / "labels" / "train"
LB_VAL   = YOLO_ROOT / "labels" / "val"
YOLO_YAML = YOLO_ROOT / "data.yaml"

# Split + augmentation
VAL_SPLIT = 0.15
AUG_MULTIPLIER = 5            # generate 5 augmented variants per original train image
RANDOM_SEED = 42
random.seed(RANDOM_SEED); np.random.seed(RANDOM_SEED)

# Center check for live
CENTER_BOX_FRAC = 0.30

# Training
MODEL_NAME = "yolov8n.pt"
IMG_SIZE   = 416
EPOCHS     = 40
BATCH      = 2
WORKERS    = 0


In [3]:
# classes = folder names (exactly the 4 you care about)
class_names = sorted([d.name for d in RAW_DATA.iterdir() if d.is_dir()])
assert len(class_names) == 4, f"Expected 4 classes, found {len(class_names)}: {class_names}"
class_to_idx = {c:i for i,c in enumerate(class_names)}
print("Classes:", class_names)

VALID_EXTS = {".jpg",".jpeg",".png",".bmp",".webp",".tif",".tiff"}

def list_images():
    items = []
    for cls in class_names:
        for p in (RAW_DATA/cls).rglob("*"):
            if p.suffix.lower() in VALID_EXTS:
                items.append((p, class_to_idx[cls]))
    return items

all_items = list_images()
assert len(all_items) > 0, "No images found under ./data/<class> folders."
print(f"Found {len(all_items)} images total")


Classes: ['Capacitor', 'IC', 'Processor', 'Tan_Cap']
Found 414 images total


In [4]:
def load_rgb(path):
    try:
        im = Image.open(path).convert("RGB")
        return np.array(im)
    except Exception as e:
        print(f"[skip] Unreadable: {path} ({e})")
        return None

def find_largest_box(img):
    """
    Returns (x1,y1,x2,y2) for the main object, or None if nothing decent.
    Stronger than simple edges: adaptive threshold + morphology + filters.
    """
    h, w = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (5,5), 0)

    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 31, 5)
    k = np.ones((3,3), np.uint8)
    th = cv2.morphologyEx(th, cv2.MORPH_OPEN, k, iterations=1)
    th = cv2.morphologyEx(th, cv2.MORPH_CLOSE, k, iterations=2)

    cnts, _ = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not cnts: return None

    area_min = 0.02 * (w*h)  # ignore <2% area
    cands = []
    for c in cnts:
        x,y,ww,hh = cv2.boundingRect(c)
        area = ww*hh
        if area < area_min: 
            continue
        ar = ww / max(1, hh)
        if ar < 0.2 or ar > 5.0: 
            continue
        cands.append((x,y,x+ww,y+hh, area))
    if not cands: 
        return None

    x1,y1,x2,y2,_ = max(cands, key=lambda t:t[4])
    # slight padding
    px, py = int(0.02*w), int(0.02*h)
    x1 = max(0, x1-px); y1 = max(0, y1-py)
    x2 = min(w-1, x2+px); y2 = min(h-1, y2+py)
    return (x1,y1,x2,y2)

def yolo_line_from_box(cls_id, box, w, h):
    x1,y1,x2,y2 = box
    bw = x2-x1; bh = y2-y1
    cx = x1 + bw/2; cy = y1 + bh/2
    return f"{cls_id} {cx/w:.6f} {cy/h:.6f} {bw/w:.6f} {bh/h:.6f}\n"


In [5]:
# Split indices
y = [cid for _, cid in all_items]
idx = np.arange(len(all_items))
sss = StratifiedShuffleSplit(n_splits=1, test_size=VAL_SPLIT, random_state=RANDOM_SEED)
train_idx, val_idx = next(sss.split(idx, y))
train_items = [all_items[i] for i in train_idx]
val_items   = [all_items[i] for i in val_idx]
print(f"Train: {len(train_items)} | Val: {len(val_items)}")

# Reset folders
if YOLO_ROOT.exists(): shutil.rmtree(YOLO_ROOT)
IM_TRAIN.mkdir(parents=True, exist_ok=True); IM_VAL.mkdir(parents=True, exist_ok=True)
LB_TRAIN.mkdir(parents=True, exist_ok=True); LB_VAL.mkdir(parents=True, exist_ok=True)

def write_split(items, im_dst, lb_dst):
    kept = 0
    for src_path, cls_id in items:
        img = load_rgb(src_path)
        if img is None: continue
        h, w = img.shape[:2]
        box = find_largest_box(img)
        if box is None:
            # fallback: centered box (medium size)
            bw, bh = int(0.5*w), int(0.5*h)
            x1 = (w-bw)//2; y1 = (h-bh)//2
            box = (x1,y1,x1+bw,y1+bh)

        out_img = im_dst / src_path.name
        Image.fromarray(img).save(out_img)

        yolo_line = yolo_line_from_box(cls_id, box, w, h)
        (lb_dst / (out_img.stem + ".txt")).write_text(yolo_line)
        kept += 1
    print(f"Wrote {kept} base images + labels -> {im_dst}")

write_split(train_items, IM_TRAIN, LB_TRAIN)
write_split(val_items,   IM_VAL,   LB_VAL)


Train: 351 | Val: 63
[skip] Unreadable: d:\Coding\Projects\Classifier\data\Processor\cpu_253658-1870 (3).jpg (cannot identify image file 'd:\\Coding\\Projects\\Classifier\\data\\Processor\\cpu_253658-1870 (3).jpg')
[skip] Unreadable: d:\Coding\Projects\Classifier\data\Processor\computer-microprocessor-cpu-close-up-isolated-white-backgrou.jpg (cannot identify image file 'd:\\Coding\\Projects\\Classifier\\data\\Processor\\computer-microprocessor-cpu-close-up-isolated-white-backgrou.jpg')




[skip] Unreadable: d:\Coding\Projects\Classifier\data\Processor\single-microchip-isolated-white-background_941600-88404(1).jpg (cannot identify image file 'd:\\Coding\\Projects\\Classifier\\data\\Processor\\single-microchip-isolated-white-background_941600-88404(1).jpg')
[skip] Unreadable: d:\Coding\Projects\Classifier\data\Processor\high-angle-view-computer-chip-against-white-background_10489 (1).jpg (cannot identify image file 'd:\\Coding\\Projects\\Classifier\\data\\Processor\\high-angle-view-computer-chip-against-white-background_10489 (1).jpg')
[skip] Unreadable: d:\Coding\Projects\Classifier\data\Processor\single-microchip-isolated-white-background_941600-88404.jpg (cannot identify image file 'd:\\Coding\\Projects\\Classifier\\data\\Processor\\single-microchip-isolated-white-background_941600-88404.jpg')
[skip] Unreadable: d:\Coding\Projects\Classifier\data\Processor\computer-microprocessor-cpu-close-up-isolated-white-backgrou(1).jpg (cannot identify image file 'd:\\Coding\\Proje

In [None]:
# ===== Stable, version-agnostic bbox augmentation =====
import albumentations as A
import cv2

# Safer ops only (no RandomResizedCrop / Sharpen version drama)
AUG_PIPE = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.25, contrast_limit=0.25, p=0.6),
    A.HueSaturationValue(hue_shift_limit=8, sat_shift_limit=20, val_shift_limit=15, p=0.4),
    A.GaussNoise(var_limit=(5.0, 25.0), p=0.2),
    A.MotionBlur(blur_limit=5, p=0.2),
    A.Affine(scale=(0.85, 1.15), translate_percent=(0.0, 0.05), rotate=(-10, 10),
             shear=(-5, 5), mode=cv2.BORDER_REFLECT_101, p=0.5),
    # Resize last to your training size
    A.Resize(height=int(IMG_SIZE), width=int(IMG_SIZE), interpolation=cv2.INTER_LINEAR),
], bbox_params=A.BboxParams(
    format="pascal_voc",
    label_fields=["class_labels"],
    min_visibility=0.4
))

def yolo_to_voc_px(cx, cy, bw, bh, w, h):
    x1 = (cx - bw/2) * w
    y1 = (cy - bh/2) * h
    x2 = (cx + bw/2) * w
    y2 = (cy + bh/2) * h
    return [x1, y1, x2, y2]

def voc_px_to_yolo(x1, y1, x2, y2, w, h):
    bw = max(1.0, x2 - x1); bh = max(1.0, y2 - y1)  # ≥1px
    cx = x1 + bw/2; cy = y1 + bh/2
    return [cx / w, cy / h, bw / w, bh / h]

def clamp_voc(x1, y1, x2, y2, w, h):
    # clamp to [0, w-1]/[0,h-1] and fix order
    x1 = max(0.0, min(float(w - 1), x1))
    y1 = max(0.0, min(float(h - 1), y1))
    x2 = max(0.0, min(float(w - 1), x2))
    y2 = max(0.0, min(float(h - 1), y2))
    if x2 <= x1: x2 = min(w - 1.0, x1 + 1.0)
    if y2 <= y1: y2 = min(h - 1.0, y1 + 1.0)
    return [x1, y1, x2, y2]

def augment_train_set(mult=AUG_MULTIPLIER):
    ims = sorted([p for p in IM_TRAIN.iterdir() if p.suffix.lower() in VALID_EXTS])
    count = 0
    for p in ims:
        lbp = LB_TRAIN / (p.stem + ".txt")
        if not lbp.exists():
            continue

        img_bgr = cv2.imread(str(p))
        if img_bgr is None:
            continue
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]

        # read YOLO -> VOC(px)
        lines = [ln.strip().split() for ln in lbp.read_text().strip().splitlines() if ln.strip()]
        if not lines:
            continue
        bboxes_voc = []
        class_labels = []
        for ln in lines:
            c = int(ln[0]); cx, cy, bw, bh = map(float, ln[1:5])
            x1, y1, x2, y2 = yolo_to_voc_px(cx, cy, bw, bh, w, h)
            bboxes_voc.append([x1, y1, x2, y2]); class_labels.append(c)

        for k in range(mult):
            # robust to random failures
            try:
                t = AUG_PIPE(image=img, bboxes=bboxes_voc, class_labels=class_labels)
            except Exception:
                continue

            im2 = t["image"]; bb2 = t["bboxes"]; cl2 = t["class_labels"]
            if len(bb2) == 0:
                continue

            h2, w2 = im2.shape[:2]
            filtered = []
            for (x1, y1, x2, y2), c in zip(bb2, cl2):
                x1, y1, x2, y2 = clamp_voc(x1, y1, x2, y2, w2, h2)
                bw = x2 - x1; bh = y2 - y1
                if bw * bh < 4:  # tiny → drop
                    continue
                filtered.append((c, x1, y1, x2, y2))
            if not filtered:
                continue

            out_img = IM_TRAIN / f"{p.stem}_aug{k}.jpg"
            cv2.imwrite(str(out_img), cv2.cvtColor(im2, cv2.COLOR_RGB2BGR))

            with open(LB_TRAIN / f"{p.stem}_aug{k}.txt", "w") as f:
                for c, x1, y1, x2, y2 in filtered:
                    cx, cy, bw, bh = voc_px_to_yolo(x1, y1, x2, y2, w2, h2)
                    # final clamp
                    cx = min(1.0, max(0.0, cx)); cy = min(1.0, max(0.0, cy))
                    bw = min(1.0, max(1e-6, bw)); bh = min(1.0, max(1e-6, bh))
                    f.write(f"{c} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}\n")
            count += 1
    print(f"Augmented images written: {count}")


ValueError: invalid literal for int() with base 10: '3.0'

In [None]:
YOLO_ROOT.mkdir(parents=True, exist_ok=True)
with open(YOLO_YAML, "w") as f:
    f.write(f"path: {str(YOLO_ROOT)}\n")
    f.write("train: images/train\n")
    f.write("val: images/val\n")
    f.write("names:\n")
    for i,n in enumerate(class_names):
        f.write(f"  {i}: {n}\n")

print(YOLO_YAML.read_text())


In [None]:
from ultralytics import YOLO

model = YOLO(MODEL_NAME)  # pretrained nano
results = model.train(
    data=str(YOLO_YAML),
    device="cpu",          # ← keep CPU for stability; change to 0 for GPU later
    imgsz=IMG_SIZE,
    epochs=EPOCHS,
    batch=BATCH,
    workers=WORKERS,
    amp=False,
    mosaic=0.0,
    cache=False,
    freeze=0,              # let backbone adapt (we augmented)
    patience=10,
    verbose=True,
)

best_path = getattr(model, "ckpt_path", None) or getattr(model.trainer, "best", None)
print("Best model:", best_path)


In [None]:
from ultralytics import YOLO
model = YOLO(best_path)
val_res = model.val(data=str(YOLO_YAML), imgsz=IMG_SIZE, device="cpu")
print(f"mAP50-95: {val_res.box.map:.4f} | mAP50: {val_res.box.map50:.4f}")
for i, n in enumerate(class_names):
    print(f"{n}: AP50-95={val_res.box.maps[i]:.4f}")


In [None]:
def is_centered(box_xyxy, frame_w, frame_h, frac=CENTER_BOX_FRAC):
    x1,y1,x2,y2 = box_xyxy
    cx = 0.5*(x1+x2); cy = 0.5*(y1+y2)
    fx = frame_w*0.5; fy = frame_h*0.5
    hw = frame_w*frac*0.5; hh = frame_h*frac*0.5
    return (fx-hw) <= cx <= (fx+hw) and (fy-hh) <= cy <= (fy+hh)

def predict_image(path, conf_th=0.10):
    mdl = YOLO(best_path if 'best_path' in globals() and best_path else MODEL_NAME)
    res = mdl.predict(source=path, imgsz=IMG_SIZE, conf=conf_th, verbose=False, device="cpu")
    for r in res:
        if r.boxes is None or len(r.boxes)==0:
            print("No detections.")
            continue
        for b in r.boxes:
            cid = int(b.cls.item()); conf = float(b.conf.item())
            xyxy = list(map(int, b.xyxy.squeeze().cpu().numpy().tolist()))
            print(f"{class_names[cid]} @ {conf:.2f} -> {xyxy}")
        r.show()  # view overlay


In [None]:
def live_video_classify(cam_index=0, conf_th=0.15):
    from collections import deque
    mdl = YOLO(best_path if 'best_path' in globals() and best_path else MODEL_NAME)

    cap = cv2.VideoCapture(cam_index)
    if not cap.isOpened():
        print("Could not open webcam."); return

    history = deque(maxlen=5)
    try:
        while True:
            ok, frame = cap.read()
            if not ok: break
            h, w = frame.shape[:2]

            res = mdl.predict(source=frame, imgsz=IMG_SIZE, conf=conf_th, verbose=False, device="cpu")
            preds = []
            for r in res:
                if r.boxes is None: continue
                for b in r.boxes:
                    cid = int(b.cls.item()); conf = float(b.conf.item())
                    xyxy = b.xyxy.squeeze().cpu().numpy().tolist()
                    preds.append((cid, conf, xyxy))

            # center window
            fx, fy = int(w*0.5), int(h*0.5)
            hw, hh = int(w*CENTER_BOX_FRAC*0.5), int(h*CENTER_BOX_FRAC*0.5)
            cv2.rectangle(frame, (fx-hw, fy-hh), (fx+hw, fy+hh), (128,128,128), 1)

            any_yes = False
            for (cid, conf, xyxy) in preds:
                x1,y1,x2,y2 = map(int, xyxy)
                centered = is_centered(xyxy, w, h, frac=CENTER_BOX_FRAC)
                any_yes |= centered
                color = (0,255,0) if centered else (0,165,255)
                cv2.rectangle(frame, (x1,y1), (x2,y2), color, 2)
                label = f"{class_names[cid]} {conf:.2f}"
                cv2.putText(frame, label, (x1, max(20,y1-6)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            history.append(1 if any_yes else 0)
            stable_yes = sum(history) >= 3
            status = "YES" if stable_yes else "NO"
            cv2.putText(frame, status, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                        (0,255,0) if stable_yes else (0,0,255), 2)

            cv2.imshow("Live Detection (q to quit)", frame)
            if (cv2.waitKey(1) & 0xFF) == ord('q'): break
    finally:
        cap.release(); cv2.destroyAllWindows()


In [None]:
def predict_image(path, conf_th=0.25):
    mdl = YOLO(best_path if 'best_path' in globals() else MODEL_NAME)
    im = Image.open(path).convert("RGB")
    res = mdl.predict(source=im, imgsz=IMG_SIZE, conf=conf_th, verbose=False)
    for r in res:
        r.show()  # opens a window with predictions

predict_image("test/im9.jpg")
