In [None]:
import os
import random
import re
import shutil
from pathlib import Path

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import patches
import seaborn as sns

import xml.etree.ElementTree as ET

import yaml
import joblib
import kagglehub

sns.set_style('darkgrid')
sns.set_palette('pastel')

import warnings
warnings.filterwarnings('ignore')

In [None]:
import os
import random
import shutil

# Define paths
# NOTE(review): root_dir is assigned here but not referenced anywhere else in the visible notebook.
root_dir = os.getcwd()
# Source dataset; the cells below read its 'images', 'labels' and 'Annotations' sub-folders.
input_dir = '/content/drive/MyDrive/archive (2)/PCB_DATASET'
# Change output location to Google Drive
output_dir = '/content/drive/MyDrive/PCB_DATASET_SPLIT'
# Layout produced below: <output_dir>/dataset/{images,labels}/{train,val,test}
dataset_dir = os.path.join(output_dir, 'dataset')
images_dir = os.path.join(dataset_dir, 'images')
labels_dir = os.path.join(dataset_dir, 'labels')

# Define train/val/test directories
train_images_dir = os.path.join(images_dir, 'train')
val_images_dir = os.path.join(images_dir, 'val')
test_images_dir = os.path.join(images_dir, 'test')
train_labels_dir = os.path.join(labels_dir, 'train')
val_labels_dir = os.path.join(labels_dir, 'val')
test_labels_dir = os.path.join(labels_dir, 'test')

# Fraction of all images kept for train+val; the remainder becomes the test set.
TRAIN_TEST_SPLIT = 0.95
# Fraction of the train+val pool used for training; the remainder is the validation set.
TRAIN_VAL_SPLIT = 0.9
# NOTE(review): EPOCHS is not referenced by the training command in this notebook,
# which passes a literal `--epochs 100` — confirm which value is intended.
EPOCHS = 150
# Seed handed to random.seed() right before the image list is shuffled.
random_seed = 42

def split_dataset():
    """Prepare and split dataset into train/val/test sets.

    Collects every file under ``input_dir/images/<defect_type>/``, shuffles
    the list with ``random_seed`` and copies each image into the train, val
    or test image directory. When a label file with the same stem exists in
    ``input_dir/labels/<defect_type>/`` it is copied into the matching label
    directory as well.

    Returns:
        tuple: ``(total, train_count, val_count, test_count)`` image counts.
    """
    # (image dir, label dir) per split, in the order train / val / test
    split_dirs = (
        (train_images_dir, train_labels_dir),
        (val_images_dir, val_labels_dir),
        (test_images_dir, test_labels_dir),
    )

    # Create directory structure (makedirs also creates the shared parents)
    for img_dir, lbl_dir in split_dirs:
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(lbl_dir, exist_ok=True)

    # Collect image paths.
    # os.listdir() returns entries in arbitrary order, so sort them first;
    # otherwise the seeded shuffle below is not reproducible across machines.
    source_images_dir = os.path.join(input_dir, 'images')
    img_path_list = []
    for defect_type in sorted(os.listdir(source_images_dir)):
        defect_dir = os.path.join(source_images_dir, defect_type)
        for file in sorted(os.listdir(defect_dir)):
            img_path_list.append((os.path.join(defect_dir, file), defect_type))

    # Split dataset
    random.seed(random_seed)
    random.shuffle(img_path_list)

    total_train_len = int(len(img_path_list) * TRAIN_TEST_SPLIT)
    train_len = int(total_train_len * TRAIN_VAL_SPLIT)
    val_len = total_train_len - train_len

    # Copy files to respective directories
    for i, (img_path, defect_type) in enumerate(img_path_list):
        if i < train_len:
            img_dir, lbl_dir = split_dirs[0]
        elif i < total_train_len:
            img_dir, lbl_dir = split_dirs[1]
        else:
            img_dir, lbl_dir = split_dirs[2]

        img_filename = os.path.basename(img_path)
        # splitext handles any image extension; a plain '.jpg' replace would
        # leave e.g. '.JPG' or '.png' names unchanged.
        label_filename = os.path.splitext(img_filename)[0] + '.txt'
        label_path = os.path.join(input_dir, 'labels', defect_type, label_filename)

        shutil.copy(img_path, os.path.join(img_dir, img_filename))
        if os.path.exists(label_path):
            shutil.copy(label_path, os.path.join(lbl_dir, label_filename))

    return len(img_path_list), train_len, val_len, len(img_path_list) - total_train_len

# Prepare dataset
total_images, train_count, val_count, test_count = split_dataset()
print(f"Dataset split - Total: {total_images}, Train: {train_count}, Val: {val_count}, Test: {test_count}")
print(f"Dataset saved to: {output_dir}")

Dataset split - Total: 693, Train: 592, Val: 66, Test: 35
Dataset saved to: /content/drive/MyDrive/PCB_DATASET_SPLIT


In [None]:

def path_to_name(filepath):
    """Return the file name (last path component) of ``filepath``.

    Uses ``os.path.basename`` so that names containing characters outside
    ``[A-Za-z0-9_]`` or with an extension other than ``.jpg`` are handled too;
    a pattern restricted to ``[\\w_]+\\.jpg`` returns the whole path unchanged
    for such files.

    Args:
        filepath: Path to a file.

    Returns:
        str: The file name including its extension.
    """
    return os.path.basename(filepath)

def split_dataset():
    """Collect, shuffle and size the train/val/test split (no files are copied).

    NOTE: this definition replaces any earlier ``split_dataset`` defined in
    the notebook and returns a different tuple.

    Returns:
        tuple: ``(img_path_list, train_len, val_len)`` where ``img_path_list``
        is the shuffled list of image paths; the first ``train_len`` entries
        are the training set, the next ``val_len`` the validation set and the
        remainder the test set.
    """
    # Create directory structure
    os.makedirs(dataset_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # Collect image paths.
    # os.listdir() returns entries in arbitrary order, so sort them first;
    # otherwise the seeded shuffle below is not reproducible across machines.
    source_images_dir = os.path.join(input_dir, 'images')
    img_path_list = []
    for defect_type in sorted(os.listdir(source_images_dir)):
        defect_dir = os.path.join(source_images_dir, defect_type)
        for file in sorted(os.listdir(defect_dir)):
            img_path_list.append(os.path.join(defect_dir, file))

    # Split dataset
    random.seed(random_seed)
    random.shuffle(img_path_list)

    total_train_len = int(len(img_path_list) * TRAIN_TEST_SPLIT)
    train_len = int(total_train_len * TRAIN_VAL_SPLIT)
    val_len = total_train_len - train_len

    return img_path_list, train_len, val_len

def copy_images_to_dir(img_paths, target_dir, train_len, val_len, images_dir):
    """Copy the shuffled images, and any matching label files, into split folders.

    Args:
        img_paths: Image paths in split order (train first, then val, then test).
        target_dir: Directory whose 'train', 'val' and 'test' sub-directories
            are removed (if present) and recreated.
        train_len: Number of leading entries of ``img_paths`` used for training.
        val_len: Number of following entries used for validation; everything
            after that goes to the test split.
        images_dir: Directory the images are copied into, under the split
            sub-directory. The notebook passes the same path as ``target_dir``.

    Label files are looked up in ``input_dir/labels/<defect_type>/`` and copied
    into the module-level ``labels_dir`` when they exist.
    """
    paths_by_split = {
        'train': img_paths[:train_len],
        'val': img_paths[train_len:train_len + val_len],
        'test': img_paths[train_len + val_len:],
    }

    for split in paths_by_split:
        split_dir = os.path.join(target_dir, split)
        # Check each split folder on its own: removing 'val'/'test' just
        # because 'train' exists raises when one of them is missing.
        if os.path.exists(split_dir):
            shutil.rmtree(split_dir)
        os.makedirs(split_dir, exist_ok=True)
        # The copy destination is images_dir, which may differ from target_dir
        os.makedirs(os.path.join(images_dir, split), exist_ok=True)
        # Same structure for labels
        os.makedirs(os.path.join(labels_dir, split), exist_ok=True)

    for split, split_paths in paths_by_split.items():
        for filepath in split_paths:
            filename = path_to_name(filepath)
            shutil.copy(filepath, os.path.join(images_dir, split, filename))

            # Corresponding label file: same stem, '.txt' extension
            label_filename = os.path.splitext(filename)[0] + '.txt'
            # The defect type is the name of the image's parent directory
            defect_type = os.path.basename(os.path.dirname(filepath))
            label_path = os.path.join(input_dir, 'labels', defect_type, label_filename)
            if os.path.exists(label_path):
                shutil.copy(label_path, os.path.join(labels_dir, split, label_filename))

    # Print summary
    print(f"Dataset split - Train: {train_len}, Val: {val_len}, Test: {len(img_paths)-train_len-val_len}")
    print(f"Dataset saved to: {output_dir}")

# Prepare dataset
img_path_list, train_len, val_len = split_dataset()
copy_images_to_dir(img_path_list, images_dir, train_len, val_len, images_dir)

Dataset split - Train: 592, Val: 66, Test: 35
Dataset saved to: /content/drive/MyDrive/PCB_DATASET_SPLIT


In [None]:
def parse_xml(xml_file):
    """Read one XML annotation file and return one record per annotated object.

    Args:
        xml_file: Path (or file object) accepted by ``ET.parse``.

    Returns:
        list[dict]: One dict per ``<object>`` element with the keys
        ``filename``, ``width``, ``height``, ``class``, ``xmin``, ``ymin``,
        ``xmax`` and ``ymax``. The first three are read once from the
        document and repeated on every record.
    """
    annotation = ET.parse(xml_file).getroot()

    # Image-level fields shared by every bounding box in this file
    image_name = annotation.find('filename').text
    image_w = int(annotation.find('size/width').text)
    image_h = int(annotation.find('size/height').text)

    records = []
    for node in annotation.findall('object'):
        label = node.find('name').text
        # Box corners, read in the order they appear in the output record
        corners = {
            tag: int(node.find(f'bndbox/{tag}').text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        }
        records.append({
            'filename': image_name,
            'width': image_w,
            'height': image_h,
            'class': label,
            **corners,
        })
    return records


In [None]:
ANNOT_DIR = os.path.join(input_dir, 'Annotations')
annot_type = os.listdir(ANNOT_DIR)

# Parse every annotation XML (one sub-folder per entry in annot_type) into a
# flat list of bounding-box records, then build the DataFrame in one go.
all_data = []
# The loop variable is deliberately not called `type`: at cell level that
# would shadow the builtin for the rest of the notebook.
for defect_dir in annot_type:
    defect_path = os.path.join(ANNOT_DIR, defect_dir)
    for xml_name in os.listdir(defect_path):
        all_data.extend(parse_xml(os.path.join(defect_path, xml_name)))
df_annot = pd.DataFrame(all_data)

df_annot

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,01_short_10.jpg,3034,1586,short,542,914,600,966
1,01_short_10.jpg,3034,1586,short,784,1200,841,1260
2,01_short_10.jpg,3034,1586,short,620,885,656,932
3,04_short_02.jpg,3056,2464,short,1579,1161,1677,1261
4,04_short_02.jpg,3056,2464,short,2021,1597,2110,1695
...,...,...,...,...,...,...,...,...
2948,05_missing_hole_08.jpg,2544,2156,missing_hole,2061,1265,2120,1323
2949,05_missing_hole_08.jpg,2544,2156,missing_hole,1652,798,1709,849
2950,01_missing_hole_13.jpg,3034,1586,missing_hole,1384,296,1460,362
2951,01_missing_hole_13.jpg,3034,1586,missing_hole,1340,738,1397,802


In [None]:
# Map every image file name to the split folder it was copied into.
# Built with a comprehension so no loop variable named `set` leaks into the
# notebook namespace and shadows the builtin.
split_of_image = {
    image_name: split_name
    for split_name in ['train', 'val', 'test']
    for image_name in os.listdir(os.path.join(images_dir, split_name))
}
# One vectorised lookup instead of assigning row by row; annotations whose
# image is in none of the folders get NaN.
df_annot['set'] = df_annot['filename'].map(split_of_image)

# Box size in pixels
df_annot['box_width'] = (df_annot['xmax'] - df_annot['xmin'])
df_annot['box_height'] = (df_annot['ymax'] - df_annot['ymin'])

# Box centre as a fraction of the image size
df_annot['x_center'] = (df_annot['xmin'] + df_annot['box_width'] / 2) / df_annot['width']
df_annot['y_center'] = (df_annot['ymin'] + df_annot['box_height'] / 2) / df_annot['height']

# Normalizing
df_annot['box_width'] = df_annot['box_width'] / df_annot['width']
df_annot['box_height'] = df_annot['box_height'] / df_annot['height']

# Class name -> integer id written to the label files
REV_CLS = {
        'mouse_bite': 0,
        'spur': 1,
        'open_circuit': 2,
        'short': 3,
        'missing_hole': 4,
        'spurious_copper': 5
    }
df_annot['class_label'] = df_annot['class'].map(REV_CLS)

df_annot

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,set,box_width,box_height,x_center,y_center,class_label
0,01_short_10.jpg,3034,1586,short,542,914,600,966,train,0.019117,0.032787,0.188200,0.592686,3
1,01_short_10.jpg,3034,1586,short,784,1200,841,1260,train,0.018787,0.037831,0.267798,0.775536,3
2,01_short_10.jpg,3034,1586,short,620,885,656,932,train,0.011866,0.029634,0.210283,0.572825,3
3,04_short_02.jpg,3056,2464,short,1579,1161,1677,1261,train,0.032068,0.040584,0.532723,0.491477,3
4,04_short_02.jpg,3056,2464,short,2021,1597,2110,1695,train,0.029123,0.039773,0.675884,0.668019,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2948,05_missing_hole_08.jpg,2544,2156,missing_hole,2061,1265,2120,1323,train,0.023192,0.026902,0.821737,0.600186,4
2949,05_missing_hole_08.jpg,2544,2156,missing_hole,1652,798,1709,849,train,0.022406,0.023655,0.660574,0.381957,4
2950,01_missing_hole_13.jpg,3034,1586,missing_hole,1384,296,1460,362,train,0.025049,0.041614,0.468688,0.207440,4
2951,01_missing_hole_13.jpg,3034,1586,missing_hole,1340,738,1397,802,train,0.018787,0.040353,0.451055,0.485498,4


In [None]:
def create_labels_from_df(label_dir, images_dir, df):
    """Write one label text file per image from the annotation DataFrame.

    The 'train', 'val' and 'test' sub-directories of ``label_dir`` are removed
    and recreated. For every file found in ``images_dir/<split>/`` a text file
    with the same stem is written to ``label_dir/<split>/``; it contains one
    line per row of ``df`` whose ``filename`` equals the image file name, in
    the form ``class_label x_center y_center box_width box_height``. Images
    without any matching row get an empty file.

    Args:
        label_dir: Root directory for the label files.
        images_dir: Root directory holding the split image folders.
        df: DataFrame with the columns ``filename``, ``class_label``,
            ``x_center``, ``y_center``, ``box_width`` and ``box_height``.
    """
    splits = ('train', 'val', 'test')

    # Remove existing label directories, then create new ones
    for subdir in splits:
        subdir_path = os.path.join(label_dir, subdir)
        if os.path.exists(subdir_path):
            shutil.rmtree(subdir_path)
    for subdir in splits:
        os.makedirs(os.path.join(label_dir, subdir), exist_ok=True)

    # Group the annotations once instead of filtering the whole frame for
    # every image.
    rows_by_image = {
        image_name: group
        for image_name, group in df.groupby('filename', sort=False)
    }

    # Create label files for each image
    for subdir in splits:
        image_dir = os.path.join(images_dir, subdir)
        for filename in os.listdir(image_dir):
            # splitext keeps dots inside the stem ('board.v2.jpg' -> 'board.v2');
            # splitting on the first '.' would truncate it to 'board'.
            stem = os.path.splitext(filename)[0]
            txt_path = os.path.join(label_dir, subdir, f"{stem}.txt")

            # Create the label file if it doesn't exist
            if os.path.exists(txt_path):
                continue
            image_df = rows_by_image.get(filename)
            with open(txt_path, 'w') as f:
                if image_df is None:
                    continue  # no annotations: leave the file empty
                for _, row in image_df.iterrows():
                    f.write(f"{row['class_label']} {row['x_center']} {row['y_center']} {row['box_width']} {row['box_height']}\n")

create_labels_from_df(labels_dir,images_dir,df_annot)

In [None]:
CLASS_DICT = {
        0: 'mouse_bite',
        1: 'spur',
        2: 'open_circuit',
        3: 'short',
        4: 'missing_hole',
        5: 'spurious_copper'}

def create_yaml_config():
    """Write the dataset description for YOLO training to ``data.yaml``.

    The file is created in the current working directory. Paths of the three
    splits are given relative to ``dataset_dir``; the class count and class
    names are taken from ``CLASS_DICT``.

    Returns:
        dict: The configuration that was written.
    """
    config = {
        'path': str(dataset_dir),
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
    }
    # Class information comes from the module-level CLASS_DICT
    config['nc'] = len(CLASS_DICT)
    config['names'] = [*CLASS_DICT.values()]

    with open('data.yaml', 'w') as stream:
        yaml.dump(config, stream, default_flow_style=False)

    return config

# Create YAML config
data_yaml = create_yaml_config()

In [None]:
!git clone https://github.com/ultralytics/yolov5.git
%cd yolov5
!pip install -r requirements.txt
%cd ..


fatal: destination path 'yolov5' already exists and is not an empty directory.
/content/yolov5
/content


In [None]:
mkdir -p yolov5

In [None]:
%%writefile yolov5/hyp.pcb.yaml
lr0: 0.002  # Lower learning rate for small dataset
lrf: 0.2  # Final learning rate fraction
momentum: 0.937  # SGD momentum
weight_decay: 0.0005  # Regularization

# Warmup Strategy
warmup_epochs: 3.0
warmup_momentum: 0.8
warmup_bias_lr: 0.1

# Augmentations (Strong for small dataset)
mosaic: 1.0  # Enable mosaic augmentation (merging 4 images)
mixup: 0.2  # MixUp augmentation for better generalization
copy_paste: 0.1  # Useful for defect datasets
degrees: 0.5  # Slight rotation (avoid extreme angles)
translate: 0.1  # Minor shifts to prevent overfitting
scale: 0.5  # Scale variation (zoom in/out)
shear: 0.1  # Minor shearing for robustness
perspective: 0.0005  # Slight perspective changes
flipud: 0.1  # Flip images vertically (low value since PCBs are directional)
fliplr: 0.5  # Flip images horizontally
hsv_h: 0.015  # HSV-Hue variation
hsv_s: 0.7  # HSV-Saturation augmentation
hsv_v: 0.4  # HSV-Value augmentation

# Loss Functions (Higher weight on classification)
box: 0.05  # Box loss gain
cls: 0.7  # Higher weight for classification since defect types are important
cls_pw: 1.0
obj: 1.2  # Objectness loss gain (slightly increased for defect detection)
obj_pw: 1.0
iou_t: 0.2  # IoU threshold for training
anchor_t: 4.0  # Anchor-matching threshold

# Other
fl_gamma: 0.0  # No focal loss (can be tuned later)
anchors: 3  # Anchors per output layer (0 to ignore)

Overwriting yolov5/hyp.pcb.yaml


In [None]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device count: {torch.cuda.device_count()}")
# current_device() raises when no CUDA device can be initialised, so only
# query it when CUDA is actually available.
print(f"Current device: {torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'}")


CUDA available: True
Device count: 1
Current device: 0


In [None]:
os.environ['WANDB_MODE'] = 'disabled'

!python yolov5/train.py --img 640 --batch 16 --epochs 100 --data data.yaml --weights yolov5s.pt --cache --hyp yolov5/hyp.pcb.yaml

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  with torch.cuda.amp.autocast(amp):
      36/99      4.97G    0.08212    0.06194     0.0501        159        640: 100% 37/37 [00:11<00:00,  3.26it/s]
                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100% 3/3 [00:01<00:00,  2.38it/s]
                   all         66        289      0.601      0.625      0.636      0.266

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with tor

In [None]:
!pip install onnx onnx-tf tensorflow



In [None]:
!pip install tensorflow-addons==0.20.0



In [None]:
import sys
import os

# Add YOLOv5 directory to path
sys.path.append('./yolov5')

# Path to your trained model
model_path = 'yolov5/runs/train/exp/weights/best.pt'

# Export to ONNX format first
!python yolov5/export.py --weights {model_path} --include onnx --img 640 --batch 1

# Then convert ONNX to TFLite
# For this part, you'd need to use the previous ONNX to TFLite conversion code

[34m[1mexport: [0mdata=yolov5/data/coco128.yaml, weights=['yolov5/runs/train/exp/weights/best.pt'], imgsz=[640], batch_size=1, device=cpu, half=False, inplace=False, keras=False, optimize=False, int8=False, per_tensor=False, dynamic=False, cache=, simplify=False, mlmodel=False, opset=17, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['onnx']
YOLOv5 🚀 v7.0-416-gfe1d4d99 Python-3.11.12 torch-2.6.0+cu124 CPU

Fusing layers... 
Model summary: 157 layers, 7026307 parameters, 0 gradients, 15.8 GFLOPs

[34m[1mPyTorch:[0m starting from yolov5/runs/train/exp/weights/best.pt with output shape (1, 25200, 11) (13.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0...
[34m[1mONNX:[0m export success ✅ 1.8s, saved as yolov5/runs/train/exp/weights/best.onnx (27.2 MB)

Export complete (5.4s)
Results saved to [1m/content/yolov5/runs/train/exp/weights[0m
Detect:          python detect.py --weights yo

In [None]:
!pip install -U onnx-tf tensorflow

In [None]:
import sys
import torch
# Put the YOLOv5 checkout on the import path so `models.experimental` resolves;
# the path is relative to the current working directory.
sys.path.append('./yolov5')
from models.experimental import attempt_load

# Load PyTorch model with correct parameters
# NOTE(review): the run folder 'exp' is hard-coded — confirm it is the training
# run whose weights should be loaded.
model_path = 'yolov5/runs/train/exp/weights/best.pt'
device = torch.device('cpu')
model = attempt_load(model_path, device=device)  # Using 'device' parameter instead of 'map_location'
model.eval()
print("Model loaded successfully")

Fusing layers... 
Model summary: 157 layers, 7026307 parameters, 0 gradients, 15.8 GFLOPs


Model loaded successfully


In [None]:
# Make sure you're in the correct directory
import os
print(f"Current directory: {os.getcwd()}")
print(f"Directory contents: {os.listdir()}")

# Direct export from PyTorch to TFLite
model_path = 'yolov5/runs/train/exp/weights/best.pt'

# Export directly to TFLite
!python yolov5/export.py --weights {model_path} --include tflite --img 640

Current directory: /content
Directory contents: ['.config', 'drive', 'saved_model_dir', 'data.yaml', 'yolov5s.pt', 'yolov5', 'sample_data']
[34m[1mexport: [0mdata=yolov5/data/coco128.yaml, weights=['yolov5/runs/train/exp/weights/best.pt'], imgsz=[640], batch_size=1, device=cpu, half=False, inplace=False, keras=False, optimize=False, int8=False, per_tensor=False, dynamic=False, cache=, simplify=False, mlmodel=False, opset=17, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['tflite']
YOLOv5 🚀 v7.0-416-gfe1d4d99 Python-3.11.12 torch-2.6.0+cu124 CPU

Fusing layers... 
Model summary: 157 layers, 7026307 parameters, 0 gradients, 15.8 GFLOPs

[34m[1mPyTorch:[0m starting from yolov5/runs/train/exp/weights/best.pt with output shape (1, 25200, 11) (13.8 MB)
2025-04-26 10:45:20.538774: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register f

In [None]:
import tensorflow as tf

# Path to your TFLite model
# NOTE(review): presumably the file produced by the `--include tflite` export
# cell — confirm the file name matches what the export actually wrote.
tflite_model_path = "/content/yolov5/runs/train/exp/weights/best-fp16.tflite"

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get model info
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Print the raw input/output detail records for inspection
print("Model Inputs:", input_details)
print("Model Outputs:", output_details)


Model Inputs: [{'name': 'serving_default_keras_tensor_121:0', 'index': 0, 'shape': array([  1, 640, 640,   3], dtype=int32), 'shape_signature': array([  1, 640, 640,   3], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
Model Outputs: [{'name': 'StatefulPartitionedCall_1:0', 'index': 530, 'shape': array([    1, 25200,    11], dtype=int32), 'shape_signature': array([    1, 25200,    11], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
