# structure of the dataset needed is :
 * train:
   - labels  
   - images 
 * val:
   - images  
   - labels
   
   
with labels being text files that match the image name and consist of lines of bounding boxes



> transfer learning 
https://docs.ultralytics.com/yolov5/tutorials/transfer_learning_with_frozen_layers/  
> train with custom dataset 
https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/  
> youtube tutorial 
https://www.youtube.com/watch?v=GRtgLlwxpc4

In [6]:
import os
import torch
import comet_ml
import numpy as np
import albumentations

from PIL import Image
from ultralytics import YOLO
from sklearn.model_selection import train_test_split

from utils import construct_dataframe,read_xml,visualize_data


comet_ml.init()

paths = [
os.path.join(os.getcwd(),'yolo_dataset'),
os.path.join(os.getcwd(),'yolo_dataset','train'),
os.path.join(os.getcwd(),'yolo_dataset','train','images'),
os.path.join(os.getcwd(),'yolo_dataset','train','labels'),
os.path.join(os.getcwd(),'yolo_dataset','val'),
os.path.join(os.getcwd(),'yolo_dataset','val','images'),
os.path.join(os.getcwd(),'yolo_dataset','val', 'labels'),
]

for path in paths:
    if not os.path.exists(path):
        os.makedirs(path)
    

In [2]:
#to-do : extract this from database and don't hard code it 
defect_categories = {
        'punching_hole':0,
        'welding_line':1,
        'crescent_gap':2,
        'water_spot':3,
        'oil_spot':4,
        'silk_spot':5,
        'inclusion':6,
        'rolled_pit':7,
        'crease':8,
        'waist_folding':9,
        }

In [3]:
####
#helper functions 
####

########################################

def calculate_bbox_parameters(xmin, xmax, ymin, ymax, image_width, image_height):
    width = xmax - xmin
    height = ymax - ymin
    x_center = (xmin + xmax) / (2 * image_width)
    y_center = (ymin + ymax) / (2 * image_height)
    width /= image_width
    height /= image_height
    return x_center, y_center, width, height

########################################

def transform_image_and_bbs(img_arr, bboxes, h, w):
    """
    :param img_arr: original image as a numpy array
    :param bboxes: bboxes as numpy array where each row is 'x_min', 'y_min', 'x_max', 'y_max', "class_id"
    :param h: resized height dimension of image
    :param w: resized weight dimension of image
    :return: dictionary containing {image:transformed, bboxes:['x_min', 'y_min', 'x_max', 'y_max', "class_id"]}
    """
    # create resize transform pipeline
    transform = albumentations.Compose(
        [albumentations.Resize(height=h, width=w, always_apply=True)],
        bbox_params=albumentations.BboxParams(format='pascal_voc'))

    transformed = transform(image=img_arr, bboxes=bboxes)

    return transformed

########################################

def construct_yolo_compat_data_structure(dataframe): #train or val 
    for i in ['train','val']:
        for index,row in dataframe.iterrows():
            #copy image to new directory
            source_file = os.path.join(os.getcwd(),row['img_path'])
            destination_file = os.path.join(os.getcwd(),'yolo_dataset',i,'images',f"{row['img_id']}.jpg")
            

            # Read the image using PIL and convert to PIL 
            image_as_numpy_arr = np.array(Image.open(source_file))
            bbs = []
            #convert bbs to shape that works with albumenations 
            for bb in row['bounding_boxes']:
                #current defect_type,xmin,ymin,xmax,ymax]) convert to  'x_min', 'y_min', 'x_max', 'y_max', "class_id"
                bbs.append([bb[1],bb[2],bb[3],bb[4],bb[0]])
            h,w=640,640
            transformed_img_and_bbs = transform_image_and_bbs(image_as_numpy_arr, np.array(bbs), h, w)
            
            transformed_img = transformed_img_and_bbs['image']            
            transformed_bbs = transformed_img_and_bbs['bboxes']            
            
            output_image = Image.fromarray(transformed_img)
            output_image.save(destination_file)

            #os.rename(current_name, new_name)
            bounding_boxes = ''

            for bb in bbs:
                category = bb[-1]
                x_center, y_center, width, height = calculate_bbox_parameters(bb[0],bb[2],bb[1],bb[3],row['dimensions'][0],row['dimensions'][1])
                bounding_boxes += f'{category} {x_center} {y_center} {width} {height}\n'

                
                
            labels_path = os.path.join(os.getcwd(),'yolo_dataset',i,'labels',f'{row["img_id"]}.txt')
            #write the bounding boxes to the new text structure 
            with open(labels_path, 'w') as file:
                file.write(bounding_boxes)


In [4]:
train_df, val_df = train_test_split(construct_dataframe(), shuffle = True,test_size=0.2, random_state=42)

construct_yolo_compat_data_structure(train_df)
construct_yolo_compat_data_structure(val_df)

# implementation

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') #0 meaning gpu , else cpu 
print(f'training will be done on {"gpu" if device == "cuda" else "cpu"}')

# Load a model
#model = YOLO('yolov8n.yaml')  # build a new model from YAML
model = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
#model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

# Train the model
model.train(data='./yolov5/data/custom_dataset.yaml',
            epochs=100,
            batch = 2,
            imgsz=640,
            patience=20,
            device='cuda', #gpu
            project= 'project1', #project name 
            name = 'exp1', #experiment name
           
           )


training will be done on cpu


Ultralytics YOLOv8.0.137  Python-3.9.13 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce GTX 1070, 8192MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=./yolov5/data/custom_dataset.yaml, epochs=100, patience=20, batch=2, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=project1, name=exp1, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, opt

[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...
[34m[1mAMP: [0mchecks passed 
[34m[1mtrain: [0mScanning C:\Users\marka\Documents\ml_projects\Metallic-Surface-Defect-Detection\yolo_dataset\train\labels... 228[0m
[34m[1mtrain: [0mNew cache created: C:\Users\marka\Documents\ml_projects\Metallic-Surface-Defect-Detection\yolo_dataset\train\labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mScanning C:\Users\marka\Documents\ml_projects\Metallic-Surface-Defect-Detection\yolo_dataset\val\labels... 2280 im[0m
[34m[1mval: [0mNew cache created: C:\Users\marka\Documents\ml_projects\Metallic-Surface-Defect-Detection\yolo_dataset\val\labels.cache
Plotting labels to project1\exp15\labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weig

KeyboardInterrupt: 

In [None]:

# Load a model
model = YOLO('path/to/last.pt')  # load a partially trained model

# Resume training
model.train(resume=True)


python train.py --img 640 --epochs 3 --batch 2 --device 0 --data custom_dataset.yaml --weights yolov5m.pt 
python train.py --img 640 --epochs 3 --batch 2 --data custom_dataset.yaml --weights yolov5m.pt 

