# 1. Mounting Drive

In [1]:
# Mount Google Drive at /content/drive so the project files under MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Print the Python interpreter version string of the current runtime.
import sys
print(sys.version)

3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]


# 2. Installing Ultralytics

In [None]:
# Install the pinned Ultralytics release. %pip (rather than !pip) installs into the
# environment of the running kernel instead of whatever `pip` the subshell resolves.
%pip install ultralytics==8.0.124

Collecting ultralytics==8.0.124
  Downloading ultralytics-8.0.124-py3-none-any.whl (612 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.6/612.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ultralytics
Successfully installed ultralytics-8.0.124


# 3. Importing Dependencies

In [None]:
from ultralytics import YOLO
from xml.dom import minidom
import os
import glob
from ultralytics.vit import SAM
from ultralytics.yolo.data.annotator import auto_annotate

# 4. XML to YOLO(.txt) Conversion
If your object detection (bounding box) labels are in **XML** format, this section converts them from **XML to YOLO .txt** labels.


In [None]:
# Look-up table: class name found in an XML <name> tag -> integer class id
# written as the first field of each label line.
lut = {
    "bin_red": 0,
    "bin_yellow": 1,
}


def convert_coordinates(size, box):
  """Turn a pixel-space box into a centre/size tuple scaled by the image size.

  Args:
    size: ``(image_width, image_height)``.
    box: ``(xmin, xmax, ymin, ymax)`` -- note that the x pair comes first.

  Returns:
    Tuple ``(x_center, y_center, width, height)`` where the x values are
    multiplied by ``1/image_width`` and the y values by ``1/image_height``.
  """
  # Reciprocals are computed first and multiplied in afterwards.
  scale_x = 1.0/size[0]
  scale_y = 1.0/size[1]

  left, right = box[0], box[1]
  top, bottom = box[2], box[3]

  x_center = (left + right)/2.0 * scale_x
  y_center = (top + bottom)/2.0 * scale_y
  box_w = (right - left) * scale_x
  box_h = (bottom - top) * scale_y
  return (x_center, y_center, box_w, box_h)


def _first_text(node, tag):
  """Return the text of the first ``<tag>`` element found beneath ``node``."""
  return node.getElementsByTagName(tag)[0].firstChild.data


def convert_xml2yolo(lut, pattern="../content/drive/MyDrive/project/Data(xml_to_txt)/*.xml"):
  """Convert XML bounding-box annotations into YOLO ``.txt`` label files.

  For every XML file matched by ``pattern`` a text file with the same base name
  is written next to it. Each ``<object>`` whose ``<name>`` is present in
  ``lut`` produces one line ``<class_id> <x_center> <y_center> <width> <height>``
  with the four box values formatted to six decimals.

  Objects whose name is missing from ``lut`` are reported and skipped, so no
  negative class id ends up in a label file.

  Args:
    lut: Mapping from class name (text of the ``<name>`` tag) to integer class id.
    pattern: Glob pattern selecting the XML files to convert. Defaults to the
      project's Drive folder.

  Raises:
    IndexError: If a required tag (``size``, ``width``, ``height``, ``name``,
      ``bndbox`` or a coordinate) is missing from an XML file.
    ZeroDivisionError: If an XML file declares a width or height of 0.
  """
  for fname in glob.glob(pattern):
    xmldoc = minidom.parse(fname)
    fname_out = os.path.splitext(fname)[0] + '.txt'

    size = xmldoc.getElementsByTagName('size')[0]
    width = int(_first_text(size, 'width'))
    height = int(_first_text(size, 'height'))

    # Build every line before touching the output file, so a malformed XML
    # does not leave a truncated label file behind.
    lines = []
    for item in xmldoc.getElementsByTagName('object'):
      classid = _first_text(item, 'name')
      if classid not in lut:
        print ("warning: label '%s' not in look-up table, skipping object" % classid)
        continue

      bndbox = item.getElementsByTagName('bndbox')[0]
      # convert_coordinates expects (xmin, xmax, ymin, ymax).
      b = tuple(float(_first_text(bndbox, tag)) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
      bb = convert_coordinates((width, height), b)

      lines.append(str(lut[classid]) + " " + " ".join([("%.6f" % a) for a in bb]) + '\n')

    with open(fname_out, "w") as f:
      f.writelines(lines)

    print ("wrote %s" % fname_out)



def main():
  """Entry point: run the XML-to-YOLO conversion with the module-level ``lut`` mapping."""
  convert_xml2yolo(lut=lut)


# Run the conversion when this code is executed as the top-level module.
if __name__ == '__main__':
  main()

wrote ../content/drive/MyDrive/project/Data(xml_to_txt)/sample1.txt
wrote ../content/drive/MyDrive/project/Data(xml_to_txt)/sample2.txt


# 5. Object Detection Model training

In [None]:
# Train a detection model starting from yolov8n.pt: 35 epochs, image size 640,
# dataset described by dataset_OD/custom_data.yaml.
# NOTE(review): copy_paste augmentation is understood to rely on segmentation masks --
# confirm it has any effect for task=detect.
!yolo task=detect mode=train model=yolov8n.pt data=../content/drive/MyDrive/project/dataset_OD/custom_data.yaml epochs=35 imgsz=640 copy_paste=0.7

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to yolov8n.pt...
100% 6.23M/6.23M [00:00<00:00, 70.4MB/s]
New https://pypi.org/project/ultralytics/8.0.203 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.124 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=../content/drive/MyDrive/project/dataset_OD/custom_data.yaml, epochs=35, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, 

# 6. Auto Annotation of segmentation label
The SAM model is used for auto-annotation. Bounding boxes predicted by the object detection model (`det_model`) are used as prompts for the SAM model.

In [None]:
# Generate segmentation label files: the trained OD model supplies the boxes,
# SAM (sam_b.pt) turns them into masks, and labels are written to output_dir.
auto_annotate(
    data="../content/drive/MyDrive/project/Auto_Annotation(Seg)/input/*",
    det_model="../content/drive/MyDrive/project/output/OD_model.pt",
    sam_model='sam_b.pt',
    output_dir='../content/drive/MyDrive/project/Auto_Annotation(Seg)/Output/Labels',
)

Ultralytics YOLOv8.0.124 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/sam_b.pt to sam_b.pt...
100%|██████████| 358M/358M [00:05<00:00, 71.7MB/s]

image 1/2 /content/../content/drive/MyDrive/project/Auto_Annotation(Seg)/input/sample1.jpg: 480x640 1 Bin_red, 1 Bin_yellow, 65.6ms
image 2/2 /content/../content/drive/MyDrive/project/Auto_Annotation(Seg)/input/sample2.jpg: 640x480 1 Bin_red, 1 Bin_yellow, 67.2ms
Speed: 2.8ms preprocess, 66.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)


# 7. Instance Segmentation Model Training
Training uses transfer learning from pretrained weights, and `copy_paste=0.7` is used for image augmentation.
The trained weight file can be obtained in:

In [None]:
# Train an instance-segmentation model starting from yolov8s-seg.pt: 35 epochs,
# image size 640, copy_paste=0.7, dataset described by dataset_Seg/custom_data.yaml.
!yolo task=segment mode=train model=yolov8s-seg.pt data=../content/drive/MyDrive/project/dataset_Seg/custom_data.yaml epochs=35 imgsz=640 copy_paste=0.7

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt to yolov8s-seg.pt...
100% 22.8M/22.8M [00:00<00:00, 77.6MB/s]
New https://pypi.org/project/ultralytics/8.0.185 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.124 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
[34m[1myolo/engine/trainer: [0mtask=segment, mode=train, model=yolov8s-seg.pt, data=../content/drive/MyDrive/project/dataset_Seg/custom_data.yaml, epochs=35, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True,

# 8. Testing OD & Segmentation Model
Store the weight file obtained in step 7 in the `MyDrive/project/output` folder.
This section tests both the object detection and the instance segmentation models on an image dataset. It can be extended to videos.


In [None]:
# Run the trained detection weights (output/OD_model.pt) on every file in
# dataset_OD/test_OD, with a confidence setting of conf=0.6.
!yolo task=detect mode=predict model="../content/drive/MyDrive/project/output/OD_model.pt" conf=0.6 source='../content/drive/MyDrive/project/dataset_OD/test_OD/*'

In [None]:
# Run the trained segmentation weights (output/seg_model.pt) on every file in
# dataset_Seg/test_seg, with a confidence setting of conf=0.6.
!yolo task=segment mode=predict model="../content/drive/MyDrive/project/output/seg_model.pt" conf=0.6 source='../content/drive/MyDrive/project/dataset_Seg/test_seg/*'

Ultralytics YOLOv8.0.124 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8s-seg summary (fused): 195 layers, 11780374 parameters, 0 gradients

image 1/19 /content/../content/drive/MyDrive/project/dataset_Seg/test_seg/601.jpg: 640x480 5 Bin_reds, 7 Bin_yellows, 124.8ms


**Auto Annotation function**

In [None]:
import cv2
import numpy as np
from pathlib import Path

from ultralytics import YOLO
from ultralytics.vit.sam import PromptPredictor, build_sam
from ultralytics.yolo.utils.torch_utils import select_device

import matplotlib.pyplot as plt


def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
    """
    Produce segmentation label files by chaining a YOLO detector with a SAM model.

    The detector is run over ``data``. For each result with at least one detection,
    the detected boxes are handed to SAM as box prompts, the returned masks are attached
    to the result, and the segments from ``result.masks.xyn`` are written to
    ``<output_dir>/<image stem>.txt`` -- one line per non-empty segment, consisting of the
    class id followed by the flattened segment coordinates. Results without detections
    produce no file.

    Args:
        data (str): Path to a folder containing images to be annotated.
        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
        device (str, optional): Device specifier passed to ``select_device``. Defaults to ''.
        output_dir (str | None | optional): Directory to save the annotated results.
            Defaults to a 'labels' folder in the same directory as 'data'.
    """
    device = select_device(device)
    detector = YOLO(det_model)
    sam = build_sam(sam_model)
    detector.to(device)
    sam.to(device)

    # Resolve the destination once; fall back to a sibling 'labels' folder of `data`.
    labels_dir = Path(output_dir) if output_dir else Path(str(data)).parent / 'labels'
    labels_dir.mkdir(parents=True, exist_ok=True)

    predictor = PromptPredictor(sam)

    for result in detector(data, stream=True):
        xyxy = result.boxes.xyxy
        class_ids = result.boxes.cls.int().tolist()
        if not class_ids:
            # Nothing detected: no prompts for SAM, so no label file is written.
            continue

        predictor.set_image(result.orig_img)
        sam_boxes = predictor.transform.apply_boxes_torch(xyxy, result.orig_shape[:2])
        masks, _, _ = predictor.predict_torch(
            point_coords=None,
            point_labels=None,
            boxes=sam_boxes,
            multimask_output=False,
        )

        result.update(masks=masks.squeeze(1))
        polygons = result.masks.xyn

        # The suffix is appended as a string so that dots inside the stem are kept.
        label_path = str(labels_dir / Path(result.path).stem) + '.txt'
        with open(label_path, 'w') as f:
            for idx, polygon in enumerate(polygons):
                if len(polygon) == 0:
                    continue
                coords = ' '.join(map(str, polygon.reshape(-1).tolist()))
                f.write(f'{class_ids[idx]} ' + coords + '\n')