See https://github.com/meituan/YOLOv6/blob/4364f29bf3244f2e73d0c42a103cd7a9cbb16ca9/docs/Train_custom_data.md for more detailed guidance on how to organize a custom dataset, how to train, and how to run inference. I generated my dataset in "Text Generator.ipynb". Below I show how to train the model, evaluate its performance on the validation set, and run single-image inference.

# Import Packages

In [None]:
import os
import cv2
from google.colab.patches import cv2_imshow
import torch
import imageio

# Supplemental Functions

In [None]:
# create a GIF from the images
def create_gif(image_dir, gif_path, duration = 1):
    """Assemble the image files found in ``image_dir`` into one animated GIF.

    Args:
        image_dir: Directory containing the frame images.
        gif_path: Destination path of the GIF file.
        duration: Forwarded unchanged to ``imageio.mimsave`` as ``duration``.

    Frames are added in natural filename order (``test 2.jpg`` comes before
    ``test 10.jpg``) because ``os.listdir`` returns entries in arbitrary
    order. Subdirectories are skipped, and so is ``gif_path`` itself when it
    lives inside ``image_dir`` (e.g. left over from a previous run).
    """
    import re

    def natural_key(name):
        # re.split with a capture group alternates text / digit-run tokens,
        # so odd positions are always numbers and compare as integers.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if idx % 2 else tok.lower()
                for idx, tok in enumerate(tokens)], name

    output_path = os.path.abspath(gif_path)
    images = []

    for filename in sorted(os.listdir(image_dir), key=natural_key):
        file_path = os.path.join(image_dir, filename)
        if not os.path.isfile(file_path):
            continue
        if os.path.abspath(file_path) == output_path:
            # never feed a previously generated GIF back in as a frame
            continue
        images.append(imageio.v2.imread(file_path))

    imageio.mimsave(gif_path, images, duration = duration)

In [None]:
# create an MP4 video from the images
def create_video(image_dir, video_path, fps=1):
    """Write the images found in ``image_dir`` to an mp4v-encoded video.

    Args:
        image_dir: Directory containing the frame images.
        video_path: Destination path of the video file.
        fps: Frames per second passed to ``cv2.VideoWriter``.

    Raises:
        ValueError: If ``image_dir`` contains no file that OpenCV can decode.

    Frames are written in natural filename order (``test 2.jpg`` comes before
    ``test 10.jpg``) because ``os.listdir`` returns entries in arbitrary
    order. The frame size is taken from the first image; any later image with
    a different size is resized to match it.
    """
    import re

    def natural_key(name):
        # re.split with a capture group alternates text / digit-run tokens,
        # so odd positions are always numbers and compare as integers.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if idx % 2 else tok.lower()
                for idx, tok in enumerate(tokens)], name

    output_path = os.path.abspath(video_path)
    images = []

    for filename in sorted(os.listdir(image_dir), key=natural_key):
        file_path = os.path.join(image_dir, filename)
        if not os.path.isfile(file_path):
            continue
        if os.path.abspath(file_path) == output_path:
            # the output may live inside image_dir (e.g. from a previous run)
            continue
        frame = cv2.imread(file_path)
        if frame is None:
            # cv2.imread signals an unreadable / non-image file with None
            continue
        images.append(frame)

    if not images:
        raise ValueError(f"no readable images found in {image_dir!r}")

    height, width = images[0].shape[:2]
    video = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    try:
        for image in images:
            if image.shape[:2] != (height, width):
                # the writer was opened for (width, height) frames
                image = cv2.resize(image, (width, height))
            video.write(image)
    finally:
        # always finalize the container, even if a write fails
        video.release()

# Clone Git Repo & Install Dependencies

In [None]:
# Move into the project folder, clone the YOLOv6 repository there, then
# install its requirements from inside the cloned repository.
%cd /content/drive/MyDrive/Gradient Health/Text Detection Project
!git clone https://github.com/meituan/YOLOv6.git
%cd YOLOv6
%pip install -r requirements.txt

# YOLOv6-S

In [None]:
# download the pretrained YOLOv6-S weights (release 0.4.0) and save them as
# yolov6s.pt in the current working directory
torch.hub.download_url_to_file('https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt', 'yolov6s.pt')

In [None]:
%%time
# Fine-tune on the custom train set (single GPU, device 0) with batch size 32,
# using configs/yolov6s_finetune.py and the dataset described in data/dataset.yaml.
# Caution: data/dataset.yaml must first be edited to declare only one class, ['text'].
!python tools/train.py --batch 32 --conf configs/yolov6s_finetune.py --data data/dataset.yaml --fuse_ab --device 0

In [None]:
%%time
# Evaluate the trained checkpoint on the validation set (--task val).
# Caution: change the best_ckpt.pt path below so it points at the training
# run you want to evaluate (here runs/train/exp1).
!python tools/eval.py --data data/dataset.yaml  --weights runs/train/exp1/weights/best_ckpt.pt --task val --device 0

In [None]:
%%time
# Run inference on the test set, one infer.py invocation per image:
# 'test 1.jpg' through 'test 100.jpg' under ../custom_dataset/images/test.
# Caution: change the best_ckpt.pt path below so it points at the training
# run you want to use (here runs/train/exp1).
for i in range(1,101):
    # the file name contains a space, hence the quotes around {filename} below
    filename = '../custom_dataset/images/test/test ' + str(i) + '.jpg'
    !python tools/infer.py --weights runs/train/exp1/weights/best_ckpt.pt --source '{filename}' --yaml data/dataset.yaml --device 0

In [None]:
# Combine the images annotated by YOLO into a video so the results are easier to review.
# NOTE: video_path lies inside image_dir, so the output file ends up in the
# same folder that is scanned for input images.
image_dir = 'runs/inference/exp'
video_path = 'runs/inference/exp/annotation video.mp4'
create_video(image_dir, video_path)

In [None]:
# Run inference on two individual images ('infer 1.jpg' and 'infer 2.jpg').
# Caution: change the best_ckpt.pt path below so it points at the training
# run you want to use (here runs/train/exp1).
!python tools/infer.py --weights runs/train/exp1/weights/best_ckpt.pt --source '../custom_dataset/images/test/infer 1.jpg' --yaml data/dataset.yaml --device 0
!python tools/infer.py --weights runs/train/exp1/weights/best_ckpt.pt --source '../custom_dataset/images/test/infer 2.jpg' --yaml data/dataset.yaml --device 0
# load the two result images from runs/inference/exp and display them inline
img1 = cv2.imread('runs/inference/exp/infer 1.jpg')
cv2_imshow(img1)
img2 = cv2.imread('runs/inference/exp/infer 2.jpg')
cv2_imshow(img2)