In [24]:
from ultralytics import YOLO
import cv2
import numpy as np
import matplotlib.pyplot as plt

import os
from sklearn.model_selection import train_test_split

# Training

Load the model

In [25]:
# Load the pretrained YOLOv8 nano checkpoint.
# The weight file is published under the lowercase name "yolov8n.pt". A mixed-case
# spelling presumably only resolves when a matching file already exists on a
# case-insensitive filesystem, so use the canonical name to keep a fresh
# environment (or a case-sensitive OS) working.
model = YOLO("yolov8n.pt")

Create a function that pastes a digit image from the dataset onto a manuscript background at a random position and returns the resulting image together with the digit's bounding box

In [26]:
def create_synthetic_image(digit_image, background_image, output_size=(1500, 2000)):
    """Paste a digit image at a random position on a resized background.

    Parameters
    ----------
    digit_image : str
        Path of the digit image to paste.
    background_image : str
        Path of the background image.
    output_size : tuple of int, optional
        ``(width, height)`` in pixels the background is resized to before the
        digit is pasted. Defaults to ``(1500, 2000)``.

    Returns
    -------
    tuple
        ``(image, (x_center, y_center, width, height))`` where ``image`` is the
        composed array and the box values are expressed in pixels of ``image``.

    Raises
    ------
    FileNotFoundError
        If either image cannot be read.
    ValueError
        If the digit is larger than the resized background.
    """
    bg = cv2.imread(background_image)
    if bg is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read background image: {background_image}")

    digit = cv2.imread(digit_image, cv2.IMREAD_UNCHANGED)
    if digit is None:
        raise FileNotFoundError(f"Could not read digit image: {digit_image}")

    # IMREAD_UNCHANGED keeps the channel layout stored in the file, so the digit
    # may be single-channel or carry an alpha channel. Bring it to three
    # channels so it can be assigned into the 3-channel background.
    if digit.ndim == 2:
        digit = np.repeat(digit[:, :, np.newaxis], 3, axis=2)
    elif digit.shape[2] == 4:
        digit = digit[:, :, :3]  # drop alpha; the digit is pasted opaquely

    bg = cv2.resize(bg, tuple(output_size))

    digit_h, digit_w = digit.shape[:2]
    bg_h, bg_w = bg.shape[:2]
    if digit_w > bg_w or digit_h > bg_h:
        raise ValueError(
            f"Digit image ({digit_w}x{digit_h}) does not fit in the background ({bg_w}x{bg_h})"
        )

    # randint's upper bound is exclusive: the +1 lets the digit reach the
    # right/bottom edge and makes an exact-fit digit valid (offset 0).
    x = np.random.randint(0, bg_w - digit_w + 1)
    y = np.random.randint(0, bg_h - digit_h + 1)
    bg[y:y + digit_h, x:x + digit_w] = digit

    x_center = x + digit_w / 2
    y_center = y + digit_h / 2

    return bg, (x_center, y_center, digit_w, digit_h)

In [27]:
# Smoke-test create_synthetic_image on a single digit and the white background
digit_image_path = '../datasets/DIDA/0/0_26.jpg'
background_image_path = './white_background.jpg'

synthetic_image, _bbox = create_synthetic_image(digit_image_path, background_image_path)

# Convert BGR -> RGB before handing the array to matplotlib, then show it without axes
synthetic_image_rgb = cv2.cvtColor(synthetic_image, cv2.COLOR_BGR2RGB)
plt.imshow(synthetic_image_rgb)
plt.axis('off')
plt.show()

<Figure size 640x480 with 1 Axes>

## Create train/validation data from 2,000 images of the DIDA dataset (200 per digit, 80/20 split)

In [28]:
# Number of source images sampled for each digit class
images_per_digit = 200

# Seed NumPy's global RNG so the sampled files and the paste positions are the
# same on every run. Without it, re-running this cell writes a different sample
# into the same folders and an image can end up in both train and val.
# NOTE: files left over from earlier, unseeded runs are not removed; delete
# ./train and ./val first if they already contain data.
random_seed = 42
np.random.seed(random_seed)

# Define paths
dataset_path = '../datasets/DIDA'
train_path = './train'
val_path = './val'

# Create train and val directories
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)


def write_synthetic_split(image_paths, output_dir, class_id, background_path):
    """Generate one synthetic image and one YOLO label file per source image.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the digit images to paste on the background.
    output_dir : str
        Directory receiving ``<stem>.png`` and ``<stem>.txt`` for every image.
    class_id : int
        Class index written as the first field of each label line.
    background_path : str
        Path of the background image passed to ``create_synthetic_image``.

    Raises
    ------
    IOError
        If a synthetic image cannot be written.
    """
    for image_path in image_paths:
        image_path = str(image_path)  # np.random.choice yields numpy string scalars
        synthetic, (x_center, y_center, box_w, box_h) = create_synthetic_image(
            image_path, background_path
        )

        # Normalise with the real size of the generated image rather than
        # hard-coded constants, so labels stay correct if that size changes.
        img_h, img_w = synthetic.shape[:2]

        # splitext only touches the extension; str.replace would rewrite every
        # '.jpg' occurrence anywhere in the path.
        stem = os.path.splitext(os.path.basename(image_path))[0]

        image_out = os.path.join(output_dir, f'{stem}.png')
        if not cv2.imwrite(image_out, synthetic):
            raise IOError(f"Could not write synthetic image: {image_out}")

        label = f"{class_id} {x_center / img_w} {y_center / img_h} {box_w / img_w} {box_h / img_h}"
        with open(os.path.join(output_dir, f'{stem}.txt'), 'w') as f:
            f.write(label)


# Sample each digit class, split it 80/20 and write both splits
for digit in range(10):
    print(f"Processing digit {digit}...")
    digit_path = os.path.join(dataset_path, str(digit))

    # Sort the listing: os.listdir order is arbitrary, and a stable order is
    # needed for the seeded sampling to pick the same files each time.
    images = sorted(
        os.path.join(digit_path, img)
        for img in os.listdir(digit_path)
        if img.lower().endswith('.jpg')
    )
    images_sample = np.random.choice(images, size=images_per_digit, replace=False)
    train_images, val_images = train_test_split(images_sample, test_size=0.2, random_state=42)

    write_synthetic_split(train_images, train_path, digit, background_image_path)
    write_synthetic_split(val_images, val_path, digit, background_image_path)

    print("Done.")

Processing digit 0...
Done.
Processing digit 1...
Done.
Processing digit 2...
Done.
Processing digit 3...
Done.
Processing digit 4...
Done.
Processing digit 5...
Done.
Processing digit 6...
Done.
Processing digit 7...
Done.
Processing digit 8...
Done.
Processing digit 9...
Done.


Create the YOLO configuration file

In [29]:
# Create the YOLO dataset configuration file
class_names = [str(d) for d in range(10)]

# Emit the paths as single-quoted YAML scalars: backslashes stay literal and the
# only character needing escaping is the quote itself (doubled). Unquoted, a path
# containing " #", ": " or a leading special character would be misparsed.
train_dir = os.path.abspath(train_path).replace("'", "''")
val_dir = os.path.abspath(val_path).replace("'", "''")

config = f"""
train: '{train_dir}'
val: '{val_dir}'

nc: {len(class_names)}
names: {class_names}
"""

# Write as UTF-8 explicitly so non-ASCII characters in the absolute paths do not
# depend on the platform's default encoding.
# NOTE(review): assumes the consumer reads the file as UTF-8 - confirm.
with open('DIDA.yaml', 'w', encoding='utf-8') as f:
    f.write(config)

Train the model

In [30]:
# Fine-tune the loaded model on the synthetic dataset described by DIDA.yaml.
# NOTE(review): the recorded run logs close_mosaic=10 with epochs=10 and prints
# "Closing dataloader mosaic" before the first epoch, so mosaic augmentation
# appears to be disabled for the whole run - confirm this is intended.
train_settings = {
    "data": './DIDA.yaml',
    "epochs": 10,
    "batch": 16,
}
results = model.train(**train_settings)

New https://pypi.org/project/ultralytics/8.3.106 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.75  Python-3.9.13 torch-2.6.0+cpu CPU (Intel Core(TM) i5-9400F 2.90GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=YOLOv8n.pt, data=./DIDA.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_fr

[34m[1mtrain: [0mScanning D:\OneDrive\Documents\Julien\Documents\!ESILV\A4\PI2\digits_detection_in_ancient_manuscripts\test_with_yolo\train... 1600 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1600/1600 [00:10<00:00, 154.61it/s]


[34m[1mtrain: [0mNew cache created: D:\OneDrive\Documents\Julien\Documents\!ESILV\A4\PI2\digits_detection_in_ancient_manuscripts\test_with_yolo\train.cache


[34m[1mval: [0mScanning D:\OneDrive\Documents\Julien\Documents\!ESILV\A4\PI2\digits_detection_in_ancient_manuscripts\test_with_yolo\val... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:02<00:00, 162.34it/s]

[34m[1mval: [0mNew cache created: D:\OneDrive\Documents\Julien\Documents\!ESILV\A4\PI2\digits_detection_in_ancient_manuscripts\test_with_yolo\val.cache





Plotting labels to runs\detect\train2\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train2[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      1.102      7.371     0.8068         14        640: 100%|██████████| 100/100 [08:41<00:00,  5.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:52<00:00,  4.07s/it]

                   all        400        400     0.0155      0.982      0.141      0.106






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G     0.8273      5.108     0.7987         15        640: 100%|██████████| 100/100 [08:17<00:00,  4.97s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:44<00:00,  3.39s/it]

                   all        400        400      0.104      0.778      0.171      0.137






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G     0.7639      4.189     0.7931         16        640: 100%|██████████| 100/100 [08:26<00:00,  5.07s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:52<00:00,  4.01s/it]

                   all        400        400      0.121       0.63      0.184       0.15






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G     0.6787      3.617     0.7991         11        640: 100%|██████████| 100/100 [09:30<00:00,  5.71s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:57<00:00,  4.41s/it]

                   all        400        400      0.141      0.629       0.18      0.153






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G     0.6423      3.166     0.7867         16        640: 100%|██████████| 100/100 [08:36<00:00,  5.16s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:52<00:00,  4.01s/it]

                   all        400        400      0.121      0.755       0.19      0.164






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G     0.5878      2.891     0.7877         15        640: 100%|██████████| 100/100 [08:30<00:00,  5.10s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:44<00:00,  3.40s/it]

                   all        400        400      0.112      0.826      0.197      0.174






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G     0.5627      2.669     0.7863         16        640: 100%|██████████| 100/100 [16:48<00:00, 10.09s/it]  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:57<00:00,  4.46s/it]

                   all        400        400      0.128       0.83      0.208      0.187






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G     0.5392      2.555     0.7829         12        640: 100%|██████████| 100/100 [09:10<00:00,  5.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:57<00:00,  4.42s/it]

                   all        400        400      0.134      0.747      0.222      0.196






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G     0.5104      2.436     0.7827         15        640: 100%|██████████| 100/100 [09:12<00:00,  5.52s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:14<00:00,  5.72s/it]

                   all        400        400      0.137      0.696      0.241      0.219






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G     0.4783      2.377     0.7818         13        640: 100%|██████████| 100/100 [10:44<00:00,  6.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:57<00:00,  4.42s/it]

                   all        400        400      0.148       0.72      0.239      0.218






10 epochs completed in 1.790 hours.
Optimizer stripped from runs\detect\train2\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train2\weights\best.pt, 6.2MB

Validating runs\detect\train2\weights\best.pt...
Ultralytics 8.3.75  Python-3.9.13 torch-2.6.0+cpu CPU (Intel Core(TM) i5-9400F 2.90GHz)
Model summary (fused): 168 layers, 3,007,598 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:42<00:00,  3.25s/it]


                   all        400        400      0.139      0.692      0.241      0.219
                     0         40         40      0.169       0.55      0.257      0.218
                     1         40         40      0.156      0.875      0.421      0.394
                     2         40         40      0.108        0.6      0.139      0.124
                     3         40         40      0.121      0.847      0.176      0.159
                     4         40         40      0.101        0.7      0.121      0.107
                     5         40         40      0.243      0.675      0.508      0.458
                     6         40         40     0.0987      0.375      0.146      0.133
                     7         40         40      0.131      0.725      0.199       0.19
                     8         40         40      0.114      0.725      0.141      0.124
                     9         40         40      0.145       0.85      0.301      0.279
Speed: 1.7ms preproce

Save the trained model

In [31]:
# Persist the fine-tuned model next to the notebook
trained_weights_path = 'yolo8_trained_on_dida.pt'
model.save(trained_weights_path)