In [None]:
# test annotations
import random
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from PIL import ImageDraw


# Seed Python's RNG so the random.choice() sample drawn later in this cell is
# the same on every run (for a given directory listing order).
random.seed(37)

def plot_bounding_box(image, annotation_list):
    """Draw YOLO-format boxes on ``image`` and show the result with matplotlib.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to annotate. The boxes are drawn directly onto this object, so
        it is modified in place.
    annotation_list : sequence of [class, x_center, y_center, width, height]
        One row per box. The four geometry values are fractions of the image
        width/height. The class value is ignored: every box is labelled
        "marginalia". The list may be empty (an image with no boxes), in
        which case the image is displayed without any rectangles.
    """
    # Force a float (n, 5) array. Without this an empty list becomes a 1-D
    # array and the column indexing below raises IndexError, and all-integer
    # input would silently truncate the pixel coordinates.
    annotations = np.array(annotation_list, dtype=float).reshape(-1, 5)
    w, h = image.size

    plotted_image = ImageDraw.Draw(image)

    # Scale the normalised centre/size values to pixels.
    x_center = annotations[:, 1] * w
    y_center = annotations[:, 2] * h
    box_w = annotations[:, 3] * w
    box_h = annotations[:, 4] * h

    # Convert centre/size to top-left and bottom-right corners.
    x0 = x_center - box_w / 2
    y0 = y_center - box_h / 2
    x1 = x0 + box_w
    y1 = y0 + box_h

    for left, top, right, bottom in zip(x0, y0, x1, y1):
        plotted_image.rectangle(((left, top), (right, bottom)))
        # Label sits 10 px above the top-left corner of the box.
        plotted_image.text((left, top - 10), "marginalia")

    plt.imshow(np.array(image))
    plt.show()

# Get any random annotation file.
# The listing is restricted to label files and sorted, because os.listdir()
# returns names in arbitrary order and the seeded choice would otherwise not
# be reproducible.
annotation_dir = "/project_data_yolov7rnd1/annotations/"
label_names = sorted(x for x in os.listdir(annotation_dir) if x.endswith(".txt"))
annotation_file = os.path.join(annotation_dir, random.choice(label_names))

print(annotation_file)
with open(annotation_file, "r") as file:
    # splitlines() keeps the last box even when the file has no trailing
    # newline; blank lines are skipped so float() never sees an empty string.
    annotation_list = [
        [float(y) for y in line.split()]
        for line in file.read().splitlines()
        if line.strip()
    ]

# Get the corresponding image file: same stem in the images folder.
# Annotated pages are .png, while the negative examples are copied in as .jpg.
image_dir = "/project_data_yolov7rnd1/images/"
image_stem = os.path.splitext(os.path.basename(annotation_file))[0]
image_candidates = [os.path.join(image_dir, image_stem + ext) for ext in (".png", ".jpg")]
image_file = next((p for p in image_candidates if os.path.exists(p)), image_candidates[0])
print(image_file)
assert os.path.exists(image_file), "no image found for " + annotation_file

# Load the image
image = Image.open(image_file)

# Plot the Bounding Box
plot_bounding_box(image, annotation_list)


In [None]:
# add negative examples to training data
import os
import shutil

source = '/calisphere_imgs_metadata'
dest = '/project_data_yolov7rnd1/images'

# Pages without marginalia, used as background (negative) training examples.
neg_examples = [
    "413-n1hw23.jpg", "405-n17s3z.jpg", "390-n1dw25.jpg", "389-n1x01g.jpg",
    "389-n1v30h.jpg", "389-n1rw28.jpg", "388-n1fw2g.jpg", "236-n10w2f.jpg",
    "210-n1fw2g.jpg", "35-n13w2c.jpg", "24-n13s3q.jpg", "100-n1ws3v.jpg",
    "40-n1bs3w.jpg", "226-n1zs3g.jpg", "161-n17s3z.jpg", "42-n13s3q.jpg",
    "115-n15s3b.jpg", "23-n1gk5t.jpg", "85-n1gk5t.jpg", "65-n1nk5c.jpg",
]

for image_name in neg_examples:
    # Copy the image into the dataset's image folder.
    shutil.copy(os.path.join(source, image_name), dest)

    # Create a matching label file; append mode leaves an existing file untouched.
    stem, _ext = os.path.splitext(image_name)
    label_path = os.path.join("/project_data_yolov7rnd1/annotations", stem + ".txt")
    with open(label_path, 'a'):
        pass


In [None]:
# split dataset
import os
from sklearn.model_selection import train_test_split

# Read images and pair each one with its annotation by file stem.
# Sorting the two listings independently and trusting positions to line up
# breaks as soon as either folder holds a stray or unmatched file, so the
# pairing is made explicit and checked.
image_dir = '/project_data_yolov7rnd1/images/'
annotation_dir = '/project_data_yolov7rnd1/annotations/'

images = sorted(os.path.join(image_dir, x) for x in os.listdir(image_dir))
label_by_stem = {
    os.path.splitext(x)[0]: os.path.join(annotation_dir, x)
    for x in os.listdir(annotation_dir)
    if x[-3:] == "txt"
}
image_stems = [os.path.splitext(os.path.basename(x))[0] for x in images]

unlabelled_images = [x for x, stem in zip(images, image_stems) if stem not in label_by_stem]
orphan_labels = sorted(set(label_by_stem) - set(image_stems))
has_duplicate_stems = len(set(image_stems)) != len(image_stems)
if unlabelled_images or orphan_labels or has_duplicate_stems:
    raise ValueError(
        "images and annotations do not pair up one-to-one: "
        f"{len(unlabelled_images)} file(s) in the images folder without a label "
        f"(e.g. {unlabelled_images[:3]}), "
        f"{len(orphan_labels)} label(s) without an image (e.g. {orphan_labels[:3]}), "
        f"duplicate image stems: {has_duplicate_stems}"
    )

# annotations[i] is the label file for images[i].
annotations = [label_by_stem[stem] for stem in image_stems]

# actual split: hold out 15% of the pairs, then send 30% of that hold-out to
# the test set (roughly 10.5% validation / 4.5% test overall).
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.15, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.3, random_state = 1)

In [None]:
# verify data split correctly
# Compare file stems (name without folder or extension). Using basename/splitext
# instead of a fixed position in the '/'-split path keeps this independent of
# how deep the dataset folder is and of the extension length.
temp_trainimgs = [os.path.splitext(os.path.basename(i))[0] for i in train_images]
temp_trainlbls = [os.path.splitext(os.path.basename(i))[0] for i in train_annotations]

print(len(temp_trainimgs))
print(len(set(temp_trainimgs) & set(temp_trainlbls)))

# The set overlap above would also pass if the two lists held the same names in
# a different order, so check that they match position by position.
assert temp_trainimgs == temp_trainlbls, "train images and annotations are not aligned"

In [None]:
# move files
import os.path
from os import path

# Create <split>/images and <split>/labels for each split.
# os.makedirs(..., exist_ok=True) also creates the missing dataset root and
# makes the cell safe to re-run; os.mkdir raised in both situations.
for split_name in ('train', 'test', 'valid'):
    for kind in ('images', 'labels'):
        os.makedirs(os.path.join('/YOLOv7_85-15', split_name, kind), exist_ok=True)


In [None]:
import shutil
# Utility function to copy images
def move_files_to_folder(list_of_files, destination_folder):
    """Copy every file in ``list_of_files`` into ``destination_folder``.

    Despite the name, the source files are copied, not moved.

    Parameters
    ----------
    list_of_files : iterable of str
        Paths of the files to copy.
    destination_folder : str
        Target directory; created (with parents) if it does not exist.

    Raises
    ------
    OSError
        If a file cannot be copied. The offending path is printed first and
        the original error is re-raised.
    """
    # Without an existing directory shutil.copy would treat the destination as
    # a file name, and every copy would overwrite the previous one.
    os.makedirs(destination_folder, exist_ok=True)
    for f in list_of_files:
        try:
            shutil.copy(f, destination_folder)
        except OSError:
            # Report which file failed, then surface the real error rather
            # than an uninformative AssertionError.
            print(f)
            raise

# Copy each split into its folder: all image sets first, then all label sets.
for split_files, split_folder in (
    (train_images, '/YOLOv7_85-15/train/images'),
    (test_images, '/YOLOv7_85-15/test/images'),
    (val_images, '/YOLOv7_85-15/valid/images'),
    (train_annotations, '/YOLOv7_85-15/train/labels'),
    (test_annotations, '/YOLOv7_85-15/test/labels'),
    (val_annotations, '/YOLOv7_85-15/valid/labels'),
):
    move_files_to_folder(split_files, split_folder)

# !mv annotations labels
# %cd ../

In [None]:
# augment training data to synthetically increase size
# The repository is cloned into the current working directory (no target path is given).
# TODO: if author does not accept pull request, create own repo
!git clone https://github.com/MinoruHenrique/data_augmentation_yolov7.git

In [None]:
# Switch the working directory to the augmentation repo.
# NOTE(review): this is an absolute path, while the clone above lands in the
# current working directory; the two agree only if the clone ran from `/` - confirm.
cd /data_augmentation_yolov7

In [None]:
# Install the dependencies listed in requirements.txt of the current working directory.
# NOTE(review): versions are whatever that file pins; `!pip` installs with the
# shell's pip, which is presumably the kernel's environment - confirm.
!pip install -r requirements.txt

In [None]:
# Run the augmentation script on the training images/labels; results are
# written to a separate train-data-aug folder.
# NOTE(review): `--nprocess 6` is presumably the worker-process count and the
# meaning of `-eo True` is defined by main.py, which is not visible here - confirm.
!python3 main.py --images '/YOLOv7_85-15/train/images' --labels '/YOLOv7_85-15/train/labels' --output '/YOLOv7_85-15/train-data-aug' --nprocess 6 -eo True

In [None]:
import shutil
import os

# Source folders produced by the augmentation run and the training folders
# they are merged into.
img_src = '/YOLOv7_85-15/train-data-aug/images'
img_dest = '/YOLOv7_85-15/train/images'

lbl_src = '/YOLOv7_85-15/train-data-aug/labels'
lbl_dest = '/YOLOv7_85-15/train/labels'

# Snapshot both listings before anything is moved.
img_fn = os.listdir(img_src)
lbl_fn = os.listdir(lbl_src)

# Move images first, then labels.
for src_dir, file_names, dest_dir in (
    (img_src, img_fn, img_dest),
    (lbl_src, lbl_fn, lbl_dest),
):
    for file_name in file_names:
        shutil.move(os.path.join(src_dir, file_name), dest_dir)

In [None]:
# Number of entries in the training image folder after the merge.
train_image_names = os.listdir("/YOLOv7_85-15/train/images")
len(train_image_names)

In [None]:
# remove translations and gaussian blur-- not helpful for marginalia training
import os

# Delete every file whose name marks it as a translation or gaussian-blur
# augmentation, from the image folder first and then from the label folder.
for folder in ('/YOLOv7_85-15/train/images', '/YOLOv7_85-15/train/labels'):
    for file_name in os.listdir(folder):
        if any(tag in file_name for tag in ("translation", "gaussian_blur")):
            os.remove(os.path.join(folder, file_name))

# Remaining number of training images.
len(os.listdir("/YOLOv7_85-15/train/images"))


In [None]:
# Remove the augmentation output folders, children before the parent.
# os.rmdir only deletes empty directories, so this fails if files remain.
for leftover_dir in (
    "/YOLOv7_85-15/train-data-aug/images",
    "/YOLOv7_85-15/train-data-aug/labels",
    "/YOLOv7_85-15/train-data-aug",
):
    os.rmdir(leftover_dir)

In [None]:
# training the model
# Clone the YOLOv7 repository into the current working directory.
# NOTE(review): the working directory is whatever the earlier `cd` left it at,
# while later cells use paths relative to the clone (`../...`); confirm the
# repo ends up next to the folders those paths expect.
!git clone https://github.com/WongKinYiu/yolov7.git

In [None]:
# Enter the cloned repo; the relative paths in the following cells
# (requirements.txt, cfg/..., data/..., runs/...) are resolved from here.
cd yolov7

In [None]:
# Install the dependencies listed in requirements.txt of the current working
# directory (the yolov7 clone, given the preceding `cd yolov7`).
!pip install -r requirements.txt

In [None]:
# Download the yolov7-e6_training.pt checkpoint from the v0.1 release into the
# current directory; the training cell passes this file name to --weights.
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6_training.pt

In [None]:
# train model
!python train_aux.py --workers 8 --device 0 --batch-size 16 --data ../YOLOv7_85-15/data.yaml --img 1280 1280 --cfg cfg/training/yolov7-e6.yaml --weights 'yolov7-e6_training.pt' --cache --name yolov7-e6 --hyp data/hyp.scratch.custom.yaml --epochs 150

In [None]:
# test model
!python test.py --data ../YOLOv7_85-15/data.yaml --img 1280 --batch 16 --conf 0.001 --iou 0.5 --device 0 --weights runs/yolov7-e6-ft/weights/best.pt --name yolov7e6_1280_test

In [None]:
# detect images in chaobook folder
!python detect.py --weight runs/yolov7-e6-ft/weights/best.pt --conf 0.05 --img-size 1152 --source ../nls_chapbook_imgs/