In [None]:
import json
import re
import numpy as np
import os
import glob
import sklearn as sk
from sklearn.model_selection import train_test_split
import shutil

In [None]:
def _flatten_coordinates(nested):
    """Yield every number of an arbitrarily nested coordinate list, in order."""
    for item in nested:
        if isinstance(item, (list, tuple)):
            yield from _flatten_coordinates(item)
        else:
            yield float(item)


def get_txt_annotation(path_to_file, path_to_txt_folder, filename,
                       img_width=1716, img_height=942):
    '''
    Create a txt file with normalized coordinates of single-class polygon
    annotations read from a geojson/json file.

    Each annotation becomes one line: the class label ``0`` followed by the
    flattened coordinates, where values at even positions (x) are divided by
    ``img_width`` and values at odd positions (y) by ``img_height``.

    path_to_file: path to geojson/json file with img annotations
    path_to_txt_folder: folder in which the txt file is created
    filename: name of existing/new txt file used as final txt file with img annotations
    img_width: divisor for x coordinates (default 1716)
    img_height: divisor for y coordinates (default 942)

    Raises KeyError/TypeError when an item has no ``geometry``/``coordinates``
    entry, and OSError when a file cannot be opened.
    '''
    # Read the input first so a broken annotation file does not leave an
    # empty txt file behind.
    with open(path_to_file) as file:
        annotations = json.load(file)

    if isinstance(annotations, dict):
        # A GeoJSON FeatureCollection wraps its items in 'features';
        # a lone Feature is treated as a one-item list.
        annotations = annotations.get('features', [annotations])

    lines = []
    for feature in annotations:
        coords = feature['geometry']['coordinates']
        # Walk the nested lists directly instead of regex-stripping str(coords),
        # which loses minus signs and splits exponent notation.
        flat = list(_flatten_coordinates(coords))
        # Parity comes from the position, not from list.index(value):
        # index() returns the first match and mislabels repeated values.
        normalized = [
            value / img_height if position % 2 else value / img_width
            for position, value in enumerate(flat)
        ]
        # Prefix the label of the single class, 0.
        lines.append('0 ' + ' '.join(map(str, normalized)))

    # The context manager guarantees the file is flushed and closed.
    with open(os.path.join(path_to_txt_folder, filename), 'w') as out:
        for line in lines:
            out.write(line + "\n")

In [None]:
# Glob pattern matching the json annotation files to convert.
path_to_folder = 'synovial_annots_json/*'
# Folder that receives the generated txt annotation files.
path_to_txt_folder = 'txt_annots'

In [16]:
# Convert every annotation file matched by the glob pattern into a txt label file.
# The output folder must exist before the txt files can be opened for writing.
os.makedirs(path_to_txt_folder, exist_ok=True)
for path_to_file in glob.glob(path_to_folder):
    # splitext replaces only the real extension; rstrip('json') removes any
    # trailing 'j', 's', 'o', 'n' characters and mangles names like 'x.geojson'.
    txt_filename = os.path.splitext(os.path.basename(path_to_file))[0] + '.txt'
    try:
        get_txt_annotation(path_to_file, path_to_txt_folder, txt_filename)
    except Exception as err:
        # Conversion stays best-effort, but failures are reported instead of
        # being silently swallowed.
        print(f'Skipped {path_to_file}: {err!r}')

In [17]:
# List of txt annotation file names. glob returns matches in arbitrary order,
# so the names are sorted to keep the seeded train/valid split reproducible.
img_names = sorted(
    os.path.basename(file) for file in glob.glob(path_to_txt_folder + '/*')
)

In [None]:
# Create the train/valid folders for labels and images.
# exist_ok=True lets this cell be re-run without raising FileExistsError.
os.makedirs("yolo_v8/valid/labels", exist_ok=True)
os.makedirs("yolo_v8/train/labels", exist_ok=True)
os.makedirs("yolo_v8/valid/images", exist_ok=True)
os.makedirs("yolo_v8/train/images", exist_ok=True)

In [18]:
# Split the annotation file names: 80% for training, the rest for validation.
# The same list is passed twice, so the y_* results mirror the X_* results.
X_train, X_val, y_train, y_val = train_test_split(
    img_names,
    img_names,
    train_size=0.8,
    random_state=42,
)

In [19]:
# Folder holding the .tif images that belong to the txt annotation files.
path_to_img = 'synovial_images'

In [20]:
# Move every label file and its matching .tif image into the split folders.
for split_name, txt_ids in (('valid', X_val), ('train', X_train)):
    labels_dir = os.path.join('yolo_v8', split_name, 'labels')
    images_dir = os.path.join('yolo_v8', split_name, 'images')
    # Make sure both targets are directories; otherwise shutil.move would
    # treat the target path as the new file name instead of moving inside it.
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)
    for txt_id in txt_ids:
        # splitext removes only the extension; rstrip('.txt') also eats
        # trailing '.', 't' and 'x' characters of the name itself
        # (e.g. 'test.txt' -> 'tes').
        stem = os.path.splitext(txt_id)[0]
        shutil.move(os.path.join(path_to_txt_folder, txt_id), labels_dir)
        shutil.move(os.path.join(path_to_img, stem + '.tif'), images_dir)

In [22]:
! pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.62-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m890.6 kB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting numpy<2.0.0,>=1.23.0 (from ultralytics)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
Collecting opencv-python>=4.6.0 (from ultralytics)
  Downloading opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.18.1-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting tqdm>=4.64.0 (from ultralytics)
  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m 