In [None]:
import glob
import os
import shutil

import numpy as np
import pandas as pd

In [None]:
#load in images and text file paths (sorted so the row order, and therefore the seeded split below, is reproducible)
img_paths = sorted(glob.glob('/fs/ess/PAS2136/Butterfly/annotated_cvat_segmentation_data/images_256_256_corrected_orientations_flat/*'))
txt_paths = sorted(glob.glob('/fs/ess/PAS2136/Butterfly/annotated_cvat_segmentation_data/yolo_annotations_flat/*'))

print(len(img_paths))
print(len(txt_paths))

def pair_images_with_annotations(image_paths, annotation_paths):
  """Build the image/annotation table, matching files by filename stem.

  Pairing by position in two independently sorted lists silently attaches
  labels to the wrong image as soon as one folder holds a stray or missing
  file, so every image is looked up by its stem (basename without extension).

  Args:
    image_paths: iterable of image file paths.
    annotation_paths: iterable of yolo annotation (.txt) file paths.

  Returns:
    DataFrame with columns 'image_path' and 'yolo_annotation_path', one row
    per image, ordered by sorted image path.

  Raises:
    ValueError: if a stem is duplicated within either list, or if any image
      lacks an annotation or any annotation lacks an image.
  """
  def stem(path):
    return os.path.splitext(os.path.basename(path))[0]

  annotation_paths = list(annotation_paths)
  sorted_images = sorted(image_paths)
  image_stems = [stem(p) for p in sorted_images]
  annotation_by_stem = {stem(p): p for p in annotation_paths}

  if len(set(image_stems)) != len(image_stems) or len(annotation_by_stem) != len(annotation_paths):
    raise ValueError('duplicate filename stems; cannot pair images with annotations unambiguously')

  images_without_annotation = sorted(set(image_stems) - set(annotation_by_stem))
  annotations_without_image = sorted(set(annotation_by_stem) - set(image_stems))
  if images_without_annotation or annotations_without_image:
    raise ValueError(
      'images and annotations do not match one-to-one: '
      + str(len(images_without_annotation)) + ' image(s) without annotation '
      + '(e.g. ' + repr(images_without_annotation[:5]) + '), '
      + str(len(annotations_without_image)) + ' annotation(s) without image '
      + '(e.g. ' + repr(annotations_without_image[:5]) + ')')

  return pd.DataFrame({'image_path': sorted_images,
                       'yolo_annotation_path': [annotation_by_stem[s] for s in image_stems]})

#create a dataframe of absolute image paths and their corresponding txt (yolo annotation file) paths
dataset_df = pair_images_with_annotations(img_paths, txt_paths)

dataset_df.head()

In [None]:
#split into train/val/test dataframes (the split folders themselves are created in a later cell)
from sklearn.model_selection import train_test_split

#Hold out 20% of all samples for testing; random_state fixes the shuffle so the split is reproducible
X1, X_test, y1, y_test = train_test_split(dataset_df['image_path'], dataset_df['yolo_annotation_path'], test_size = 0.20, random_state = 0)

#Get train and val sets: test_size here applies to the remaining 80%, so val is roughly 8% of all samples
X_train, X_val, y_train, y_val = train_test_split(X1, y1, test_size = 0.10, random_state = 0)

#create dataframes out of our train/test/val splits
#the split Series keep dataset_df's index *labels*, so select with .loc (label-based) rather than .iloc (position-based)
train_df, val_df, test_df = dataset_df.loc[X_train.index], dataset_df.loc[X_val.index], dataset_df.loc[X_test.index]

print(train_df.shape)
print(val_df.shape)
print(test_df.shape)

In [None]:
#save train/val/test csvs
csv_dir = '/fs/ess/PAS2136/Butterfly/annotated_cvat_segmentation_data/dataset_splits_yolo/csvs'
os.makedirs(csv_dir, exist_ok=True)

#one csv per split, named after the split; the dataframe index is not written
for split_name, split_df in (('train', train_df), ('val', val_df), ('test', test_df)):
  split_df.to_csv(csv_dir + '/' + split_name + '.csv', index=False)

In [None]:
#design helper function to save images and yolo annotation txt files in train/val/test folders
def save_images_and_annotations(df, old_im_folder, old_ann_folder, new_img_folder, new_ann_folder):
  """Copy every image/annotation pair listed in df into new folders.

  Args:
    df: DataFrame with 'image_path' and 'yolo_annotation_path' columns.
    old_im_folder: folder the image paths in df currently live under.
    old_ann_folder: folder the annotation paths in df currently live under.
    new_img_folder: destination folder for the images (created if missing).
    new_ann_folder: destination folder for the annotations (created if missing).

  Returns:
    None. Files are copied with shutil.copy; the originals are left in place.
  """
  os.makedirs(new_img_folder, exist_ok=True)
  os.makedirs(new_ann_folder, exist_ok=True)

  def _relocate(path, old_folder, new_folder):
    # Re-root the path under new_folder. Joining the part relative to
    # old_folder gives the right result whether or not either folder was
    # passed with a trailing slash, unlike a raw substring replace.
    old_root = os.path.normpath(old_folder)
    normalized = os.path.normpath(path)
    if normalized.startswith(old_root + os.sep):
      return os.path.join(new_folder, os.path.relpath(normalized, old_root))
    # Path is not under old_folder: keep the original substring replacement.
    return path.replace(old_folder, new_folder)

  for old_img_path, old_ann_path in zip(df['image_path'], df['yolo_annotation_path']):
    shutil.copy(old_img_path, _relocate(old_img_path, old_im_folder, new_img_folder))
    shutil.copy(old_ann_path, _relocate(old_ann_path, old_ann_folder, new_ann_folder))

  return

#source folders holding the flat image / yolo annotation files
old_im_folder = '/fs/ess/PAS2136/Butterfly/annotated_cvat_segmentation_data/images_256_256_corrected_orientations_flat/'
old_ann_folder = '/fs/ess/PAS2136/Butterfly/annotated_cvat_segmentation_data/yolo_annotations_flat/'

#root folder that receives images/<split>/ and labels/<split>/
new_split_folder = '/fs/ess/PAS2136/Butterfly/annotated_cvat_segmentation_data/dataset_splits_yolo/'

#copy each split's files into its own images/ and labels/ subfolder
for split_name, split_df in (('train', train_df), ('val', val_df), ('test', test_df)):
  split_img_folder = new_split_folder + 'images/' + split_name + '/'
  split_ann_folder = new_split_folder + 'labels/' + split_name + '/'
  save_images_and_annotations(split_df, old_im_folder, old_ann_folder, split_img_folder, split_ann_folder)