# Various Utilities

## Train Test Split
Split the images and their annotation files (XMLs) into training and validation sets.

In [10]:
from sklearn.model_selection import train_test_split
from pathlib import Path
from shutil import move

In [4]:
# Sort both listings by file stem: glob() yields entries in a
# filesystem-dependent order, while train_test_split pairs X[i] with y[i]
# purely by position. Without a shared ordering an image could be assigned
# to one split and its annotation to the other.
X = sorted(Path("images").glob("*.jpeg"), key=lambda p: p.stem)
y = sorted(Path("annotations/xmls/").glob("*.xml"), key=lambda p: p.stem)

# Fail early if the images and annotations do not line up one-to-one;
# otherwise the positional pairing above would silently mismatch them.
if [p.stem for p in X] != [p.stem for p in y]:
    raise ValueError(
        "images/*.jpeg and annotations/xmls/*.xml do not match one-to-one "
        "by file stem"
    )

In [6]:
# Hold out 20% of the (image, annotation) pairs for validation.
# random_state is fixed so the same split is produced on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
def reorganize_train_val(train_files, val_files, base_folder):
    """Move files into ``train`` and ``val`` subfolders of ``base_folder``.

    Both subfolders are created when missing (including any missing parent
    directories), even if the corresponding file list is empty.

    Args:
        train_files: Iterable of path objects exposing ``as_posix()``
            (e.g. ``pathlib.Path``) to move into ``<base_folder>/train``.
        val_files: Iterable of path objects exposing ``as_posix()`` to move
            into ``<base_folder>/val``.
        base_folder: Directory under which the two subfolders live.
    """
    root = Path(base_folder)
    splits = {"train": train_files, "val": val_files}
    for split_name, members in splits.items():
        target = root / split_name
        target.mkdir(parents=True, exist_ok=True)
        for item in members:
            # The source is handed to shutil.move as a plain string.
            move(item.as_posix(), target)

In [16]:
# Move the images, then their annotation XMLs, into train/ and val/
# subfolders of their respective directories.
reorganize_train_val(train_files=X_train, val_files=X_val, base_folder="images/")
reorganize_train_val(
    train_files=y_train,
    val_files=y_val,
    base_folder="annotations/xmls/",
)

## XML To CSV
Create a CSV of annotation information from the XML files generated by the LabelImg app.

In [17]:
from xml_to_csv import xml_to_csv

In [20]:
# Convert each split's annotation XMLs into a table and write it out as CSV.
# NOTE(review): xml_to_csv presumably returns a pandas DataFrame — confirm
# against the xml_to_csv module.
train_labels = xml_to_csv("annotations/xmls/train/")
train_labels.to_csv("annotations/train_labels.csv", index=False)

val_labels = xml_to_csv("annotations/xmls/val/")
val_labels.to_csv("annotations/val_labels.csv", index=False)