In [1]:
import sys
print(sys.executable)

/Users/jordanwhite/Projects/yolov8/yolo-v8/bin/python


In [5]:
%%capture
!python -m venv env
!pip install ultralytics
!pip install clearml
!pip install split-folders
!pip install colorama

In [6]:
import os
import shutil
import splitfolders
import pandas as pd
import numpy as np
from tqdm import tqdm
from colorama import Fore

In [26]:
IMAGE_PATH = "archive/images" # The path to the folder with images.
TARGET_PATH = "archive/annotation/annotation" # The path to the folder with the annotation (labels). 

In [27]:
def create_dataset(data_path: str, target_path: str) -> pd.DataFrame:
    assert isinstance(data_path, str) 
    assert isinstance(target_path, str)
    
    dict_paths = {
        "image": [],
        "annotation": []
    }
    
    for dir_name, _, filenames in os.walk(data_path):
        for filename in tqdm(filenames):
            name = filename.split('.')[0]
            dict_paths["image"].append(f"{data_path}/{name}.jpg")
            dict_paths["annotation"].append(f"{target_path}/{name}.txt")

    
    dataframe = pd.DataFrame(
        data=dict_paths,
        index=np.arange(0, len(dict_paths["image"]))
    )
    
    return dataframe

In [28]:
def prepare_dirs(dataset_path: str,
                 annotation_path: str,
                 images_path: str) -> None:
    if not os.path.exists(dataset_path):
        os.mkdir(path=dataset_path)
        os.mkdir(path=annotation_path)
        os.mkdir(path=images_path)

In [29]:
def copy_dirs(dataframe: pd.DataFrame, 
             data_path: str,
             target_path: str) -> None:
    
    assert isinstance(dataframe, pd.DataFrame)
    assert isinstance(data_path, str) 
    assert isinstance(target_path, str)
    
    for i in tqdm(range(len(dataframe))):
        image_path, annotation_path = dataframe.iloc[i]
        shutil.copy(image_path, data_path)
        shutil.copy(annotation_path, target_path)

In [30]:
def finalizing_preparation(dataset_path: str, ladd_path: str):
    assert os.path.exists(f"{dataset_path}")
    
    example_structure = [
        "dataset", 
        "train", "labels", "images",
        "test","labels", "images",
        "val", "labels", "images"
    ]
    
    dir_bone = (
        dirname.split("/")[-1]
        for dirname, _, filenames in os.walk('/kaggle/working/dataset')
        if dirname.split("/")[-1] in example_structure
    )
    
    try:
        print("\n~ Lacmus Dataset Structure ~\n")
        print(
        f"""
      ├── {next(dir_bone)}
      │   │
      │   ├── {next(dir_bone)}
      │   │   └── {next(dir_bone)}
      │   │   └── {next(dir_bone)}
      │   │        
      │   ├── {next(dir_bone)}
      │   │   └── {next(dir_bone)}
      │   │   └── {next(dir_bone)}
      │   │
      │   ├── {next(dir_bone)}
      │   │   └── {next(dir_bone)}
      │   │   └── {next(dir_bone)}
        """
        )
    except StopIteration as e:
        print(e)
    else:
        print(Fore.GREEN + "-> Success")
    finally:
        os.system(f"rm -rf {ladd_path}")

In [13]:
df = create_dataset(
    data_path=IMAGE_PATH,
    target_path=TARGET_PATH
)

In [24]:
dataset_path = "working/dataset"
ladd_path = "working/ladd"
annotation_path = "working/ladd/labels"
image_path = "working/ladd/images"

In [25]:
prepare_dirs(
    dataset_path=ladd_path,
    annotation_path=annotation_path,
    images_path=image_path
)


In [20]:
copy_dirs(
    dataframe=df, 
    data_path=image_path,
    target_path=annotation_path
)

0it [00:00, ?it/s]


In [22]:
os.makedirs(ladd_path, exist_ok=True)
os.makedirs(annotation_path, exist_ok=True)
os.makedirs(image_path, exist_ok=True)


In [32]:
splitfolders.ratio(
    input=ladd_path,
    output=dataset_path,
    seed=42,
    ratio=(0.80, 0.10, 0.10),
    group_prefix=None,
    move=True
) 

Copying files: 2730 files [00:00, 14840.84 files/s]


In [33]:
finalizing_preparation(
    dataset_path,
    ladd_path
)


~ Lacmus Dataset Structure ~


