In [31]:
import pathlib as pl
from PIL import Image
from typing import Callable, Generator
import csv


def read_convert_dataset(
    dataset_dir: pl.Path,
    csv_read: Callable[[pl.Path], Generator[tuple["Image.Image", str], None, None]],
    out_dir: pl.Path,
):
    """
    Export every image produced from a dataset's CSV files into per-class
    folders under `out_dir`, saved as `<out_dir>/<class>/<n>.jpg`.

    dataset_dir: directory searched recursively for `*.csv` files.
    csv_read: takes a csv file path and yields (image, class/output folder name)
        tuples; each image only needs a `save(path)` method.
    out_dir: root of the output tree; class folders are created on demand.

    Numbering inside each class folder continues after the highest
    integer-named file already present, so several datasets can be exported
    into the same `out_dir` without overwriting each other.
    """
    # Next free index per class, seeded from what is already on disk.
    next_index: dict[str, int] = {}
    if out_dir.is_dir():
        for class_dir in out_dir.iterdir():
            if not class_dir.is_dir():
                continue
            # Compare numerically: by name "9.jpg" would sort after "10.jpg".
            used = [
                int(f.stem) for f in class_dir.iterdir() if f.stem.isdecimal()
            ]
            next_index[class_dir.name] = max(used, default=-1) + 1

    # Sorted so that the numbering is reproducible across runs and platforms.
    for doc_file in sorted(dataset_dir.rglob("*.csv")):
        for img, cls in csv_read(doc_file):
            i = next_index.get(cls, 0)
            next_index[cls] = i + 1
            out_path = out_dir.joinpath(cls, f"{i}.jpg")
            out_path.parent.mkdir(parents=True, exist_ok=True)
            img.save(out_path)


def square_bbox(bbox: list[str | int]) -> list[int]:
    if isinstance(bbox[0], str):
        bbox = map(int, bbox)

    x1, y1, x2, y2 = bbox
    width = x2 - x1
    height = y2 - y1
    if width > height:
        mid = y1 + height // 2
        y1 = mid - width // 2
        y2 = mid + width // 2
    else:
        mid = x1 + width // 2
        x1 = mid - height // 2
        x2 = mid + height // 2
    return [x1, y1, x2, y2]


def converter(
    csv_file: pl.Path,
    class_mapping: dict[str, str],
    size: tuple[int, int] = (240, 240),
) -> Generator[tuple["Image.Image", str], None, None]:
    """
    Yield one cropped, resized image per annotated row of `csv_file`.

    csv_file: annotation file with a header line. Column 0 is the image file
        name (resolved relative to the csv's folder), column 3 the class label
        and columns 4-7 the bounding box `x1, y1, x2, y2`; columns 1-2 are not
        used here.
    class_mapping: source label -> output class name. Rows whose label is not a
        key are skipped, as are rows with fewer than eight columns.
    size: `(width, height)` every crop is resized to.

    Yields `(image, mapped class name)` tuples. The bounding box is squared
    with `square_bbox` before cropping.
    """
    # newline="" is what the csv module asks for when reading from a file.
    with csv_file.open(newline="") as fp:
        rdr = csv.reader(fp)
        next(rdr, None)  # skip header; an empty file simply yields nothing
        for row in rdr:
            # Need the label (index 3) and all four box values (indices 4-7).
            if len(row) < 8 or row[3] not in class_mapping:
                continue
            with Image.open(csv_file.parent / row[0]) as img:
                crop = img.crop(square_bbox(row[4:8])).resize(size)
                yield crop, class_mapping[row[3]]

## furny.v3i.tensorflow


In [33]:
# https://universe.roboflow.com/mover/furny/dataset/3
def furny(path):
    """Read one furny annotation csv via `converter`, lower-casing its four labels."""
    label_to_folder = {
        "Chair": "chair",
        "Table": "table",
        "Sofa": "sofa",
        "Lamp": "lamp",
    }
    return converter(path, label_to_folder)


read_convert_dataset(pl.Path("furny.v3i.tensorflow"), furny, pl.Path("images"))

## Bulky.v1i.tensorflow


In [32]:
# https://universe.roboflow.com/school-xysvc/bulky/dataset/1
def bulky(path):
    """Read one Bulky annotation csv via `converter`; the label "i" goes to the "table" folder."""
    label_to_folder = {"chair": "chair", "i": "table", "sofa": "sofa"}
    return converter(path, label_to_folder)


read_convert_dataset(pl.Path("Bulky.v1i.tensorflow"), bulky, pl.Path("images"))