In [1]:
# Install the third-party packages imported by the cells below into the kernel's
# environment: python-dotenv (dotenv), roboflow, and supervision.
# NOTE(review): no versions are pinned, so a re-run may install different releases.
%pip install python-dotenv
%pip install roboflow
%pip install supervision



In [2]:
# loads dataset
from roboflow import Roboflow
from dotenv import load_dotenv
import os

# Pull the variables defined in a local .env file into the process environment.
load_dotenv()

# The API key is read from the environment rather than being hard-coded here.
api_key = os.environ.get("YF_API_KEY")

# Walk workspace -> project -> version, then download that version in the
# "yolov8" export format.
rf = Roboflow(api_key=api_key)
workspace = rf.workspace("caretech")
project = workspace.project("food-dataset-uj20h-w2s4m")
version = project.version(1)
dataset = version.download("yolov8")

loading Roboflow workspace...
loading Roboflow project...


In [3]:
# script to flatten the downloaded dataset and re-split it into train/test/val
import os
import shutil
from pathlib import Path
import supervision as sv


# Gather every image and label file of the downloaded dataset into one flat
# images/ + labels/ tree.
dataset_path = '/content/Food-Dataset-1'
new_dir_path = '/content/all'
split_path = '/content/split'

dir = Path(dataset_path)
unified_dir = Path(new_dir_path)
new_img_dir = unified_dir.joinpath('images')
new_label_dir = unified_dir.joinpath('labels')

# Create both destination folders up front; nothing happens if they already exist.
for target_dir in (new_img_dir, new_label_dir):
    target_dir.mkdir(parents=True, exist_ok=True)

# Recursively collect the matching files below the dataset root (all .jpg first,
# then all .txt) and copy each one into its flat destination folder.
for pattern, target_dir in (('*.jpg', new_img_dir), ('*.txt', new_label_dir)):
    for file in list(dir.rglob(pattern)):
        shutil.copy(file, target_dir)

# Load the flattened images and labels as a single DetectionDataset.
# The annotations directory must be the full path of the folder the label files
# were copied into. `new_label_dir.name` is only the last path component
# ("labels"), which would be resolved relative to the current working directory
# instead of pointing at new_label_dir.
ds = sv.DetectionDataset.from_yolo(
    images_directory_path=str(new_img_dir),
    annotations_directory_path=str(new_label_dir),
    data_yaml_path=f"{dataset_path}/data.yaml",
)

# Deterministic split: a fixed random_state with shuffling enabled is used for
# both cuts. The first cut uses a 0.8 ratio; the remainder is then cut in half
# to produce the test and validation subsets.
split_options = dict(random_state=1, shuffle=True)
train_ds, rest_ds = ds.split(split_ratio=0.8, **split_options)
test_ds, val_ds = rest_ds.split(split_ratio=0.5, **split_options)

# Write each subset in YOLO format under <split_path>/<subset>/{images,labels}.
# NOTE(review): data_yaml_path points at the downloaded dataset's data.yaml, not
# at a file under split_path, so every call appears to target that same file --
# confirm this is intended.
for subset_name, subset_ds in (("train", train_ds), ("test", test_ds), ("val", val_ds)):
    subset_root = f"{str(split_path)}/{subset_name}"
    subset_ds.as_yolo(
        images_directory_path=f"{subset_root}/images",
        annotations_directory_path=f"{subset_root}/labels",
        data_yaml_path=f"{dataset_path}/data.yaml",
    )

# write the manifest files
def write_manifest(ds: "sv.DetectionDataset", split: str, output_path: Path):
    """Write the image file names of ``ds`` to ``output_path``, one per line.

    Only the final path component of each image path is written (no directory).

    Args:
        ds: Dataset whose images are listed. Its ``image_paths`` attribute is
            used when present; otherwise the dataset is iterated and the first
            element of each yielded 3-tuple is taken as the image path.
        split: Name of the split being written. Currently unused; kept so that
            existing calls keep working.
        output_path: Text file to create (or overwrite) with the manifest.
    """
    # Prefer the dataset's own list of paths: iterating the dataset also yields
    # the image and its detections for every entry, which is far more work than
    # needed when only the file names are wanted.
    image_paths = getattr(ds, "image_paths", None)
    if image_paths is None:
        image_paths = [img_path for img_path, _, _ in ds]

    # Explicit encoding so the manifest does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(f"{Path(img_path).name}\n" for img_path in image_paths)

    print(f"Wrote to {output_path}")

# One manifest per subset, written next to the subset folders as <name>.txt.
for split_name, split_ds in (("train", train_ds), ("test", test_ds), ("val", val_ds)):
    write_manifest(split_ds, split_name, Path(split_path) / f"{split_name}.txt")


Wrote to /content/split/train.txt
Wrote to /content/split/test.txt
Wrote to /content/split/val.txt
