## Create Paths Variables

In [1]:
# UECFOOD100 to YOLOv8 converter

import os
import shutil
import random
from pathlib import Path
from tqdm import tqdm

# Paths
base_dir = Path("UECFOOD100")
images_dir = base_dir / "images"
category_file = base_dir / "category.txt"
multiple_food_file = base_dir / "multiple_food.txt"
out_dir = Path("UECFOOD100_YOLO")

# Create output folders
for split in ['train', 'val']:
    (out_dir / 'images' / split).mkdir(parents=True, exist_ok=True)
    (out_dir / 'labels' / split).mkdir(parents=True, exist_ok=True)

## Load & Split Train/Val Datasets

In [3]:
# 1. Parse category.txt
id2name = {}
with open(category_file, 'r', encoding='utf-8') as f:
    lines = f.readlines()

for line in lines[1:]:  # <-- Skip header
    parts = line.strip().split()
    if len(parts) >= 2:
        id2name[int(parts[0])] = ' '.join(parts[1:])

# 2. Parse multiple_food.txt
multiple_food = set()
with open(multiple_food_file, 'r', encoding='utf-8') as f:
    for line in f:
        multiple_food.add(line.strip())

# 3. Collect all image annotations
data = []
for class_folder in tqdm(sorted(images_dir.iterdir()), desc="Processing folders"):
    if not class_folder.is_dir():
        continue

    class_id = int(class_folder.name)
    bb_info_file = class_folder / "bb_info.txt"


# Read bounding box infos
    img2bboxes = {}
    with open(bb_info_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    for line in lines[1:]:  # skip header
        parts = line.strip().split()
        if len(parts) >= 5:
            img_name = parts[0] + '.jpg'  # <-- ADD .jpg manually
            bbox = list(map(int, parts[1:5]))  # x_min, y_min, x_max, y_max
            img2bboxes.setdefault(img_name, []).append((class_id, bbox))

  # Process images
    for img_name, annotations in img2bboxes.items():
        img_path = class_folder / img_name
        if not img_path.exists():
            continue

        data.append({
            'img_path': img_path,
            'annotations': annotations,
            'multi_label': img_name in multiple_food
        })

# 4. Shuffle and split into train/val
random.shuffle(data)
split_idx = int(0.8 * len(data))
train_data = data[:split_idx]
val_data = data[split_idx:]



Processing folders:   0%|          | 0/100 [00:00<?, ?it/s]

Processing folders: 100%|██████████| 100/100 [00:00<00:00, 354.25it/s]


## Save Image & Labels

In [4]:
# 5. Save images and labels
def convert_and_save(dataset, split):
    for item in tqdm(dataset, desc=f"Saving {split}"):
        img_path = item['img_path']
        annotations = item['annotations']

        # New filename
        new_name = img_path.stem + img_path.suffix

        # Copy image
        shutil.copy(img_path, out_dir / 'images' / split / new_name)

        # Open image to get width and height
        from PIL import Image
        with Image.open(img_path) as im:
            w, h = im.size

        # Write label file
        label_path = out_dir / 'labels' / split / (img_path.stem + '.txt')
        with open(label_path, 'w') as f:
            for class_id, bbox in annotations:
                x_min, y_min, x_max, y_max = bbox
                # Convert to YOLO format
                x_center = (x_min + x_max) / 2 / w
                y_center = (y_min + y_max) / 2 / h
                width = (x_max - x_min) / w
                height = (y_max - y_min) / h
                f.write(f"{class_id-1} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

convert_and_save(train_data, 'train')
convert_and_save(val_data, 'val')

# 6. Create data.yaml
yaml_content = f"""
path: {out_dir}
train: images/train
val: images/val

names:
"""


Saving train: 100%|██████████| 11488/11488 [00:04<00:00, 2741.67it/s]
Saving val: 100%|██████████| 2873/2873 [00:01<00:00, 2741.14it/s]


## Create Yaml file for dataset

In [None]:

for i in range(len(id2name)):
    yaml_content += f"  {i}: {id2name[i+1]}\n"

with open(out_dir / 'data.yaml', 'w', encoding='utf-8') as f:
    f.write(yaml_content)

print("Done! Dataset prepared for YOLOv11.")


Done! Dataset prepared for YOLOv8.


## Train Model

In [None]:
import cv2
from ultralytics import YOLO
model = YOLO('yolo11l.pt')  # Ensure this is the correct model path and name

# Train
model.train(
    data=str('datasets/UECFOOD100_YOLO/data.yaml'),  # Path to data.yaml
    epochs=75,                       # Number of training epochs
    imgsz=640,                       # Image size (YOLO default is 640x640)
    batch=64,                        # Batch size
    name='UECFOOD100_YOLOV11_10',    # Experiment name
    lr0 = 5e-4,                      # Initial learning rate
    lrf=0.05,                        # Final learning rate
    optimizer ='AdamW',              # AdamW optimizer
    cos_lr = True,
    warmup_epochs = 5,
    weight_decay = 0.001,            # weight decay
    dropout=0.01,                    # dropout
    device=5,                        # GPU id (or 'cpu')
    degrees=15,         
    patience=15,                     # early stopping patience
    translate=0.1,                   # Transform -> translate
    shear=0.1,                       # Transform -> shear
    perspective=0.001,               # Transform -> perspective
    scale = 0.5,                     # Transform -> scale
    fliplr=0.5,                      # Transform -> flip
    hsv_h=0.03,
    hsv_s=0.55,
    workers = 12                     # Change this param to change how many cpu's get used during training
)

New https://pypi.org/project/ultralytics/8.3.125 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.119 🚀 Python-3.10.12 torch-2.7.0+cu126 CUDA:1 (NVIDIA RTX 6000 Ada Generation, 48546MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11l.pt, data=datasets/UECFOOD100_YOLO/data.yaml, epochs=100, time=None, patience=100, batch=64, imgsz=640, save=True, save_period=-1, cache=False, device=1, workers=8, project=None, name=UECFOOD100_YOLOV11_3, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=Fal

[34m[1mtrain: [0mScanning /home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/labels/train.cache... 10354 images, 0 backgrounds, 0 corrupt: 100%|██████████| 10354/10354 [00:00<?, ?it/s]

[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/11783.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/11784.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/11789.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/12078.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/12291.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/12454.jpg: 1 duplicate labels removed
[34m[1mtrain: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/train/12699.jpg: corrupt JPEG restored and sav




[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1444.5±932.2 MB/s, size: 64.2 KB)


[34m[1mval: [0mScanning /home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/labels/val.cache... 2793 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2793/2793 [00:00<?, ?it/s]

[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/5171.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/5854.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/81.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/83.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/84.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/8560.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuynh/comp_vis/food_new_data/datasets/UECFOOD100_YOLO/images/val/86.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/home/public/shuy




Plotting labels to runs/detect/UECFOOD100_YOLOV11_3/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=1e-05, momentum=0.937) with parameter groups 167 weight(decay=0.0), 174 weight(decay=0.0005), 173 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/UECFOOD100_YOLOV11_3[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      40.3G       1.22      3.975       1.81        282        640: 100%|██████████| 162/162 [01:41<00:00,  1.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:12<00:00,  1.71it/s]


                   all       2793       2836     0.0089      0.614     0.0203     0.0128

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      39.1G      1.143      3.679      1.713        356        640:   6%|▌         | 10/162 [00:06<01:33,  1.62it/s]Exception in thread Thread-11 (_pin_memory_loop):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/public/shuynh/comp_vis/food_new_data/newenv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/home/public/shuynh/comp_vis/food_new_data/newenv/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 61, in _pin_memory_loop
    do_one_step()
  File "/home/public/shuynh/comp_vis/food_new_data/newenv/lib/python3.10/site-packages/torch/utils/data/_utils/pin_memory.py", line 37, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.10/multiprocessing/q

KeyboardInterrupt: 

## Real-Time Inference
Uncomment this code to run real time inference, provided that you have a camera on your device

In [None]:
# import cv2
# from ultralytics import YOLO

# # Load your trained YOLO model
# model = YOLO('runs/train/run3/weights/best.pt')  # Replace with the correct path to your trained model

# # Start webcam capture
# cap = cv2.VideoCapture(0)

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break
    
#     # Perform inference using the YOLO model
#     results = model(frame)  # model processes the frame and returns detections

#     # Extract the results (detections, bounding boxes, labels, etc.)
#     # You can access results such as labels, confidence, and coordinates
#     boxes = results.xywh[0].cpu().numpy()  # x, y, w, h coordinates for detections
#     confidences = results.conf[0].cpu().numpy()  # confidence scores
#     class_ids = results.cls[0].cpu().numpy()  # class IDs of detected objects

#     # Draw bounding boxes and labels on the frame
#     for i in range(len(boxes)):
#         if confidences[i] > 0.5:  # Only draw boxes with confidence above a threshold
#             x, y, w, h = boxes[i]
#             x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2)
#             label = str(results.names[int(class_ids[i])])  # Object label (e.g., 'person')
#             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#             cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2)

#     # Show the frame with bounding boxes
#     cv2.imshow("YOLO Object Detection", frame)

#     # Break the loop if 'q' is pressed
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # Release the webcam and close the window
# cap.release()
# cv2.destroyAllWindows()

FileNotFoundError: [Errno 2] No such file or directory: 'runs/train/run3/weights/best.pt'