## Create Path Variables

In [None]:
# UECFOOD100 to YOLOv8 converter

import os
import shutil
import random
from pathlib import Path
from tqdm import tqdm

# Input locations inside the UECFOOD100 dataset folder
base_dir = Path("UECFOOD100")
images_dir = base_dir.joinpath("images")
category_file = base_dir.joinpath("category.txt")
multiple_food_file = base_dir.joinpath("multiple_food.txt")

# Root folder of the converted dataset
out_dir = Path("UECFOOD100_YOLO")

# Make sure <out_dir>/{images,labels}/{train,val} all exist
for kind in ('images', 'labels'):
    for split in ('train', 'val'):
        out_dir.joinpath(kind, split).mkdir(parents=True, exist_ok=True)


## Load & Split Train/Val Datasets

In [None]:
# 1. Parse category.txt -> {class_id: class_name}
id2name = {}
with open(category_file, 'r', encoding='utf-8') as f:
    next(f, None)  # skip header row (no-op on an empty file)
    for line in f:
        parts = line.split()
        if len(parts) >= 2:
            id2name[int(parts[0])] = ' '.join(parts[1:])

# 2. Parse multiple_food.txt
# `multiple_food` keeps the raw stripped lines, exactly as before.
# `multiple_food_ids` additionally keeps the first whitespace-separated token of each line.
# NOTE(review): this assumes each line starts with a bare image id (the same id convention
# as the first column of bb_info.txt) — confirm against the dataset file.
multiple_food = set()
multiple_food_ids = set()
with open(multiple_food_file, 'r', encoding='utf-8') as f:
    for line in f:
        stripped = line.strip()
        multiple_food.add(stripped)
        if stripped:
            multiple_food_ids.add(stripped.split()[0])

# 3. Collect all image annotations, merged per image file name.
# Images are later written to the output folder under their bare file name, so a file name
# that shows up in several class folders must become ONE record carrying every box.
# Otherwise the copies overwrite each other's label file and may end up in both splits.
# NOTE(review): assumes equal file names in different class folders are the same picture — confirm.
merged = {}  # img_name -> record, in first-seen order

# Only numeric folder names are class folders; this skips e.g. `.ipynb_checkpoints`.
class_folders = sorted(
    (p for p in images_dir.iterdir() if p.is_dir() and p.name.isdigit()),
    key=lambda p: int(p.name),
)

for class_folder in tqdm(class_folders, desc="Processing folders"):
    class_id = int(class_folder.name)
    bb_info_file = class_folder / "bb_info.txt"

    # Read bounding box infos for this class folder
    img2bboxes = {}
    with open(bb_info_file, 'r', encoding='utf-8') as f:
        next(f, None)  # skip header
        for line in f:
            parts = line.split()
            if len(parts) >= 5:
                img_name = parts[0] + '.jpg'  # bb_info.txt lists ids without extension
                bbox = list(map(int, parts[1:5]))  # x_min, y_min, x_max, y_max
                img2bboxes.setdefault(img_name, []).append((class_id, bbox))

    # Fold this folder's boxes into the per-image records
    for img_name, annotations in img2bboxes.items():
        img_path = class_folder / img_name
        if not img_path.exists():
            continue

        record = merged.get(img_name)
        if record is None:
            record = merged[img_name] = {
                'img_path': img_path,
                'annotations': [],
                'multi_label': (
                    img_name in multiple_food
                    or Path(img_name).stem in multiple_food_ids
                ),
            }
        record['annotations'].extend(annotations)

data = list(merged.values())

# 4. Shuffle and split into train/val.
# A dedicated, seeded generator makes the split reproducible across kernel restarts
# without touching the global `random` state.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(data)
split_idx = int(0.8 * len(data))
train_data = data[:split_idx]
val_data = data[split_idx:]



## Save Images & Labels

In [None]:
# 5. Save images and labels
def _yolo_label_line(class_id, bbox, img_w, img_h):
    """Format one pixel-space box as a YOLO label line, or return None if it is unusable.

    The box is clamped to the image rectangle first, because YOLO labels must be
    normalized to the [0, 1] range. Boxes with no area after clamping are dropped.
    """
    x_min, y_min, x_max, y_max = bbox
    x_min = min(max(x_min, 0), img_w)
    x_max = min(max(x_max, 0), img_w)
    y_min = min(max(y_min, 0), img_h)
    y_max = min(max(y_max, 0), img_h)
    if x_max <= x_min or y_max <= y_min:
        return None

    x_center = (x_min + x_max) / 2 / img_w
    y_center = (y_min + y_max) / 2 / img_h
    width = (x_max - x_min) / img_w
    height = (y_max - y_min) / img_h
    # Dataset class ids are written zero-based (class_id - 1).
    return f"{class_id-1} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"


def convert_and_save(dataset, split):
    """Copy every image of `dataset` into the `split` folder and write its YOLO label file.

    Args:
        dataset: iterable of dicts with keys 'img_path' (a Path to the source image) and
            'annotations' (list of ``(class_id, [x_min, y_min, x_max, y_max])`` in pixels).
        split: sub-folder name under ``out_dir/images`` and ``out_dir/labels``
            (the notebook uses 'train' and 'val').

    Side effects:
        Writes ``out_dir/images/<split>/<name>`` and ``out_dir/labels/<split>/<stem>.txt``.
        A label file is written for every image, even when no usable box remains.
    """
    # Imported once per call instead of once per image.
    from PIL import Image

    images_out = out_dir / 'images' / split
    labels_out = out_dir / 'labels' / split

    for item in tqdm(dataset, desc=f"Saving {split}"):
        img_path = item['img_path']

        # Copy image under its original file name
        shutil.copy(img_path, images_out / img_path.name)

        # Image size is needed to normalize the pixel coordinates
        with Image.open(img_path) as im:
            w, h = im.size

        label_lines = []
        for class_id, bbox in item['annotations']:
            label_line = _yolo_label_line(class_id, bbox, w, h)
            if label_line is not None:
                label_lines.append(label_line)

        with open(labels_out / (img_path.stem + '.txt'), 'w') as f:
            f.writelines(label_lines)

# Export both splits, training set first
for split_name, split_items in (('train', train_data), ('val', val_data)):
    convert_and_save(split_items, split_name)

# 6. Create data.yaml
# Header part only: dataset root plus the image folders relative to it.
# The class names are appended below the `names:` key afterwards.
yaml_content = f"""
path: {out_dir}
train: images/train
val: images/val

names:
"""


## Create YAML File for Dataset

In [None]:

# Build the `names:` entries in a fresh variable instead of appending to `yaml_content`,
# so re-running this cell never duplicates the class list.
# Keys are `class_id - 1`, the same zero-based index that is written to the label files.
# Iterating over the ids that actually exist avoids a KeyError when ids are not contiguous.
names_block = "".join(
    f"  {class_id - 1}: {id2name[class_id]}\n" for class_id in sorted(id2name)
)

with open(out_dir / 'data.yaml', 'w', encoding='utf-8') as f:
    f.write(yaml_content + names_block)

print("Done! Dataset prepared for YOLOv8.")


## Check for a GPU Device

In [1]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

device


device(type='cuda')

## Load YOLOv8 Model

In [4]:
from ultralytics import YOLO

# Start from a pre-trained YOLOv8 checkpoint: 'yolov8n.pt' is the Nano version;
# 'yolov8s.pt' (small), 'yolov8m.pt' (medium), etc. can be used instead.
pretrained_weights = 'yolov8n.pt'
model = YOLO(pretrained_weights)

model

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_s

## Run Baseline Model

In [2]:
from ultralytics import YOLO

# Load your trained model.
# The weights are addressed relative to the notebook's working directory instead of an
# absolute, user-specific path, so the notebook also runs on other machines.
# NOTE(review): assumes the notebook is started from the project folder that contains
# `runs/` (the prediction output below is saved to a relative `runs` folder) — confirm.
from pathlib import Path

trained_weights = Path("runs") / "detect" / "UECFOOD100_YOLO4" / "weights" / "best.pt"
if not trained_weights.is_file():
    # Fail early with a clear message instead of handing a missing path to YOLO.
    raise FileNotFoundError(f"Trained weights not found: {trained_weights.resolve()}")

model = YOLO(str(trained_weights))



## Compute Predictions

In [3]:
# Run prediction on an image.
# The image path is relative to the notebook's working directory rather than an absolute,
# user-specific path. NOTE(review): assumes `some_sushi.jpg` sits next to the notebook — confirm.
results = model.predict(source="some_sushi.jpg", save=True, conf=0.1)  # conf=0.1 = confidence threshold


image 1/1 C:\Users\achia\OneDrive\Documents\Graduate Years\Year 2\Spring 2025\Computer Vision I\Project\Second Project\some_sushi.jpg: 480x640 1 potato salad, 1 green salad, 36.8ms
Speed: 3.7ms preprocess, 36.8ms inference, 92.4ms postprocess per image at shape (1, 3, 480, 640)
Results saved to runs\detect\predict2


## Real Time Inference
Uncomment the code below to run real-time inference. This requires a camera on your device.

In [None]:
# import cv2
# from ultralytics import YOLO

# # Load your trained YOLO model.
# # Forward slashes are used on purpose: in a normal (non-raw) string literal "\U" starts a
# # unicode escape and "\b" is a backspace, so the backslash form of this path does not work.
# model = YOLO('runs/detect/UECFOOD100_YOLO4/weights/best.pt')  # Replace with the correct path to your trained model

# # Start webcam capture
# cap = cv2.VideoCapture(0)

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     # Perform inference using the YOLO model
#     results = model(frame)  # returns a list with one Results object per input image

#     # Extract the detections of this frame (bounding boxes, confidences, class ids)
#     detections = results[0].boxes
#     boxes = detections.xywh.cpu().numpy()  # center x, center y, w, h for each detection
#     confidences = detections.conf.cpu().numpy()  # confidence scores
#     class_ids = detections.cls.cpu().numpy()  # class IDs of detected objects

#     # Draw bounding boxes and labels on the frame
#     for i in range(len(boxes)):
#         if confidences[i] > 0.5:  # Only draw boxes with confidence above a threshold
#             x, y, w, h = boxes[i]
#             x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2)
#             label = str(model.names[int(class_ids[i])])  # Object label (class name)
#             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
#             cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2)

#     # Show the frame with bounding boxes
#     cv2.imshow("YOLO Object Detection", frame)

#     # Break the loop if 'q' is pressed
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # Release the webcam and close the window
# cap.release()
# cv2.destroyAllWindows()