In [None]:
# 1. ✅ Run this to import the environment

from env import UArmEnv

pybullet build time: Jan 29 2025 23:16:28


In [None]:
# Capture a single image to make sure the camera is working properly.
# ❌ Not necessary for training: this cell only tests the environment and moves the arm to the desired position.
# Use it to move and test the arm manually, or to capture images for training the YOLO model.

env = UArmEnv(render=False)  # use render=True if you want to see it
try:
    env.reset()

    # Command that is re-applied on every settle step below.
    action = [0.0, 0.8, -1.57, 0]

    # Let the arm settle
    for _ in range(10):
        env.step(action)

    # Capture one image, then ask the environment to estimate the camera bounds
    env.get_camera_image()
    env.capture_single_image("test_image.png")
    env.estimate_camera_bounds()
finally:
    # Always close the environment, even if a step or capture call raises,
    # so a failed run does not leave it open in the kernel.
    env.close()


✅ Saved image to test_image.png
Camera Position: [ 0.02       -0.30593071  0.36003174]
Camera Forward: [ 6.34136173e-07 -7.97458758e-04 -9.99999682e-01]
Camera Target:  [ 0.02000063 -0.30672817 -0.63996794]
✅ Camera visible bounds:
X_MIN = -0.170, X_MAX = 0.210
Y_MIN = -0.490, Y_MAX = -0.120


In [None]:
# 2. ✅ Run this to define the YOLO dataset generator (the next cell calls it)

from env import UArmEnv
import os

def generate_yolo_dataset(n=1000, base_dir="./yolo/train_dataset"):
    """Capture ``n`` camera samples from a headless ``UArmEnv`` into ``base_dir``.

    For every sample the environment is reset, the same arm command is applied
    for 10 steps, and ``env.capture_image_and_dummy_label(base_dir, i)`` is
    called to store sample ``i``.

    Args:
        n: Number of samples to capture.
        base_dir: Dataset root. ``images/`` and ``labels/`` sub-folders are
            created inside it if they do not exist yet.

    The environment is closed even when a reset, step or capture call raises;
    the exception itself is propagated to the caller.
    """
    # Create the output folders before starting the environment so that a
    # filesystem error cannot leave an environment open.
    for sub_dir in ("images", "labels"):
        os.makedirs(os.path.join(base_dir, sub_dir), exist_ok=True)

    env = UArmEnv(render=False)
    try:
        for i in range(n):
            env.reset()
            action = [0.0, 0.8, -1.57, 0]
            # Let the arm settle before taking the picture
            for _ in range(10):
                env.step(action)
            env.get_camera_image()
            env.capture_image_and_dummy_label(base_dir, i)
            if i % 20 == 0:
                print(f"📸 Saved {i}/{n} samples...")
    finally:
        env.close()

    print(f"✅ Dataset generation complete: {n} samples saved to '{base_dir}/'")


pybullet build time: Jan 29 2025 23:16:28


In [None]:
# 3. ✅ Run this to generate the YOLO dataset
# Calls generate_yolo_dataset (defined in the previous cell) for 1000 samples under ./yolo/train_dataset.

generate_yolo_dataset(n=1000, base_dir="./yolo/train_dataset")


camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0000.png, train_dataset/labels/image_0000.txt
📸 Saved 0/1000 samples...
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0001.png, train_dataset/labels/image_0001.txt
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0002.png, train_dataset/labels/image_0002.txt
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0003.png, train_dataset/labels/image_0003.txt
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0004.png, train_dataset/labels/image_0004.txt
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0005.png, train_dataset/labels/image_0005.txt
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0006.png, train_dataset/labels/image_0006.txt
camera_link✅ Saved image + YOLO label to: train_dataset/images/image_0007.png, train_dataset/labels/image_0007.txt
camera_link✅ Saved image + YOLO label to: train_datase

In [None]:
# ❌ This is not necessary for training purposes
# Visualise a captured image with its bounding boxes
import cv2
import matplotlib.pyplot as plt

# Sample to inspect: one generated image and its YOLO label file.
sample_image_path = "./yolo/train_dataset/images/image_0000.png"
sample_label_path = "./yolo/train_dataset/labels/image_0000.txt"

img = cv2.imread(sample_image_path)
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail here with a clear message instead of at img.shape.
if img is None:
    raise FileNotFoundError(f"Could not read image: {sample_image_path}")
h, w = img.shape[:2]

with open(sample_label_path) as f:
    for line in f:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines, e.g. a trailing newline
        # Label row: class id, then box centre (cx, cy) and size (bw, bh),
        # which are scaled by the image width/height to get pixels.
        cls, cx, cy, bw, bh = map(float, fields)
        cx, cy, bw, bh = cx * w, cy * h, bw * w, bh * h
        x1, y1 = int(cx - bw / 2), int(cy - bh / 2)
        x2, y2 = int(cx + bw / 2), int(cy + bh / 2)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y1 - 5, 12)
        cv2.putText(img, f"Class {int(cls)}", (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

# OpenCV loads images as BGR; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title("YOLO Bounding Boxes")
plt.show()


In [None]:
# 4. ✅ Create the data.yaml file for YOLO training
# Both splits point at the folder generate_yolo_dataset writes to
# (base_dir="./yolo/train_dataset"). The val entry previously used
# "./train_dataset/images", which does not match that location.
yaml_content = """
train: ./yolo/train_dataset/images
val: ./yolo/train_dataset/images  # You can later create a val split

nc: 3
names: ['small', 'medium', 'large']
"""

# Written to the current working directory; surrounding blank lines are stripped.
with open("data.yaml", "w") as f:
    f.write(yaml_content.strip())

print("✅ data.yaml saved!")


✅ data.yaml saved!


In [None]:
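# In a bash terminal, run the following command to train the YOLO model:
# yolo task=detect mode=train model=yolov8n.pt data=./yolo/data.yaml epochs=100 imgsz=640
# (Point data= at wherever data.yaml was saved; the cell above writes it to the notebook's working directory.)

# And you're done training the YOLO model!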


In [None]:
# Load the trained YOLO model and run it on a test image

from ultralytics import YOLO

# Load the trained weights and run them on one test image.
# NOTE(review): the test image path below is the one written by the later
# capture_image_label_and_gt cells — confirm it exists before running this cell.
model = YOLO("./yolo/runs/detect/train/weights/best.pt")
results = model("./yolo/test_dataset/images/image_0000.png")

# Print results for first image
for box in results[0].boxes:
    cls, conf = int(box.cls[0]), float(box.conf[0])
    # xywhn: box centre and size; only the centre is printed below
    x_center, y_center, w, h = (float(value) for value in box.xywhn[0])
    print(f"Class: {cls}, Confidence: {conf:.2f}, X: {x_center:.3f}, Y: {y_center:.3f}")



image 1/1 /home/fatduck/git/ai_project/main/UArm_robot_working/test_dataset/images/image_0000.png: 320x320 4 smalls, 4 mediums, 1 large, 4.3ms
Speed: 0.6ms preprocess, 4.3ms inference, 110.8ms postprocess per image at shape (1, 3, 320, 320)
Class: 0, Confidence: 0.99, X: 0.264, Y: 0.649
Class: 1, Confidence: 0.99, X: 0.448, Y: 0.460
Class: 1, Confidence: 0.98, X: 0.264, Y: 0.459
Class: 0, Confidence: 0.98, X: 0.448, Y: 0.270
Class: 2, Confidence: 0.98, X: 0.079, Y: 0.649
Class: 1, Confidence: 0.98, X: 0.080, Y: 0.270
Class: 1, Confidence: 0.98, X: 0.263, Y: 0.270
Class: 0, Confidence: 0.98, X: 0.447, Y: 0.649
Class: 0, Confidence: 0.95, X: 0.079, Y: 0.459


In [None]:
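# Capture one test sample into ./yolo/test_dataset. Same arm setup as the camera test cell, but this one calls capture_image_label_and_gt instead of capture_single_image.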

from env import UArmEnv
env = UArmEnv(render=False)
try:
    env.reset()
    action = [0.0, 0.8, -1.57, 0]
    # Let the arm settle before capturing
    for _ in range(10):
        env.step(action)
    env.get_camera_image()
    # Store sample 0 under the test dataset folder
    env.capture_image_label_and_gt("./yolo/test_dataset", image_index=0)
finally:
    # Close the environment even if a step or the capture raises
    env.close()


In [None]:
from env import UArmEnv
from ultralytics import YOLO
import cv2
import numpy as np
import os

# Step 1: Run environment and capture ground truth + label
env = UArmEnv(render=False)
try:
    env.reset()
    action = [0.0, 0.8, -1.57, 0]
    # Let the arm settle before capturing
    for _ in range(10):
        env.step(action)

    env.get_camera_image()
    env.capture_image_label_and_gt("./yolo/test_dataset", image_index=0)
finally:
    # Close the environment even if a step or the capture raises, so the
    # prediction step below never runs with a leaked environment behind it.
    env.close()

# Step 2: Run YOLOv8 prediction on the saved image
image_path = "./yolo/test_dataset/images/image_0000.png"
model = YOLO("./yolo/runs/detect/train3/weights/best.pt")
results = model(image_path)

# Step 3: Convert YOLO predictions back to world coordinates
# Hard-coded bounds; presumably copied from the estimate_camera_bounds()
# output of the camera test cell — re-check them if the camera pose changes.
X_MIN, X_MAX = -0.170, 0.210
Y_MIN, Y_MAX = -0.490, -0.120
x_span = X_MAX - X_MIN
y_span = Y_MAX - Y_MIN

print("\n📍 YOLO Predicted World Coordinates:")
for box in results[0].boxes:
    cls, conf = int(box.cls[0]), float(box.conf[0])
    # Only the normalised box centre is needed; width/height are ignored
    x_center_n, y_center_n = (float(value) for value in box.xywhn[0][:2])

    # Reverse the normalisation: x is mirrored (1 - x_n) before scaling,
    # y is scaled directly.
    x_world = X_MIN + (1.0 - x_center_n) * x_span
    y_world = Y_MIN + y_center_n * y_span

    print(f"Class {cls}, Confidence: {conf:.2f}, X: {x_world:.3f}, Y: {y_world:.3f}")


camera_linkGoal position set to: [    0.28907    -0.08025        0.03]
camera_link✅ Saved image, label, and ground truth for image_0000

image 1/1 /home/fatduck/git/ai_project/main/UArm_robot_working/test_dataset/images/image_0000.png: 640x640 4 smalls, 3 mediums, 2 larges, 5.5ms
Speed: 2.0ms preprocess, 5.5ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

📍 YOLO Predicted World Coordinates:
Class 2, Confidence: 0.99, X: 0.110, Y: -0.390
Class 2, Confidence: 0.99, X: 0.180, Y: -0.320
Class 1, Confidence: 0.99, X: 0.110, Y: -0.250
Class 0, Confidence: 0.99, X: 0.108, Y: -0.321
Class 0, Confidence: 0.99, X: 0.039, Y: -0.320
Class 1, Confidence: 0.99, X: 0.180, Y: -0.250
Class 0, Confidence: 0.99, X: 0.039, Y: -0.389
Class 0, Confidence: 0.99, X: 0.040, Y: -0.251
Class 1, Confidence: 0.99, X: 0.180, Y: -0.390
