# Detecting Alephs with YOLO

In [1]:
from ultralytics import YOLO


## Training

In [5]:
# Instantiate the YOLO11-m architecture from its YAML definition, then copy in
# the pretrained weights from the matching checkpoint.
model = YOLO("yolo11m.yaml").load("yolo11m.pt")

# Training settings collected in one place so they are easy to tweak.
train_args = dict(
    data="asc_dataset.yaml",
    epochs=300,
    imgsz=640,
    batch=1,
)
results = model.train(**train_args)


Transferred 649/649 items from pretrained weights
New https://pypi.org/project/ultralytics/8.3.65 available ðŸ˜ƒ Update with 'pip install -U ultralytics'
Ultralytics 8.3.62 ðŸš€ Python-3.12.2 torch-2.5.1 


ValueError: Invalid CUDA 'device=0' requested. Use 'device=cpu' or pass valid CUDA device(s) if available, i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.

torch.cuda.is_available(): False
torch.cuda.device_count(): 1
os.environ['CUDA_VISIBLE_DEVICES']: 0


## Validation

In [3]:
# Checkpoint written by the training run above.
best_weights = "runs/detect/train/weights/best.pt"
model = YOLO(best_weights)

# Evaluate on the validation split; called without arguments, so the dataset
# and settings stored with the checkpoint are reused.
metrics = model.val()


Ultralytics 8.3.62 ðŸš€ Python-3.12.2 torch-2.5.1 CUDA:0 (NVIDIA A40, 45416MiB)
YOLO11m summary (fused): 303 layers, 20,030,803 parameters, 0 gradients


[34m[1mval: [0mScanning /home/suliman/measure_ratios/dataset_sliced/labels/val.cache... 113 images, 294 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 407/407 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 26/26 [00:03<00:00,  6.74it/s]


                   all        407        268      0.751      0.586      0.646      0.368
Speed: 0.6ms preprocess, 5.9ms inference, 0.0ms loss, 1.5ms postprocess per image
Results saved to [1mruns/detect/val[0m


In [4]:
metrics.box.map  # mAP50-95 (mean AP over IoU thresholds 0.50-0.95)


0.368143368779391

In [5]:
metrics.box.map50  # mAP50 (mean AP at IoU threshold 0.50)


0.6463201789353338

In [6]:
metrics.box.map75  # mAP75 (mean AP at IoU threshold 0.75)


0.3671263637403003

In [7]:
metrics.box.maps  # array of mAP50-95 values, one entry per category

array([    0.36814])

## Inference

In [7]:
from pathlib import Path
import json
from ultralytics import YOLO
from PIL import Image
from tqdm import tqdm

# Run the trained detector over every image in TEST_DIR and write the detections
# to OUT_JSON as a COCO-style dict with "images", "annotations" and "categories".
MODEL_WEIGHTS = "runs/detect/train/weights/best.pt"   # your .pt or .onnx file
TEST_DIR      = "dataset/images/test"                    # folder of .jpg/.png
OUT_JSON      = "yolo_predictions_coco.json"          # file to create
CONF_THRES    = 0.5                                   # minimum detection confidence
IOU_THRES     = 0.7                                   # NMS IoU

# --------------------------------------------------
# 1.  Load model
# --------------------------------------------------
model = YOLO(MODEL_WEIGHTS)
names = model.model.names  # id→class dict

# Build COCO categories list (id must start at 1).
# The id is derived from the model's own class id (the dict key) rather than the
# enumeration position, so it always agrees with the `category_id` written for
# each annotation below, even if the class ids are not exactly 0..n-1 in order.
categories = [{"id": int(cls_id) + 1, "name": name} for cls_id, name in names.items()]

# --------------------------------------------------
# 2.  Prepare image metadata
# --------------------------------------------------
id_lookup = {}
img_paths = sorted([p for p in Path(TEST_DIR).glob("*") if p.suffix.lower() in {".jpg", ".jpeg", ".png"}])
images = []
file_by_id = {}  # image id -> file name, used only to detect id collisions
for p in img_paths:
    with Image.open(p) as im:
        w, h = im.size

    # File names are expected to look like "<ms>_<page>_<region>.<ext>" with integer parts.
    try:
        ms_id, page_id, region_id = (int(part) for part in p.name.split(".")[0].split("_"))
    except ValueError as err:
        raise ValueError(
            f"Cannot derive an image id from {p.name!r}: expected '<ms>_<page>_<region>' with integer parts"
        ) from err

    # NOTE(review): the id formula is kept as-is because other files (e.g. the
    # ground-truth annotations) presumably use the same ids -- confirm. It is only
    # unique while page_id and region_id are single digits, so fail loudly instead
    # of silently writing duplicate image ids.
    img_id = 100 * ms_id + 10 * page_id + region_id
    if img_id in file_by_id:
        raise ValueError(
            f"Image id collision: {p.name!r} and {file_by_id[img_id]!r} both map to id {img_id}"
        )
    file_by_id[img_id] = p.name

    images.append({"id": img_id, "file_name": p.name, "width": w, "height": h})
    id_lookup[p] = img_id


# --------------------------------------------------
# 3.  Run inference & collect annotations
# --------------------------------------------------
annotations = []
ann_id = 1

for p in tqdm(img_paths, desc="Running inference"):
    results = model.predict(
        source=str(p),
        conf=CONF_THRES,
        iou=IOU_THRES,
        max_det=300,
        verbose=False
    )[0]  # single source -> single Results object

    # results.boxes.xyxy is (N, 4) in pixels; .conf and .cls hold one value per box.
    # .tolist() converts each tensor to plain Python numbers in one step.
    boxes = results.boxes
    for (x1, y1, x2, y2), score, cls in zip(boxes.xyxy.tolist(), boxes.conf.tolist(), boxes.cls.tolist()):
        bbox = [x1, y1, x2 - x1, y2 - y1]           # COCO = [x, y, w, h]
        annotations.append(
            {
                "id": ann_id,
                "image_id": id_lookup[p],
                "category_id": int(cls) + 1,  # 1-indexed
                "bbox": [round(v) for v in bbox],
                "area": round(bbox[2] * bbox[3]),
                "iscrowd": 0,
                # "score": round(score, 3),
                # "segmentation": []                   # empty list – boxes only
            }
        )
        ann_id += 1

# --------------------------------------------------
# 4.  Dump to JSON
# --------------------------------------------------
pred_dict = {"images": images, "annotations": annotations, "categories": categories}
with open(OUT_JSON, "w", encoding="utf-8") as f:
    json.dump(pred_dict, f, indent=4)
print(f"Saved {len(annotations)} predictions for {len(images)} images ➜ {OUT_JSON}")

Running inference:   0%|          | 0/281 [00:00<?, ?it/s]

Running inference: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 281/281 [00:36<00:00,  7.79it/s]


Saved 10514 predictions for 281 images âžœ yolo_predictions_coco.json
