Things to do:
1. Set up API access to Kaggle (needed to download the dataset)
2. Train and evaluate a YOLOv8 model


In [1]:
import kagglehub
import os
import cv2
import pathlib
from ultralytics import YOLO
from PIL import Image
import yaml
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download the crop-pests dataset through kagglehub and keep the returned
# location in `path`.
# DO NOT CHANGE: the following cells build the data.yaml location from `path`.
DATASET_HANDLE = "rupankarmajumdar/crop-pests-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)



In [3]:
# Build the dataset config consumed by YOLO and save it as data.yaml inside
# the downloaded dataset directory (opening with "w" replaces any data.yaml
# that is already there).
dataset_root = pathlib.Path(path)
data_yaml_path = dataset_root / "data.yaml"

# Class labels, in the order written to the "names" entry of the config.
class_names = [
    "ant", "bee", "beetle", "caterpillar", "earthworm", "earwig",
    "grasshopper", "moth", "slug", "snail", "wasp", "weevil",
]

data_cfg = dict(
    path=str(path),          # dataset root; the split folders below are relative to it
    train="train/images",
    val="valid/images",
    test="test/images",
    names=class_names,
)

with data_yaml_path.open("w") as yaml_file:
    yaml.safe_dump(data_cfg, yaml_file)

print("Wrote:", data_yaml_path)

Wrote: /Users/TanA20/.cache/kagglehub/datasets/rupankarmajumdar/crop-pests-dataset/versions/2/data.yaml


In [4]:
# constants shared by the training and evaluation cells

# Image size passed to YOLO as `imgsz` (author's note: default 320).
RESOLUTION = 160

# Batch size passed to YOLO as `batch`.
BATCH = 4

# Worker count passed to YOLO as `workers`.
WORKERS = 8

In [5]:
# Train the model on the training data and record how long the run takes.
# The run is deliberately short (2 epochs, project "quick_test").
model = YOLO("yolov8n.pt")

# time.perf_counter() is a monotonic clock, so the measured interval is not
# distorted if the system clock is adjusted while training is running.
start = time.perf_counter()
train_res = model.train(
    data=str(data_yaml_path),  # dataset config written earlier in the notebook
    epochs=2,
    imgsz=RESOLUTION,
    batch=BATCH,
    device="mps",
    workers=WORKERS,
    plots=False,
    project="quick_test",
    name="yolo_sanity",
)
end = time.perf_counter()
train_time = end - start  # elapsed training time in seconds

New https://pypi.org/project/ultralytics/8.3.225 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.224 🚀 Python-3.10.18 torch-2.9.0 MPS (Apple M4 Pro)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/Users/TanA20/.cache/kagglehub/datasets/rupankarmajumdar/crop-pests-dataset/versions/2/data.yaml, degrees=0.0, deterministic=True, device=mps, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=160, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mos

In [6]:
# Evaluate the model on the test split and record how long the evaluation
# takes. No fitting happens here: model.val() only computes metrics.
# time.perf_counter() is a monotonic clock, so the measured interval is not
# distorted if the system clock is adjusted while the evaluation is running.
start = time.perf_counter()
test_res = model.val(
    data=str(data_yaml_path),
    split='test',       # use the "test" entry of data.yaml
    imgsz=RESOLUTION,
    batch=BATCH,
    device="mps",
    workers=WORKERS,
    plots=False,
)
end = time.perf_counter()
test_time = end - start  # elapsed evaluation time in seconds

Ultralytics 8.3.224 🚀 Python-3.10.18 torch-2.9.0 MPS (Apple M4 Pro)
Model summary (fused): 72 layers, 3,007,988 parameters, 0 gradients, 8.1 GFLOPs
val: Fast image access ✅ (ping: 0.1±0.1 ms, read: 212.5±137.4 MB/s, size: 44.3 KB)
val: Scanning /Users/TanA20/.cache/kagglehub/datasets/rupankarmajumdar/crop-pests-dataset/versions/2/test/labels.cache... 546 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 546/546 2.5Mit/s 0.0ss
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 137/137 8.4it/s 16.4s<0.1s
                   all        546        689      0.367      0.459      0.399      0.214
                   ant         54         87       0.53      0.195      0.276      0.108
                   bee         40         44      0.329      0.705      0.492      0.211
                beetle         41         44     0.0362     0.0227     0.0794  

In [7]:
# metrics and testing

# Summary metrics reported by the test-split evaluation.
box_metrics = test_res.results_dict

# Mean Average Precision (mAP)
mAP50 = box_metrics['metrics/mAP50(B)']
mAP50_95 = box_metrics['metrics/mAP50-95(B)']

# Precision and Recall (typically averaged across classes/batches)
precision = box_metrics['metrics/precision(B)']
recall = box_metrics['metrics/recall(B)']

# F1 is derived here from the reported precision and recall; it falls back
# to 0 when both are zero so the division cannot fail.
precision_plus_recall = precision + recall
if precision_plus_recall > 0:
    f1_score = 2 * (precision * recall) / precision_plus_recall
else:
    f1_score = 0

# Note: YOLO does not directly output a single 'Accuracy' metric in the
# classification sense, nor a single 'Area Under the Curve (AUC)' value;
# mAP is the primary AUC equivalent.

print(f"Mean Average Precision (mAP@0.50): {mAP50:.4f}")
print(f"Mean Average Precision (mAP@0.50-0.95): {mAP50_95:.4f}")
print(f"Precision (Box): {precision:.4f}")
print(f"Recall (Box): {recall:.4f}")
print(f"F1-Score (Derived): {f1_score:.4f}\n")

def format_time(seconds: float) -> str:
    """Format a duration in seconds as "<minutes>m <seconds>s".

    Args:
        seconds: Duration in seconds (expected to be non-negative).

    Returns:
        Whole minutes followed by the remaining seconds with two decimals,
        for example 820.53 -> "13m 40.53s".
    """
    mins, secs = divmod(seconds, 60)
    mins = int(mins)
    # A remainder such as 59.996 would be printed as "60.00s" once rounded to
    # two decimals; carry it into the minutes so seconds always stay below 60.
    if round(secs, 2) >= 60:
        mins += 1
        secs = 0.0
    return f"{mins}m {secs:.2f}s"

# Report the measured durations in "<minutes>m <seconds>s" form.
train_time_text = format_time(train_time)
print(f"Training Time (Total): {train_time_text}")

test_time_text = format_time(test_time)
print(f"Testing/Validation Time (Total): {test_time_text}")

Mean Average Precision (mAP@0.50): 0.3994
Mean Average Precision (mAP@0.50-0.95): 0.2136
Precision (Box): 0.3668
Recall (Box): 0.4590
F1-Score (Derived): 0.4078

Training Time (Total): 13m 40.53s
Testing/Validation Time (Total): 0m 16.50s
