In [None]:
from ultralytics import YOLO
import cv2
import os
import glob

# =====================================================
# 1. Check the dataset and print a report
# =====================================================
def check_dataset(dataset_path="datasets"):
    """Print image/label counts for each split and flag unmatched files.

    The expected layout is ``<dataset_path>/images/<split>`` and
    ``<dataset_path>/labels/<split>`` for the splits ``train``, ``val`` and
    ``test``. Images and labels are paired by file name without extension.

    Args:
        dataset_path: Root folder of the dataset.

    Returns:
        None. The report is written to stdout.
    """
    print("=== Ki·ªÉm tra dataset ===")
    for split in ["train", "val", "test"]:
        img_dir = os.path.join(dataset_path, "images", split)
        lbl_dir = os.path.join(dataset_path, "labels", split)

        if not os.path.isdir(img_dir):
            print(f"‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y: {img_dir}")
            continue

        images = {os.path.splitext(f)[0] for f in os.listdir(img_dir)}

        # A split that has images but no labels folder is reported as
        # "every image is missing its label" instead of crashing the check.
        if os.path.isdir(lbl_dir):
            labels = {os.path.splitext(f)[0] for f in os.listdir(lbl_dir)}
        else:
            print(f"‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y: {lbl_dir}")
            labels = set()

        missing_labels = images - labels
        missing_images = labels - images

        print(f"\n{split.upper()}:")
        print(f"  ‚úì {len(images)} images, {len(labels)} labels")
        if missing_labels:
            print(f"  ‚ö†Ô∏è  {len(missing_labels)} ·∫£nh thi·∫øu label")
        if missing_images:
            print(f"  ‚ö†Ô∏è  {len(missing_images)} label th·ª´a (kh√¥ng c√≥ ·∫£nh)")

check_dataset()

# =====================================================
# 2. Create the dataset YAML with an absolute path
# =====================================================
# An absolute "path" entry keeps the split folders resolvable regardless of
# the working directory the training run is started from.
yaml_content = f"""
path: {os.path.abspath('datasets')}
train: images/train
val: images/val
test: images/test

names:
  0: crop
  1: weed
"""

# Write as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
# Windows) could mangle an absolute path that contains non-ASCII characters.
with open("weed.yaml", "w", encoding="utf-8") as f:
    f.write(yaml_content)

print("\n‚úì File weed.yaml ƒë√£ t·∫°o v·ªõi absolute path!")

# =====================================================
# 3. Train the model (can be switched off for a quick test)
# =====================================================
TRAIN_MODEL = True  # Set to False to skip training

# Same checkpoint path whether we train now or reuse an earlier run.
best_model_path = "weed_detection/yolov8_weed/weights/best.pt"

if not TRAIN_MODEL:
    # Reuse a previously trained model; fall back to the pretrained weights
    # when no checkpoint is present.
    if not os.path.exists(best_model_path):
        print("‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y model ƒë√£ train. D√πng pretrained model.")
        best_model_path = "yolov8n.pt"
else:
    print("\n=== B·∫Øt ƒë·∫ßu hu·∫•n luy·ªán ===")
    model = YOLO("yolov8n.pt")

    model.train(
        # data and schedule
        data="weed.yaml",
        epochs=50,
        patience=10,  # stop early after 10 epochs without improvement
        # input / hardware
        imgsz=640,
        batch=8,
        device=0,  # 0=GPU, 'cpu'=CPU
        # output location
        project="weed_detection",
        name="yolov8_weed",
        exist_ok=True,
        save=True,
        verbose=True,
    )

# =====================================================
# 4. Evaluate the model on the validation and test sets
# =====================================================
model = YOLO(best_model_path)


def _report_split(header, split_name):
    """Validate ``model`` on one split of weed.yaml, print mAP, return the metrics."""
    print(header)
    metrics = model.val(data="weed.yaml", split=split_name)
    print(f"mAP50: {metrics.box.map50:.3f}")
    print(f"mAP50-95: {metrics.box.map:.3f}")
    return metrics


val_metrics = _report_split("\n=== ƒê√°nh gi√° tr√™n VALIDATION set ===", "val")
test_metrics = _report_split("\n=== ƒê√°nh gi√° tr√™n TEST set ===", "test")

# =====================================================
# 5. Predict on test images, with error handling
# =====================================================
def predict_on_test_images(model, test_dir="datasets/images/test", max_images=5):
    """Run the model on up to ``max_images`` test images and save annotated copies.

    For every detection the class name, confidence and bounding box are
    printed. The annotated image is saved in the current working directory
    as ``output_<original file name>``.

    Args:
        model: Object with a ``predict(source, conf=..., verbose=...)`` method
            returning a list of results (an Ultralytics YOLO model).
        test_dir: Folder that is searched for ``*.jpg`` files.
        max_images: Upper bound on the number of images processed.

    Returns:
        None. Results are printed and written to disk.
    """
    # Sorted so the same images are picked on every run.
    test_images = sorted(glob.glob(os.path.join(test_dir, "*.jpg")))

    if not test_images:
        print(f"‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y ·∫£nh test trong {test_dir}")
        return

    print(f"\n=== D·ª± ƒëo√°n tr√™n {min(len(test_images), max_images)} ·∫£nh test ===")

    for img_path in test_images[:max_images]:
        print(f"\nüì∑ {os.path.basename(img_path)}")
        results = model.predict(img_path, conf=0.5, verbose=False)

        # Print the bounding-box details
        for r in results:
            if len(r.boxes) == 0:
                print("  ‚ûú Kh√¥ng ph√°t hi·ªán object")
                continue

            for box in r.boxes:
                cls = int(box.cls[0])
                # Take the label from the result's own class table so it is
                # right for any model, not only a two-class crop/weed one.
                cls_name = str(r.names.get(cls, cls))
                conf = float(box.conf[0])
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                print(f"  ‚ûú {cls_name.upper()} | Conf: {conf:.2f} | BBox: [{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")

        # Save the annotated image (nothing is displayed automatically)
        results[0].save(filename=f"output_{os.path.basename(img_path)}")

    print("\n‚úì ·∫¢nh k·∫øt qu·∫£ ƒë√£ l∆∞u v·ªõi t√™n 'output_*.jpg'")

# Quick qualitative check on a handful of test images.
predict_on_test_images(model=model, max_images=5)

# =====================================================
# 6. Real-time detection from the webcam (optional)
# =====================================================
def webcam_detection(model):
    """Run detection on webcam frames and show them until 'q' is pressed.

    Opens capture device 0, runs ``model.predict`` with ``conf=0.5`` on each
    frame and displays the annotated frame in an OpenCV window. The loop ends
    when a frame cannot be read or the user presses 'q'.

    Args:
        model: Object with a ``predict(frame, conf=..., verbose=...)`` method
            whose first result provides ``plot()`` (an Ultralytics YOLO model).

    Returns:
        None.
    """
    print("\n=== B·∫Øt ƒë·∫ßu webcam detection (nh·∫•n 'q' ƒë·ªÉ tho√°t) ===")

    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("‚ö†Ô∏è  Kh√¥ng th·ªÉ m·ªü webcam. Vui l√≤ng ki·ªÉm tra k·∫øt n·ªëi.")
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("‚ö†Ô∏è  L·ªói ƒë·ªçc frame t·ª´ webcam")
                break

            results = model.predict(frame, conf=0.5, verbose=False)
            annotated_frame = results[0].plot()

            cv2.imshow("Weed Detection - Real-time", annotated_frame)

            # waitKey also pumps the GUI event loop; mask to the low byte
            # before comparing with the key code.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even when inference
        # fails or the user interrupts the loop with Ctrl-C.
        cap.release()
        cv2.destroyAllWindows()
        print("‚úì ƒê√£ ƒë√≥ng webcam")

# Uncomment the line below to run webcam detection
# webcam_detection(model)

In [1]:
from ultralytics import YOLO
import cv2
import os
import glob

# =====================================================
# DATASET CONFIGURATION
# =====================================================
# Folder that contains train/, valid/ and test/.
DATASET_ROOT = r"C:\computervision\dataset"
# True = use the existing data.yaml, False = generate a new one.
USE_ROBOFLOW_YAML = True

# =====================================================
# 1. Check the dataset and print a report
# =====================================================
def check_dataset(dataset_path=DATASET_ROOT):
    """Print image/label counts for each split and list unmatched files.

    The expected layout is ``<dataset_path>/<split>/images`` and
    ``<dataset_path>/<split>/labels`` for the splits ``train``, ``valid`` and
    ``test``. Only ``.jpg``/``.jpeg``/``.png`` images and ``.txt`` labels are
    counted; they are paired by file name without extension.

    Args:
        dataset_path: Root folder of the dataset.

    Returns:
        None. The report is written to stdout.
    """
    image_exts = ('.jpg', '.png', '.jpeg')

    print("=== Ki·ªÉm tra dataset ===")
    for split in ["train", "valid", "test"]:
        img_dir = os.path.join(dataset_path, split, "images")
        lbl_dir = os.path.join(dataset_path, split, "labels")

        if not os.path.isdir(img_dir):
            print(f"‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y: {img_dir}")
            continue

        # Compare extensions case-insensitively so files such as IMG_01.JPG
        # are not silently dropped from the count.
        images = {
            os.path.splitext(f)[0]
            for f in os.listdir(img_dir)
            if f.lower().endswith(image_exts)
        }

        # A split with images but no labels folder is reported rather than
        # aborting the whole check with FileNotFoundError.
        if os.path.isdir(lbl_dir):
            labels = {
                os.path.splitext(f)[0]
                for f in os.listdir(lbl_dir)
                if f.lower().endswith('.txt')
            }
        else:
            print(f"‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y: {lbl_dir}")
            labels = set()

        missing_labels = images - labels
        missing_images = labels - images

        print(f"\n{split.upper()}:")
        print(f"  ‚úì {len(images)} images, {len(labels)} labels")
        # Samples are sorted so the same five names are shown on every run.
        if missing_labels:
            print(f"  ‚ö†Ô∏è  {len(missing_labels)} ·∫£nh thi·∫øu label: {sorted(missing_labels)[:5]}")
        if missing_images:
            print(f"  ‚ö†Ô∏è  {len(missing_images)} label th·ª´a: {sorted(missing_images)[:5]}")

check_dataset()

# =====================================================
# 2. Prepare the dataset YAML
# =====================================================
roboflow_yaml = os.path.join(DATASET_ROOT, "data.yaml")

if USE_ROBOFLOW_YAML and os.path.exists(roboflow_yaml):
    yaml_path = roboflow_yaml
    print(f"\n‚úì S·ª≠ d·ª•ng data.yaml c√≥ s·∫µn t·ª´ Roboflow: {yaml_path}")

    # Read and display the content. Decode as UTF-8 explicitly (the platform
    # default may differ) and replace undecodable bytes, since the text is
    # only shown to the user here.
    with open(yaml_path, "r", encoding="utf-8", errors="replace") as f:
        print("\nN·ªôi dung data.yaml:")
        print(f.read())
else:
    # Create a new YAML file.
    # NOTE(review): the class names below are hard-coded as crop/weed; confirm
    # they match the label files of the dataset under DATASET_ROOT.
    yaml_path = "weed_custom.yaml"
    yaml_content = f"""path: {os.path.abspath(DATASET_ROOT)}
train: train/images
val: valid/images
test: test/images

names:
  0: crop
  1: weed
"""

    # UTF-8 keeps a non-ASCII dataset path intact regardless of the locale.
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(yaml_content)

    print(f"\n‚úì ƒê√£ t·∫°o file YAML m·ªõi: {yaml_path}")

# =====================================================
# 3. Train the model
# =====================================================
TRAIN_MODEL = True  # Set to False to skip training
EPOCHS = 50
IMG_SIZE = 640
BATCH_SIZE = 8

# Same checkpoint path whether we train now or reuse an earlier run.
best_model_path = "weed_detection/yolov8_weed/weights/best.pt"

if TRAIN_MODEL:
    print("\n=== B·∫Øt ƒë·∫ßu hu·∫•n luy·ªán ===")
    model = YOLO("yolov8n.pt")  # original note: yolov8s.pt for higher accuracy

    results = model.train(
        # data and schedule
        data=yaml_path,
        epochs=EPOCHS,
        patience=15,  # early stopping
        # input / hardware
        imgsz=IMG_SIZE,
        batch=BATCH_SIZE,
        device=0,  # 0=GPU, 'cpu'=CPU, [0,1]=multi-GPU
        # output location and artefacts
        project="weed_detection",
        name="yolov8_weed",
        exist_ok=True,
        save=True,
        plots=True,  # save confusion matrix, F1 curve...
        verbose=True,
    )

    print(f"\n‚úì Model t·ªët nh·∫•t ƒë√£ l∆∞u t·∫°i: {best_model_path}")
elif not os.path.exists(best_model_path):
    # No earlier run to reuse: fall back to the pretrained weights.
    print("‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y model ƒë√£ train. S·ª≠ d·ª•ng pretrained yolov8n.pt")
    best_model_path = "yolov8n.pt"

# =====================================================
# 4. Evaluate the model
# =====================================================
model = YOLO(best_model_path)


def _evaluate_and_report(split_name, banner_title, result_heading):
    """Validate ``model`` on one split of ``yaml_path``, print the metrics, return them."""
    rule = "=" * 60
    print("\n" + rule)
    print(banner_title)
    print(rule)

    metrics = model.val(data=yaml_path, split=split_name)

    print(result_heading)
    print(f"   mAP50:    {metrics.box.map50:.3f}")
    print(f"   mAP50-95: {metrics.box.map:.3f}")
    print(f"   Precision: {metrics.box.mp:.3f}")
    print(f"   Recall:    {metrics.box.mr:.3f}")
    return metrics


val_metrics = _evaluate_and_report(
    "val",
    "ƒê√ÅNH GI√Å M√î H√åNH TR√äN VALIDATION SET",
    "\nüìä K·∫øt qu·∫£ Validation:",
)

test_metrics = _evaluate_and_report(
    "test",
    "ƒê√ÅNH GI√Å M√î H√åNH TR√äN TEST SET",
    "\nüìä K·∫øt qu·∫£ Test:",
)

# =====================================================
# 5. Predict on test images
# =====================================================
def predict_on_test_images(model, test_dir=None, max_images=5, conf=0.25):
    """Run the model on up to ``max_images`` test images and save annotated copies.

    For every detection the class name, confidence and bounding box are
    printed, followed by a per-class count for the image. Annotated images
    are written to the ``predictions`` folder as ``pred_<original file name>``.

    Args:
        model: Object with a ``predict(source, conf=..., verbose=...)`` method
            returning a list of results (an Ultralytics YOLO model).
        test_dir: Folder searched for ``.jpg``/``.jpeg``/``.png`` images.
            ``None`` means ``<DATASET_ROOT>/test/images``.
        max_images: Upper bound on the number of images processed.
        conf: Confidence threshold passed to ``model.predict``.

    Returns:
        None. Results are printed and written to disk.
    """
    if test_dir is None:
        test_dir = os.path.join(DATASET_ROOT, "test", "images")

    # Accept the same image types as the dataset check (case-insensitively)
    # and sort so the same images are picked on every run.
    image_exts = (".jpg", ".jpeg", ".png")
    test_images = sorted(
        path
        for path in glob.glob(os.path.join(test_dir, "*"))
        if path.lower().endswith(image_exts)
    )

    if not test_images:
        print(f"‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y ·∫£nh test trong {test_dir}")
        return

    total = min(len(test_images), max_images)

    print(f"\n{'='*60}")
    print(f"D·ª∞ ƒêO√ÅN TR√äN {total} ·∫¢NH TEST")
    print(f"{'='*60}")

    # Create the output folder
    output_dir = "predictions"
    os.makedirs(output_dir, exist_ok=True)

    for i, img_path in enumerate(test_images[:max_images], 1):
        print(f"\n[{i}/{total}] üì∑ {os.path.basename(img_path)}")
        results = model.predict(img_path, conf=conf, verbose=False)

        # Detections per class name, in order of first appearance.
        class_counts = {}

        for r in results:
            if len(r.boxes) == 0:
                print("  ‚ûú Kh√¥ng ph√°t hi·ªán object n√†o")
                continue

            for box in r.boxes:
                cls = int(box.cls[0])
                # Use the result's own class table; a fixed crop/weed mapping
                # mislabels models trained with other class lists.
                cls_name = str(r.names.get(cls, cls))
                conf_score = float(box.conf[0])
                x1, y1, x2, y2 = box.xyxy[0].tolist()

                class_counts[cls_name] = class_counts.get(cls_name, 0) + 1

                print(f"  ‚ûú {cls_name.upper()} | Confidence: {conf_score:.2%} | "
                      f"BBox: [{int(x1)}, {int(y1)}, {int(x2)}, {int(y2)}]")

        # Per-image summary
        if class_counts:
            summary = ", ".join(f"{name}: {count}" for name, count in class_counts.items())
            print(f"  üìä T·ªïng: {summary}")

        # Save the annotated image
        output_path = os.path.join(output_dir, f"pred_{os.path.basename(img_path)}")
        results[0].save(filename=output_path)

    print(f"\n‚úì ƒê√£ l∆∞u ·∫£nh k·∫øt qu·∫£ v√†o th∆∞ m·ª•c: {output_dir}/")

# Qualitative check on a few test images, then announce completion.
predict_on_test_images(model, conf=0.25, max_images=5)

_rule = "=" * 60
print(f"\n{_rule}")
print("‚úÖ HO√ÄN TH√ÄNH!")
print(_rule)

=== Ki·ªÉm tra dataset ===

TRAIN:
  ‚úì 1661 images, 1661 labels

VALID:
  ‚úì 580 images, 580 labels

TEST:
  ‚úì 245 images, 245 labels

‚úì S·ª≠ d·ª•ng data.yaml c√≥ s·∫µn t·ª´ Roboflow: C:\computervision\dataset\data.yaml

N·ªôi dung data.yaml:
train: ../train/images
val: ../valid/images
test: ../test/images

nc: 1
names: ['0 ridderzuring']

roboflow:
  workspace: roboflow-100
  project: grass-weeds
  version: 2
  license: CC BY 4.0
  url: https://universe.roboflow.com/roboflow-100/grass-weeds/dataset/2

=== B·∫Øt ƒë·∫ßu hu·∫•n luy·ªán ===
Ultralytics 8.3.233  Python-3.10.8 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\computervision\dataset\data.yaml, degrees=0.0, deter