In [10]:
import os
from PIL import Image

def load_images(folder, extensions=(".jpg",".jpeg",".png",".JPG",".JPEG",".PNG")):
    images = []
    for root, _, files in os.walk(folder):
        for file in files:
            if file.endswith(extensions):
                img_path = os.path.join(root, file)
                try:
                    img = Image.open(img_path).convert("RGB")  # force RGB
                    images.append(img)
                except Exception as e:
                    print(f"Skipped {file}: {e}")
    return images

# Reload with Pillow
train_images = load_images(r"C:\Users\Windows\Downloads\Data-final_pro\Data\Train")
test_images = load_images(r"C:\Users\Windows\Downloads\Data-final_pro\Data\Test")

print(f"Train images loaded: {len(train_images)}")
print(f"Test images loaded: {len(test_images)}")



Train images loaded: 240
Test images loaded: 60


In [11]:


train_dir = r"C:\Users\Windows\Downloads\Data-final_pro\Data\Train"
test_dir = r"C:\Users\Windows\Downloads\Data-final_pro\Data\Test"

def count_files_by_extension(folder):
    ext_counts = {}
    for root, _, files in os.walk(folder):
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            ext_counts[ext] = ext_counts.get(ext, 0) + 1
    return ext_counts

print("Train file types:", count_files_by_extension(train_dir))
print("Test file types:", count_files_by_extension(test_dir))

Train file types: {'.jpg': 240, '.xml': 240}
Test file types: {'.jpg': 60, '.xml': 60}


In [12]:
##Data Cleaning & Preprocessing
import cv2
import numpy as np

def preprocess_image(img, target_size=(640, 640)):
    # Resize
    img_resized = cv2.resize(img, target_size)
    # Normalize to [0,1]
    img_normalized = img_resized.astype(np.float32) / 255.0
    return img_normalized

# Apply preprocessing to all train images
train_images_preprocessed = [preprocess_image(np.array(img)) for img in train_images]
test_images_preprocessed = [preprocess_image(np.array(img)) for img in test_images]

print(f"Train preprocessed: {len(train_images_preprocessed)}")
print(f"Test preprocessed: {len(test_images_preprocessed)}")

Train preprocessed: 240
Test preprocessed: 60


In [13]:
##Parsing Annotations (Bounding Boxes + Labels)

import xml.etree.ElementTree as ET

def parse_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    boxes = []
    labels = []
    
    for obj in root.findall("object"):
        label = obj.find("name").text
        bbox = obj.find("bndbox")
        xmin = int(bbox.find("xmin").text)
        ymin = int(bbox.find("ymin").text)
        xmax = int(bbox.find("xmax").text)
        ymax = int(bbox.find("ymax").text)
        
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label)
    
    return boxes, labels

# Example usage
xml_path = r"C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_77.xml"
boxes, labels = parse_annotation(xml_path)
print("Bounding Boxes:", boxes)
print("Labels:", labels)

Bounding Boxes: [[71, 60, 175, 164], [12, 22, 105, 111], [134, 23, 243, 115], [107, 126, 216, 229], [207, 138, 298, 229]]
Labels: ['apple', 'apple', 'apple', 'apple', 'apple']


In [16]:
##Batch Parsing All Annotations

import glob

def parse_all_annotations(folder):
    dataset = []
    image_files = glob.glob(os.path.join(folder, "*.jpg")) + \
                  glob.glob(os.path.join(folder, "*.JPG")) + \
                  glob.glob(os.path.join(folder, "*.png")) + \
                  glob.glob(os.path.join(folder, "*.jpeg"))
    
    for img_file in image_files:
        xml_file = os.path.splitext(img_file)[0] + ".xml"  # same name, .xml extension
        if os.path.exists(xml_file):
            boxes, labels = parse_annotation(xml_file)
            dataset.append({
                "image_path": img_file,
                "boxes": boxes,
                "labels": labels
            })
        else:
            print(f"No annotation found for {img_file}")
    
    return dataset

# Reload
train_dataset = parse_all_annotations(r"C:\Users\Windows\Downloads\Data-final_pro\Data\Train")
test_dataset = parse_all_annotations(r"C:\Users\Windows\Downloads\Data-final_pro\Data\Test")

print(f"Train dataset entries: {len(train_dataset)}")
print(f"Test dataset entries: {len(test_dataset)}")

# Peek safely
if train_dataset:
    print(train_dataset[0])
else:
    print("Train dataset is empty!")

Train dataset entries: 480
Test dataset entries: 120
{'image_path': 'C:\\Users\\Windows\\Downloads\\Data-final_pro\\Data\\Train\\apple_1.jpg', 'boxes': [[8, 15, 331, 349]], 'labels': ['apple']}


In [26]:
import os
import cv2

# Define label mapping
label_map = {"apple": 0, "banana": 1, "orange": 2}

def convert_to_yolo_format(image_path, boxes, labels, output_dir):
    # Make sure output directory exists
    os.makedirs(output_dir, exist_ok=True)
    
    # Read image to get dimensions
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Image not found or cannot be read: {image_path}")
    h, w, _ = img.shape
    
    yolo_lines = []
    for box, label in zip(boxes, labels):
        xmin, ymin, xmax, ymax = box
        x_center = ((xmin + xmax) / 2) / w
        y_center = ((ymin + ymax) / 2) / h
        width = (xmax - xmin) / w
        height = (ymax - ymin) / h
        class_id = label_map[label]
        
        # Format: class_id x_center y_center width height
        yolo_lines.append(f"{class_id} {x_center} {y_center} {width} {height}")
    
    # Save YOLO annotation file
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    yolo_file = os.path.join(output_dir, base_name + ".txt")
    with open(yolo_file, "w") as f:
        f.write("\n".join(yolo_lines))

# Example conversion for one entry
sample = train_dataset[0]
convert_to_yolo_format(sample["image_path"], sample["boxes"], sample["labels"], "yolo_labels")

print("YOLO annotation created for:", sample["image_path"])

YOLO annotation created for: C:\Users\Windows\Downloads\Data-final_pro\Data\Train\apple_1.jpg


In [37]:
def batch_convert_to_yolo(dataset, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    for entry in dataset:
        convert_to_yolo_format(entry["image_path"], entry["boxes"], entry["labels"], output_dir)

# Convert train and test datasets
batch_convert_to_yolo(train_dataset, "yolo_labels/train")
batch_convert_to_yolo(test_dataset, "yolo_labels/val")

print("YOLO annotations created for full train and test datasets!")

YOLO annotations created for full train and test datasets!


In [42]:
def organize_yolo_dataset(train_dataset, test_dataset, base_dir="dataset"):
    for sub in ["images/train", "images/val", "labels/train", "labels/val"]:
        os.makedirs(os.path.join(base_dir, sub), exist_ok=True)

    # Train set
    for entry in train_dataset:
        print("Processing train image:", entry["image_path"])
        shutil.copy(entry["image_path"], os.path.join(base_dir, "images/train"))
        convert_to_yolo_format(entry["image_path"], entry["boxes"], entry["labels"], os.path.join(base_dir, "labels/train"))

    # Validation set
    for entry in test_dataset:
        print("Processing val image:", entry["image_path"])
        shutil.copy(entry["image_path"], os.path.join(base_dir, "images/val"))
        convert_to_yolo_format(entry["image_path"], entry["boxes"], entry["labels"], os.path.join(base_dir, "labels/val"))

    print("✅ Dataset organized in YOLOv8 format!")

In [47]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.train(
    data=r"C:\Users\Windows\dataset\data.yaml",
    epochs=50,
    imgsz=640,
    batch=8,
    device="cpu"
)

Ultralytics 8.4.6  Python-3.12.9 torch-2.9.1+cpu CPU (11th Gen Intel Core i5-1145G7 @ 2.60GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Users\Windows\dataset\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=0.0, name=train7, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=1

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x00000182BE506F90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          

In [50]:
from ultralytics import YOLO

# Load your trained model
model = YOLO("C:/Users/Windows/runs/detect/train7/weights/best.pt")

# Run validation
metrics = model.val()

Ultralytics 8.4.6  Python-3.12.9 torch-2.9.1+cpu CPU (11th Gen Intel Core i5-1145G7 @ 2.60GHz)
Model summary (fused): 73 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.1 ms, read: 333.6350.4 MB/s, size: 80.7 KB)
[K[34m[1mval: [0mScanning C:\Users\Windows\dataset\labels\val.cache... 60 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 60/60  0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 4/4 1.4s/it 5.6s2.2ss
                   all         60        117      0.899      0.892      0.942      0.735
                 apple         24         35      0.889      0.943      0.953      0.819
                banana         22         40      0.854        0.9      0.918      0.641
                orange         22         42      0.954      0.833      0.954      0.745
Speed: 1.7ms preprocess, 73.5ms inference, 0.0ms loss, 4.2ms postprocess per image
Results saved to 

In [53]:
from ultralytics import YOLO

model = YOLO("C:/Users/Windows/runs/detect/train7/weights/best.pt")
results = model.predict("C:/Users/Windows/Downloads/Data-final_pro/Data/Test/orange_87.jpg", device="cpu")

# Show the first result
results[0].show()


image 1/1 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\orange_87.jpg: 640x640 3 oranges, 90.9ms
Speed: 4.5ms preprocess, 90.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


In [54]:
results = model.predict("C:/Users/Windows/Downloads/Data-final_pro/Data/Test", device="cpu", save=True)


image 1/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_77.jpg: 512x640 5 apples, 118.3ms
image 2/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_78.jpg: 640x640 1 apple, 102.6ms
image 3/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_79.jpg: 640x640 1 apple, 74.9ms
image 4/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_80.jpg: 544x640 1 apple, 95.0ms
image 5/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_81.jpg: 640x576 1 apple, 86.8ms
image 6/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_82.jpg: 640x576 1 apple, 73.2ms
image 7/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_83.jpg: 608x640 1 apple, 109.0ms
image 8/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_84.jpg: 448x640 5 apples, 99.7ms
image 9/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_85.jpg: 512x640 1 apple, 80.2ms
image 10/60 C:\Users\Windows\Downloads\Data-final_pro\Data\Test\apple_86.jpg: 448x640

In [55]:
from ultralytics import YOLO

model = YOLO("C:/Users/Windows/runs/detect/train7/weights/best.pt")
model.export(format="onnx")

Ultralytics 8.4.6  Python-3.12.9 torch-2.9.1+cpu CPU (11th Gen Intel Core i5-1145G7 @ 2.60GHz)
 ProTip: Export to OpenVINO format for best performance on Intel hardware. Learn more at https://docs.ultralytics.com/integrations/openvino/
Model summary (fused): 73 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from 'C:\Users\Windows\runs\detect\train7\weights\best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 7, 8400) (6.0 MB)
[31m[1mrequirements:[0m Ultralytics requirements ['onnx>=1.12.0,<2.0.0', 'onnxslim>=0.1.71', 'onnxruntime'] not found, attempting AutoUpdate...
Collecting onnx<2.0.0,>=1.12.0
  Downloading onnx-1.20.1-cp312-abi3-win_amd64.whl.metadata (8.6 kB)
Collecting onnxslim>=0.1.71
  Downloading onnxslim-0.1.82-py3-none-any.whl.metadata (10 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.23.2-cp312-cp312-win_amd64.whl.metadata (5.3 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.



[34m[1mONNX:[0m slimming with onnxslim 0.1.82...
[34m[1mONNX:[0m export success  79.2s, saved as 'C:\Users\Windows\runs\detect\train7\weights\best.onnx' (11.7 MB)

Export complete (79.5s)
Results saved to [1mC:\Users\Windows\runs\detect\train7\weights[0m
Predict:         yolo predict task=detect model=C:\Users\Windows\runs\detect\train7\weights\best.onnx imgsz=640 
Validate:        yolo val task=detect model=C:\Users\Windows\runs\detect\train7\weights\best.onnx imgsz=640 data=C:\Users\Windows\dataset\data.yaml  
Visualize:       https://netron.app


'C:\\Users\\Windows\\runs\\detect\\train7\\weights\\best.onnx'

In [56]:
model.export(format="torchscript")

Ultralytics 8.4.6  Python-3.12.9 torch-2.9.1+cpu CPU (11th Gen Intel Core i5-1145G7 @ 2.60GHz)
Model summary (fused): 73 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from 'C:\Users\Windows\runs\detect\train7\weights\best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 7, 8400) (6.0 MB)

[34m[1mTorchScript:[0m starting export with torch 2.9.1+cpu...
[34m[1mTorchScript:[0m export success  2.3s, saved as 'C:\Users\Windows\runs\detect\train7\weights\best.torchscript' (11.9 MB)

Export complete (2.6s)
Results saved to [1mC:\Users\Windows\runs\detect\train7\weights[0m
Predict:         yolo predict task=detect model=C:\Users\Windows\runs\detect\train7\weights\best.torchscript imgsz=640 
Validate:        yolo val task=detect model=C:\Users\Windows\runs\detect\train7\weights\best.torchscript imgsz=640 data=C:\Users\Windows\dataset\data.yaml  
Visualize:       https://netron.app


'C:\\Users\\Windows\\runs\\detect\\train7\\weights\\best.torchscript'