### 1. Importing Required Libraries for YOLO Model andÂ forÂ Training

In [2]:
!pip install ultralytics torch torchvision


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from ultralytics import YOLO
import torch

### 2.1 Randomly selecting dataset samples from the huge dataset,
### 2.2 Downloading pre-labeled dataset from Kaggle

#### We will be using pre-labeled data. So this code block is not necessary anymore.

In [6]:
import os
import random
import shutil

def sample_dataset(source_dir, target_root_dir, sample_size=100):
    """
    Scans the source directory for ALL subdirectories (classes),
    selects a random subset of images from each, and copies them to a target directory.
    """

    # Supported image extensions
    valid_extensions = {".jpg", ".jpeg"}

    # Create the target root directory if it doesn't exist
    if not os.path.exists(target_root_dir):
        os.makedirs(target_root_dir)
        print(f"Created target root directory: {target_root_dir}")

    # AUTOMATICALLY get all subdirectories in the source folder
    classes_to_process = [d for d in os.listdir(source_dir)
                          if os.path.isdir(os.path.join(source_dir, d))]

    print(f"Found {len(classes_to_process)} classes. Processing all of them...\n")

    total_copied = 0

    for class_name in classes_to_process:
        class_path = os.path.join(source_dir, class_name)

        # List all valid image files
        images = [f for f in os.listdir(class_path)
                  if os.path.splitext(f)[1].lower() in valid_extensions]

        # Determine sample count (take all if less than sample_size)
        num_to_sample = min(len(images), sample_size)

        if num_to_sample == 0:
            print(f"Warning: No images found in '{class_name}'. Skipping.")
            continue

        # Randomly select images
        selected_images = random.sample(images, num_to_sample)

        # Define new folder name (e.g., "sampled-apple")
        new_class_name = f"sampled-{class_name}"
        target_class_path = os.path.join(target_root_dir, new_class_name)

        # Create class directory
        if not os.path.exists(target_class_path):
            os.makedirs(target_class_path)

        # Copy images
        for image in selected_images:
            src_file = os.path.join(class_path, image)
            dst_file = os.path.join(target_class_path, image)
            shutil.copy2(src_file, dst_file)

        print(f"{class_name}: Copied {num_to_sample} images -> '{new_class_name}'")
        total_copied += num_to_sample

    print("-" * 40)
    print(f"Process completed. Total of {total_copied} images copied to '{target_root_dir}'.")

# --- CONFIGURATION & RUN ---

# Set your folder paths here
SOURCE_FOLDER = "unlabeled-data"
TARGET_FOLDER = "sampled-unlabeled-data"
SAMPLES_PER_CLASS = 100

# Run directly
sample_dataset(SOURCE_FOLDER, TARGET_FOLDER, sample_size=SAMPLES_PER_CLASS)

Found 19 classes. Processing all of them...

apple: Copied 100 images -> 'sampled-apple'
bell pepper: Copied 100 images -> 'sampled-bell pepper'
strawberry: Copied 100 images -> 'sampled-strawberry'
avocado: Copied 100 images -> 'sampled-avocado'
pomegranate: Copied 100 images -> 'sampled-pomegranate'
pumpkin: Copied 100 images -> 'sampled-pumpkin'
kiwi: Copied 100 images -> 'sampled-kiwi'
lemon: Copied 100 images -> 'sampled-lemon'
mandarine: Copied 100 images -> 'sampled-mandarine'
grapefruit: Copied 100 images -> 'sampled-grapefruit'
quince: Copied 100 images -> 'sampled-quince'
coconut: Copied 100 images -> 'sampled-coconut'
eggplant: Copied 100 images -> 'sampled-eggplant'
banana: Copied 100 images -> 'sampled-banana'
zucchini: Copied 100 images -> 'sampled-zucchini'
pineapple: Copied 100 images -> 'sampled-pineapple'
tomato: Copied 100 images -> 'sampled-tomato'
persimmon: Copied 100 images -> 'sampled-persimmon'
orange: Copied 100 images -> 'sampled-orange'
---------------------

#### Downloading Pre-labeled Dataset From Kaggle

In [8]:
import kagglehub
import shutil
import os

downloaded_path = kagglehub.dataset_download("kapturovalexander/fruits-by-yolo-fruits-detection")

target_path = "label-dataset"


if not os.path.exists(target_path):
    shutil.copytree(downloaded_path, target_path)
    print(f"Dataset is copied to this path: {os.path.abspath(target_path)}")
else:
    print("This path already exists.")

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/kapturovalexander/fruits-by-yolo-fruits-detection?dataset_version_number=11...


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 115M/115M [00:35<00:00, 3.36MB/s] 

Extracting files...





Dataset is copied to this path: /Users/ramazanyildiz/PycharmProjects/object-detection-localization/label-dataset


### 3. Importing Model (YOLOv8m)

In [3]:
model = YOLO("yolov8m.pt")
print(model.info())

YOLOv8m summary: 169 layers, 25,902,640 parameters, 0 gradients, 79.3 GFLOPs
(169, 25902640, 0, 79.3204224)


In [5]:
# Testing the model with a jpg file
results = model.predict(source='https://ultralytics.com/images/bus.jpg', save=True, conf=0.5)

for r in results:
    r.show()


Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /Users/ramazanyildiz/PycharmProjects/object-detection-localization/bus.jpg: 640x480 4 persons, 1 bus, 328.9ms
Speed: 22.7ms preprocess, 328.9ms inference, 16.8ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1m/Users/ramazanyildiz/PycharmProjects/object-detection-localization/runs/detect/predict[0m


### 4. Model Training

In [None]:
results = model.train(
    data='labeled-dataset/data.yaml',
    epochs=50,
    imgsz=640,
    device='mps',
    batch=4,
    workers=0,
)

New https://pypi.org/project/ultralytics/8.3.239 available ðŸ˜ƒ Update with 'pip install -U ultralytics'
Ultralytics 8.3.227 ðŸš€ Python-3.13.0 torch-2.9.0 MPS (Apple M1)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=labeled-dataset/data.yaml, degrees=0.0, deterministic=True, device=mps, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train3, nbs=64, nms=False, opset=None, opt

##### failed. no enough gpu. we will continue with google colab