# step 1

In [15]:
import os
import cv2
import glob
import numpy as np
import random
from sklearn.model_selection import train_test_split

# Define dataset paths
data_dir = "./"  # Your working directory with 6 X-rays and 6 annotated images
output_dir = "./processed_split/"
os.makedirs(output_dir, exist_ok=True)

# Create necessary subdirectories
for subdir in ["train", "val", "test"]:
    os.makedirs(os.path.join(output_dir, "images", subdir), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "masks", subdir), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "labels", subdir), exist_ok=True)

# Get all X-ray images (excluding labeled ones)
image_files = sorted(glob.glob(os.path.join(data_dir, "*.JPG")))
image_files = [f for f in image_files if "(1)" not in f]  # Ignore annotated images

# Ensure that each image has a corresponding mask
def get_mask_path(image_path):
    """Finds the corresponding mask for a given X-ray."""
    mask_path = image_path.replace(".JPG", " (1).JPG")
    return mask_path if os.path.exists(mask_path) else None

# Split dataset (4 train, 1 val, 1 test)
train_files, test_files = train_test_split(image_files, test_size=0.2, random_state=42)
val_files, test_files = train_test_split(test_files, test_size=0.5, random_state=42)

# Function to preprocess images
def preprocess_image(image_path):
    """Convert X-ray to grayscale, apply CLAHE, and resize."""
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    image = clahe.apply(image)
    
    # Resize to 1024x1024
    image = cv2.resize(image, (1024, 1024))
    
    return image

# Function to preprocess masks
def preprocess_mask(mask_path):
    """Convert annotation mask to binary and resize."""
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)  # Convert to binary
    mask = cv2.resize(mask, (1024, 1024))
    
    return mask

# Save preprocessed images and masks
def process_and_save(files, split_type):
    for image_path in files:
        filename = os.path.basename(image_path)
        mask_path = get_mask_path(image_path)

        # Process and save the X-ray image
        processed_image = preprocess_image(image_path)
        cv2.imwrite(os.path.join(output_dir, "images", split_type, filename), processed_image)

        if mask_path:
            # Process and save the corresponding mask
            processed_mask = preprocess_mask(mask_path)
            cv2.imwrite(os.path.join(output_dir, "masks", split_type, filename), processed_mask)

        print(f"✅ Processed {filename} into {split_type} set")

# Run dataset preprocessing
process_and_save(train_files, "train")
process_and_save(val_files, "val")
process_and_save(test_files, "test")

print("✅ Step 1 Complete! Dataset is ready for YOLO & U-Net training.")


ValueError: With n_samples=1, test_size=0.5 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

## step 2

In [8]:
import os
import cv2
import glob
import numpy as np
import random

# Define paths
data_dir = "./processed/"
output_dir = "./processed_split/"
os.makedirs(output_dir, exist_ok=True)

# Paths for YOLO format
image_dir = os.path.join(output_dir, "images")
mask_dir = os.path.join(output_dir, "masks")  # Only for U-Net
label_dir = os.path.join(output_dir, "labels")  # Only for YOLO

for subdir in ["train", "val", "test"]:
    os.makedirs(os.path.join(image_dir, subdir), exist_ok=True)
    os.makedirs(os.path.join(label_dir, subdir), exist_ok=True)
    os.makedirs(os.path.join(mask_dir, subdir), exist_ok=True)

# Get all images (excluding labeled ones)
image_files = sorted(glob.glob(os.path.join(data_dir, "*.JPG")))
image_files = [f for f in image_files if "_mask" not in f]  # Ignore masks

# Manually assign dataset split
random.shuffle(image_files)
train_files = image_files[:4]  # 4 images for training
val_files = image_files[4:5]  # 1 image for validation
test_files = image_files[5:6]  # 1 image for testing

# Function to convert segmentation mask to YOLO format
def mask_to_yolo(mask_path, image_size=(1024, 1024)):
    """Convert binary mask into YOLO bounding box format."""
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    h, w = image_size
    yolo_labels = []
    
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        x_center = (x + w / 2) / w
        y_center = (y + h / 2) / h
        width = w / w
        height = h / h
        yolo_labels.append(f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
    
    return yolo_labels

# Process and save data
def process_and_save(files, split_type):
    for image_path in files:
        filename = os.path.basename(image_path)
        mask_path = image_path.replace(".JPG", "_mask.JPG")

        # Copy images
        cv2.imwrite(os.path.join(image_dir, split_type, filename), cv2.imread(image_path))

        # Handle YOLO conversion
        if os.path.exists(mask_path):
            yolo_labels = mask_to_yolo(mask_path)
            label_filename = filename.replace(".JPG", ".txt")
            with open(os.path.join(label_dir, split_type, label_filename), "w") as f:
                f.write("\n".join(yolo_labels))
            
            # Also save mask for U-Net
            cv2.imwrite(os.path.join(mask_dir, split_type, filename), cv2.imread(mask_path))

        print(f"✅ Processed {filename} into {split_type} set")

# Run processing for all dataset splits
process_and_save(train_files, "train")
process_and_save(val_files, "val")
process_and_save(test_files, "test")

print("✅ Dataset conversion complete!")


✅ Processed 00012_09_20241021155243.JPG into train set
✅ Processed 00026_03_20241021155243.JPG into train set
✅ Processed 00034_15_20240402113805242.JPG into train set
✅ Dataset conversion complete!


## step 3


In [10]:
!pip install ultralytics



Defaulting to user installation because normal site-packages is not writeable
Collecting ultralytics
  Downloading ultralytics-8.3.74-py3-none-any.whl.metadata (35 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting seaborn>=0.11.0 (from ultralytics)
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.74-py3-none-any.whl (914 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m914.7/914.7 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25hDownloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Downloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Installing collected packages: py-cpuinfo, seaborn, ultralytics-thop, ultralytics
Successfu

In [14]:
from ultralytics import YOLO

# Load YOLOv8 segmentation model
model = YOLO("yolov8m-seg.pt")  # Using medium YOLOv8-seg model

# Train YOLOv8-seg
model.train(
    data="./processed_split/data.yaml",  # 🔹 Now using the YAML file!
    epochs=100,
    imgsz=1024,
    batch=4,
    workers=2,
    patience=10,
    lr0=0.001,  # Learning rate
    optimizer="Adam",
    save=True
)

# Save trained model weights
model.export(format="onnx")  # Convert to ONNX for inference compatibility

print("✅ YOLOv8-Seg training completed! Model saved as best.pt")


Ultralytics 8.3.74 🚀 Python-3.10.12 torch-2.5.1+cu124 CPU (Intel Core(TM) i5-8250U 1.60GHz)
[34m[1mengine/trainer: [0mtask=segment, mode=train, model=yolov8m-seg.pt, data=./processed_split/data.yaml, epochs=100, time=None, patience=10, batch=4, imgsz=1024, save=True, save_period=-1, cache=False, device=None, workers=2, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=Tr

100%|█████████████████████████████████████████| 755k/755k [00:00<00:00, 839kB/s]


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytics

[34m[1mtrain: [0mScanning /home/zigron/Downloads/abdullah/processed_split/labels/train... [0m

[34m[1mtrain: [0mNew cache created: /home/zigron/Downloads/abdullah/processed_split/labels/train.cache





ValueError: not enough values to unpack (expected 3, got 0)