In [None]:
!pip install Pillow opencv-python numpy matplotlib seaborn
!pip install ultralytics

In [None]:
# Cell 2: Import Libraries
# This cell imports all the modules needed for data preprocessing.
import os
import shutil
import random
from collections import defaultdict
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Cell 4: After Manual Annotation: Structure Data for YOLO Training (REVISED)
# This cell pools every image/label pair found in the Roboflow export's split folders and
# re-splits the pooled pairs into train/val/test using a fixed random seed.

print("\n--- Step 2: AFTER MANUAL ANNOTATION: Structure Data for YOLO Training (REVISED) ---")
print("This code processes the output from your Roboflow export and organizes it for model training.")

# IMPORTANT: Configure this path AFTER you have completed the manual annotation and exported your data from Roboflow.
# This 'roboflow_export_root' should be the path to the folder that Roboflow unzipped to.
# For example, if you unzipped 'your_project_name.zip' to 'my_roboflow_data', then set this to 'my_roboflow_data'.
roboflow_export_root = 'annotation_data'

# Define the final main dataset directory for YOLO training (detection model) and create
# the images/<split> and labels/<split> folders inside it.
yolo_dataset_dir = 'yolo_medical_device_dataset'
for dataset_kind in ('images', 'labels'):
    for dataset_split in ('train', 'val', 'test'):
        os.makedirs(os.path.join(yolo_dataset_dir, dataset_kind, dataset_split), exist_ok=True)

# Define your class names for YOLO. These MUST match the labels used in your annotation tool
# and be in the same order as the class IDs (0, 1, 2, 3, 4) in your YOLO .txt files.
# Make sure the order here matches the order Roboflow assigned (usually alphabetical or by creation order).
yolo_class_names = ['bp_monitor', 'glucose_meter', 'hba1c', 'spo2', 'weighing_scale']

# Collect all image and label file paths from Roboflow's train, val, test subdirectories
all_images = []
all_labels = []

# Roboflow typically exports with 'train', 'valid' (or 'val'), and 'test' subfolders.
# Each logical split is looked up under every folder name listed for it; the first folder
# that contains both 'images' and 'labels' is used.
roboflow_splits = ['train', 'valid', 'test']
roboflow_split_aliases = {'valid': ('valid', 'val')}

for split_folder in roboflow_splits:
    images_path = None
    labels_path = None
    for candidate_folder in roboflow_split_aliases.get(split_folder, (split_folder,)):
        candidate_images = os.path.join(roboflow_export_root, candidate_folder, 'images')
        candidate_labels = os.path.join(roboflow_export_root, candidate_folder, 'labels')
        if os.path.isdir(candidate_images) and os.path.isdir(candidate_labels):
            images_path, labels_path = candidate_images, candidate_labels
            break

    if images_path is None:
        print(f"Warning: '{split_folder}/images' or '{split_folder}/labels' not found in '{roboflow_export_root}'. Skipping this split.")
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the pair list (and
    # therefore the seeded shuffle below) identical on every machine and every run.
    for img_name in sorted(os.listdir(images_path)):
        if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            img_base_name = os.path.splitext(img_name)[0]
            label_file_name = img_base_name + '.txt'
            label_path = os.path.join(labels_path, label_file_name)

            if os.path.exists(label_path):
                all_images.append(os.path.join(images_path, img_name))
                all_labels.append(label_path)
            else:
                print(f"Warning: No label found for image: {os.path.join(images_path, img_name)}. Skipping.")

# Create a list of (image_path, label_path) tuples
valid_pairs = list(zip(all_images, all_labels))

print(f"Found {len(valid_pairs)} valid image-label pairs collected from Roboflow export.")

# Shuffle and split the data for reproducibility (even if Roboflow pre-split, we re-split for consistency)
# NOTE(review): if the Roboflow export contains augmented copies of the same source image,
# pooling and re-splitting could place copies of one image in different splits - confirm.
random.seed(42) # For reproducibility
random.shuffle(valid_pairs)

train_split_ratio = 0.7
val_split_ratio = 0.2
test_split_ratio = 0.1  # Informational: the test split receives whatever remains after train and val

num_samples = len(valid_pairs)
num_train = int(num_samples * train_split_ratio)
num_val = int(num_samples * val_split_ratio)
num_test = num_samples - num_train - num_val # Ensures all samples are covered

train_data = valid_pairs[:num_train]
val_data = valid_pairs[num_train : num_train + num_val]
test_data = valid_pairs[num_train + num_val :]

print(f"Splitting data: Train={len(train_data)}, Validation={len(val_data)}, Test={len(test_data)}")

# Function to copy files to their respective directories
def copy_files_to_split(data_list, split_name, base_dir):
    """Copy image/label pairs into the YOLO folder layout of one split.

    Images are copied to ``<base_dir>/images/<split_name>`` and labels to
    ``<base_dir>/labels/<split_name>``, keeping their original file names.
    Both destination folders are created if they do not exist yet, so the
    function does not depend on the folders having been prepared beforehand.

    Args:
        data_list: Iterable of ``(image_path, label_path)`` tuples.
        split_name: Name of the split sub-folder (e.g. ``'train'``).
        base_dir: Root directory of the YOLO dataset.
    """
    print(f"Copying files for {split_name} split...")
    images_dir = os.path.join(base_dir, 'images', split_name)
    labels_dir = os.path.join(base_dir, 'labels', split_name)
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for img_path, lbl_path in data_list:
        # Copy image
        shutil.copy(img_path, os.path.join(images_dir, os.path.basename(img_path)))
        # Copy label
        shutil.copy(lbl_path, os.path.join(labels_dir, os.path.basename(lbl_path)))

# Populate each split folder of the YOLO dataset from its list of (image, label) pairs.
for split_label, split_pairs in (('train', train_data), ('val', val_data), ('test', test_data)):
    copy_files_to_split(split_pairs, split_label, yolo_dataset_dir)

print("\nData organized into YOLO format successfully!")
print(f"Check the '{yolo_dataset_dir}' directory for the structured dataset.")

# Create data.yaml for the Detection Model: absolute dataset root, the three image
# folders relative to that root, and the class count / class names.
dataset_root_abs = os.path.abspath(yolo_dataset_dir)
class_count = len(yolo_class_names)
detection_data_yaml_content = f"""
path: {dataset_root_abs}
train: images/train
val: images/val
test: images/test

# Classes
nc: {class_count}
names: {yolo_class_names}
"""

detection_yaml_file = os.path.join(yolo_dataset_dir, 'detection_data.yaml')
with open(detection_yaml_file, 'w') as yaml_handle:
    yaml_handle.write(detection_data_yaml_content)

print(f"detection_data.yaml created in {yolo_dataset_dir}.")

In [None]:
# Cell 5: Prepare Cropped Images for Classification Model (CORRECTED AGAIN for YAML path)
# The classification model is trained on per-device crops grouped by class; this part of
# the cell announces the step and creates the root folder of each split.

print("\n--- Step 3: Preparing Cropped Images for Classification Model ---")
print("This step generates a separate dataset of cropped device images, organized by class.")

classification_dataset_path = 'classification_dataset'
for cls_split in ('train', 'val', 'test'):
    # exist_ok keeps the cell re-runnable when the folders are already present.
    os.makedirs(os.path.join(classification_dataset_path, cls_split), exist_ok=True)

def _yolo_box_to_pixel_box(center_x, center_y, bbox_width, bbox_height, img_width, img_height):
    """Convert a normalized YOLO box to an (xmin, ymin, xmax, ymax) pixel box clamped to the image."""
    # Convert normalized coordinates to pixel coordinates
    x_center_px = center_x * img_width
    y_center_px = center_y * img_height
    bbox_width_px = bbox_width * img_width
    bbox_height_px = bbox_height * img_height

    # Corner coordinates, truncated to whole pixels and kept within image bounds
    xmin = max(0, int(x_center_px - (bbox_width_px / 2)))
    ymin = max(0, int(y_center_px - (bbox_height_px / 2)))
    xmax = min(img_width, int(x_center_px + (bbox_width_px / 2)))
    ymax = min(img_height, int(y_center_px + (bbox_height_px / 2)))
    return xmin, ymin, xmax, ymax


# Function to crop images based on YOLO labels
def crop_and_save_images(data_list, split_name, base_yolo_dir, base_cls_dir, class_names_map):
    """Crop every labelled bounding box out of each image and save it under its class folder.

    For each ``(image_path, label_path)`` pair, every 5-field label line
    (``class_id center_x center_y width height``, coordinates normalized) yields one
    JPEG crop written to ``<base_cls_dir>/<split_name>/<class_name>/<image>_crop<i>.jpg``,
    where ``i`` is the index of the label line within its file.

    Lines that do not have exactly 5 fields are skipped silently. A line whose values
    cannot be parsed is reported and skipped without discarding the other boxes of the
    same image. Any other error while handling an image is reported and processing
    continues with the next image.

    Args:
        data_list: Iterable of ``(image_path, label_path)`` tuples.
        split_name: Split sub-folder name (e.g. ``'train'``).
        base_yolo_dir: Unused; kept so existing callers keep working.
        base_cls_dir: Root directory of the classification dataset.
        class_names_map: Sequence mapping class IDs (by index) to class names.
    """
    print(f"Cropping and saving for {split_name} split...")
    for img_path, lbl_path in data_list:
        try:
            # The context manager releases the underlying file as soon as the pixel data
            # has been converted into an independent RGB image.
            with Image.open(img_path) as source_img:
                img = source_img.convert("RGB")
            img_width, img_height = img.size

            with open(lbl_path, 'r') as f:
                lines = f.readlines()

            original_img_name_base = os.path.splitext(os.path.basename(img_path))[0]

            for i, line in enumerate(lines): # Enumerate to create unique names for multiple crops from one image
                parts = line.strip().split()
                if len(parts) != 5:
                    continue

                try:
                    class_id = int(parts[0])
                    # YOLO format: center_x, center_y, width, height (normalized)
                    center_x, center_y, bbox_width, bbox_height = map(float, parts[1:])
                    xmin, ymin, xmax, ymax = _yolo_box_to_pixel_box(
                        center_x, center_y, bbox_width, bbox_height, img_width, img_height
                    )
                except (ValueError, OverflowError):
                    # Non-numeric, NaN or infinite values: drop only this box, keep the rest.
                    print(f"Warning: Malformed label line {i + 1} in {lbl_path}. Skipping bounding box.")
                    continue

                # Ensure class_id is within the bounds of the class_names_map
                if class_id >= len(class_names_map) or class_id < 0:
                    print(f"Warning: Class ID {class_id} out of bounds for {img_path}. Skipping bounding box.")
                    continue
                class_label = class_names_map[class_id]

                if xmax <= xmin or ymax <= ymin:
                    print(f"Warning: Invalid bounding box for {img_path}. Skipping cropping.")
                    continue

                cropped_img = img.crop((xmin, ymin, xmax, ymax))

                # Create destination folder for this class if it doesn't exist
                dest_class_folder = os.path.join(base_cls_dir, split_name, class_label)
                os.makedirs(dest_class_folder, exist_ok=True)

                # Add index (i) for multiple crops from the same original image
                cropped_img_name = f"{original_img_name_base}_crop{i}.jpg"
                cropped_img.save(os.path.join(dest_class_folder, cropped_img_name))
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

# Pass the yolo_class_names list directly for mapping class IDs to names.
crop_and_save_images(train_data, 'train', yolo_dataset_dir, classification_dataset_path, yolo_class_names)
crop_and_save_images(val_data, 'val', yolo_dataset_dir, classification_dataset_path, yolo_class_names)
crop_and_save_images(test_data, 'test', yolo_dataset_dir, classification_dataset_path, yolo_class_names)

print("\nCropped images for classification model prepared successfully!")
print(f"Check the '{classification_dataset_path}' directory for the structured classification dataset.")

# Write classification_data.yaml describing the cropped dataset. 'path' is the ABSOLUTE
# dataset root (so it does not depend on the working directory); the split entries are
# folder names relative to that root.
# NOTE(review): the classification training cell in this notebook passes the dataset
# directory itself rather than this YAML file - confirm whether the YAML is still needed.
classification_data_yaml_content = f"""
# Absolute path to the classification dataset root; the split folders below are relative to it
path: {os.path.abspath(classification_dataset_path)}
train: train
val: val
test: test # Added test split as well for completeness if you need to evaluate it

# Classes
nc: {len(yolo_class_names)}
names: {yolo_class_names}
"""

with open(os.path.join(classification_dataset_path, 'classification_data.yaml'), 'w') as f:
    f.write(classification_data_yaml_content)

print(f"classification_data.yaml created in {classification_dataset_path}.")

In [None]:
# Cell 2: Import YOLO and Set Up Paths (UPDATED for Absolute Paths)
from ultralytics import YOLO
import os

# Where the preprocessing step wrote its two dataset YAML files, relative to the
# notebook's working directory.
relative_detection_data_yaml_path = 'yolo_medical_device_dataset/detection_data.yaml'
relative_classification_data_yaml_path = 'classification_dataset/classification_data.yaml'

# Resolve both against the current working directory so that later cells can use
# absolute paths.
detection_data_yaml_path, classification_data_yaml_path = map(
    os.path.abspath,
    (relative_detection_data_yaml_path, relative_classification_data_yaml_path),
)

print(f"Absolute Detection data YAML path: {detection_data_yaml_path}")
print(f"Absolute Classification data YAML path: {classification_data_yaml_path}")

In [None]:
# Cell 3: Train the Device Detection Model
# Fine-tunes a pre-trained YOLOv8 nano detector on the dataset described by
# detection_data_yaml_path and reports where the best weights were written.

print("--- Starting Device Detection Model Training ---")

# Load a pre-trained YOLOv8 detection model (e.g., 'yolov8n.pt' for the nano version)
# This model has learned general features and will be fine-tuned on your specific medical device dataset.
model_detection = YOLO('yolov8n.pt')

# Train the detection model
results_detection = model_detection.train(
    data=detection_data_yaml_path,
    epochs=100,
    imgsz=640,
    batch=16,
    name='device_detection_model_v1',
    # Fine-tuning techniques as per requirements:
    lr0=0.01,
    lrf=0.01,
    optimizer='AdamW',
    fliplr=0.5,
    mosaic=1.0,
    patience=50,                  # Stop training if no improvement for this many epochs
    cache=True,                   # Cache images for faster training (if enough RAM)
)

print("\n--- Device Detection Model Training Complete ---")
# save_dir is already the run's output directory (it includes the runs/detect/<name> part),
# so it must not be prefixed with 'runs/detect/' again.
print(f"Model weights saved at: {results_detection.save_dir}/weights/best.pt")

In [None]:
# Cell 4: Train the Device Classification Model
# Trains a YOLOv8 nano classifier from scratch on the cropped-device dataset and reports
# where the best weights were written.

print("\n--- Starting Device Classification Model Training ---")

# Load a YOLOv8 classification model architecture from scratch (no pre-trained weights)
# 'yolov8n-cls.yaml' defines the smallest YOLOv8 classification model architecture.
model_classification = YOLO('yolov8n-cls.yaml')

# Train the classification model
# Adjust epochs, batch size, and other parameters as needed.
results_classification = model_classification.train(
    data='classification_dataset', # Path to your classification dataset directory (parent folder of splits)
    epochs=50,                          # Number of training epochs (adjust as needed)
    imgsz=224,                          # Input image size (common for classification, e.g., 224x224)
    batch=32,                           # Batch size (adjust based on your GPU memory)
    name='device_classification_model_v1', # A name for this training run
    # Training hyperparameters (the model starts from random weights, so this is not fine-tuning):
    lr0=0.01,                           # Initial learning rate
    optimizer='SGD',                    # SGD is a common optimizer for classification
    patience=20,                      # Stop training if no improvement for this many epochs
)

print("\n--- Device Classification Model Training Complete ---")
# save_dir is already the run's output directory (it includes the runs/classify/<name> part),
# so it must not be prefixed with 'runs/classify/' again.
print(f"Model weights saved at: {results_classification.save_dir}/weights/best.pt")

In [None]:
# To download the entire 'runs' folder (contains all detection and classification training results)
# The shell command below recursively zips /content/runs/ into /content/runs.zip.
# NOTE(review): both paths are hard-coded to /content - this assumes the training runs were
# written under /content/runs; confirm the working directory used during training.
!zip -r /content/runs.zip /content/runs/

# Hand the archive to the download helper from the google.colab package.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime - confirm
# before running this cell elsewhere.
from google.colab import files
files.download('/content/runs.zip')