# Ethi Medical Products Object Detection

**YOLOv5 Object Detection Training Notebook**

**Step 1: Environment Setup and Imports**

First, we'll import the necessary libraries and set up the environment.

In [1]:
# Standard-library modules used below for path handling, shuffling and copying files
import os
import sys
import random
import shutil

# Third-party requirements (torch, torchvision, OpenCV and the other YOLOv5
# dependencies) must already be installed; uncomment the next line to install them
#!pip install opencv-python torch torchvision

# Put the sibling 'scripts' directory on the import path so `logger` can be imported
sys.path.append(os.path.abspath(os.path.join('..', 'scripts')))

from logger import Logger

# Single Logger instance shared by the rest of the notebook
logger = Logger(log_file='../yolo_training.log')

# Expose the local YOLOv5 checkout as well (adjust to your directory layout)
sys.path.append('../yolov5')

**Step 2: Define Paths and Prepare Directories**

We define the paths for the source images and create directories for the training and validation sets.

In [2]:
# Define the directory paths (relative to the notebook's working directory)
photos_dir = '../data/photos'  # Directory containing all your images
train_dir = '../data/train/images'  # Directory for training images
val_dir = '../data/valid/images'  # Directory for validation images


try:
    # Create directories for train and validation sets if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # List the source directory once so the printed and the logged count always agree
    total_images = len(os.listdir(photos_dir))
    print(f"Total images available: {total_images}")
    logger.info(f'{total_images} total images available in {photos_dir}')
except Exception as e:
    # Record the failure in the log file, then re-raise: every later cell depends on
    # these directories, so continuing silently would only hide the real problem.
    logger.info(f'Error: {e}')
    raise


Total images available: 567


**Step 3: Split the Dataset**

Split the images into training and validation datasets with a specified ratio (default is 80% training, 20% validation).

In [3]:
def split_dataset(source_dir, train_dir, val_dir, split_ratio=0.8, seed=42,
                  extensions=('.jpg', '.jpeg', '.png')):
    """Copy a shuffled train/validation split of the images in ``source_dir``.

    The function is safe to re-run: when an image is assigned to one split, a copy
    of the same file left in the other split directory by an earlier run is removed,
    so the two directories never share an image from ``source_dir``.

    Args:
        source_dir: Directory holding the source images.
        train_dir: Destination directory for training images (created if missing).
        val_dir: Destination directory for validation images (created if missing).
        split_ratio: Fraction of images assigned to the training set (0 to 1).
        seed: Seed for the shuffle. The same seed and the same set of files give the
            same split; pass ``None`` for a different split on every call.
        extensions: File suffixes treated as images, matched case-insensitively.

    Raises:
        ValueError: If ``split_ratio`` is outside the range 0 to 1.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # shutil.copy() treats a missing destination directory as a file name, which
    # would silently overwrite one file again and again instead of filling a folder.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # Sort before shuffling: os.listdir() order is arbitrary, so without sorting the
    # same seed could still produce different splits.
    images = sorted(f for f in os.listdir(source_dir) if f.lower().endswith(suffixes))
    # A private Random instance keeps the global random state untouched.
    random.Random(seed).shuffle(images)

    # Calculate the split index
    split_index = int(len(images) * split_ratio)
    train_images = images[:split_index]
    val_images = images[split_index:]

    # Only clean up the "other" directory when it really is a different directory
    distinct_dirs = os.path.abspath(train_dir) != os.path.abspath(val_dir)

    def _place(names, dest_dir, other_dir):
        """Copy ``names`` into ``dest_dir`` and drop stale copies from ``other_dir``."""
        for image in names:
            shutil.copy(os.path.join(source_dir, image), dest_dir)
            stale_copy = os.path.join(other_dir, image)
            if distinct_dirs and os.path.isfile(stale_copy):
                os.remove(stale_copy)

    _place(train_images, train_dir, val_dir)
    _place(val_images, val_dir, train_dir)

    print(f"Training images: {len(train_images)}")
    print(f"Validation images: {len(val_images)}")


if __name__=='__main__':

    try:
        # Split the photos into the train and validation directories defined above
        split_dataset(photos_dir, train_dir, val_dir)
        logger.info('Splited the dataset into train and validation sets.')
    except Exception as e:
        # Log the failure, then re-raise so the notebook stops here instead of
        # training later on a missing or partial split.
        logger.info(f'Error: {e}')
        raise


Training images: 453
Validation images: 114


**Step 4: Prepare YOLOv5 Environment**

Clone the YOLOv5 repository if it isn't already cloned, and install the necessary requirements.

In [4]:
# Clone the YOLOv5 repository (only run this if you don't have YOLOv5 cloned already)

# uncomment below line, and run the cell

#!git clone https://github.com/ultralytics/yolov5.git

# Change the working directory to yolov5

# uncomment below line, and run the cell
#%cd yolov5

# Install required packages for YOLOv5
# uncomment below line, and run the cell

#!pip install -r requirements.txt

# Optional: install comet_ml to automatically track and visualize YOLOv5 🚀 runs in Comet
# uncomment below line, and run the cell
#%pip install comet_ml


**Step 5: Create the Data YAML File**

The YAML file will define paths to training and validation data, the number of classes, and their names.

In [4]:
# Resolve the dataset directories (the same locations created in Step 2) to absolute
# paths at run time instead of hard-coding one machine's drive layout.
# Forward slashes keep the paths readable in YAML on Windows as well.
train_images_path = os.path.abspath('../data/train/images').replace(os.sep, '/')
val_images_path = os.path.abspath('../data/valid/images').replace(os.sep, '/')

# Define the content of the YAML file
# NOTE: the class names must stay exactly as used in the label files; do not
# "correct" their spelling here without re-labelling the dataset.
yaml_content = f"""
train: {train_images_path}  # Update this if you have a separate directory for training
val: {val_images_path}    # Update this if you have a separate directory for validation

nc: 9  # Number of classes
names: [
  'cosmotic',
  'food-package', 
  'supliment', 
  'formula-milk', 
  'lotion-moisturizer', 
  'medicine', 
  'mini-drop', 
  'pregnancy', 
  'other'
]
"""

# Write the content to a file
with open("medical_dataset.yml", "w") as file:
    file.write(yaml_content)

# Verify the file was created

print(os.path.exists("medical_dataset.yml"))  # Should return True

# Check the existence of the image directory
print(os.path.exists(train_images_path))

True
True


**Step 6: Train the YOLOv5 Model**

Train the model using the dataset and configuration set up in the previous steps.

First, download the labeled dataset from Roboflow.

In [5]:
#%pip install roboflow
from roboflow import Roboflow

# Never commit API keys to a notebook: read the key from the environment instead.
# Any key that was previously stored in this cell should be treated as leaked and
# rotated in the Roboflow account settings.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY")
if not roboflow_api_key:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )

# Download version 1 of the 'medical-products' project in YOLOv5 format
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("medical-products").project("medical-products")
version = project.version(1)
dataset = version.download("yolov5")
                

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in medical-products-1 to yolov5pytorch:: 100%|██████████| 7971/7971 [00:01<00:00, 5885.18it/s] 





Extracting Dataset Version Zip to medical-products-1 in yolov5pytorch:: 100%|██████████| 392/392 [00:00<00:00, 1561.69it/s]


**Use a Pretrained Model (Transfer Learning)**

Fine-tuning a smaller pre-trained model (like yolov5n.pt or yolov5s.pt instead of larger models like yolov5x.pt) uses less memory. If memory is still a problem, you can also:
- Decrease the batch size
- Reduce the image size

In [6]:
# Run YOLOv5's detect.py with the yolov5m.pt checkpoint on everything in ../data/photos/.
# --save-txt / --save-conf also write the detections (with confidences) as text files;
# --project / --name place the outputs under ../data/results/run1.
!python ../yolov5/detect.py --weights yolov5m.pt --source ../data/photos/ --save-txt --save-conf --project ../data/results --name run1

[34m[1mdetect: [0mweights=['yolov5m.pt'], source=../data/photos/, data=..\yolov5\data\coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_format=0, save_csv=False, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=../data/results, name=run1, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5  v7.0-398-g5cdad892 Python-3.12.8 torch-2.6.0+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt to yolov5m.pt...

  0%|          | 0.00/40.8M [00:00<?, ?B/s]
  0%|          | 128k/40.8M [00:00<01:10, 609kB/s]
  1%|          | 256k/40.8M [00:00<00:47, 887kB/s]
  1%|          | 384k/40.8M [00:00<00:42, 998kB/s]
  2%|▏         | 640k/40.8M [00:00<00:35, 1.20MB/s]
  2%|▏         | 896k/40.8M [00:00<00:31, 1.34MB/s]
  3%|▎         | 1.12M/40.8

In [7]:
from ultralytics import YOLO

# Load the pretrained YOLOv5s checkpoint through the Ultralytics API
# (the recorded output shows Ultralytics substituting its 'yolov5su.pt' variant)
model = YOLO("yolov5s.pt")

# Train the model on the dataset described by medical_dataset.yml
# NOTE(review): the output recorded below reports "0 images, 567 backgrounds", i.e. no
# label files were found next to the images - confirm the labels directories exist
# before relying on this run.
results = model.train(data="medical_dataset.yml", epochs=50, imgsz=640)

PRO TIP  Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt to 'yolov5su.pt'...


100%|██████████| 17.7M/17.7M [00:02<00:00, 6.55MB/s]


Ultralytics 8.3.70  Python-3.12.8 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i5-11320H 3.20GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov5s.pt, data=medical_dataset.yml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show

[34m[1mtrain: [0mScanning D:\KMAI3\EMB-DataWarehouse\data\train\labels... 0 images, 567 backgrounds, 0 corrupt: 100%|██████████| 567/567 [00:00<00:00, 750.99it/s]

[34m[1mtrain: [0mNew cache created: D:\KMAI3\EMB-DataWarehouse\data\train\labels.cache



[34m[1mval: [0mScanning D:\KMAI3\EMB-DataWarehouse\data\valid\labels... 0 images, 426 backgrounds, 0 corrupt: 100%|██████████| 426/426 [00:00<00:00, 846.52it/s]

[34m[1mval: [0mNew cache created: D:\KMAI3\EMB-DataWarehouse\data\valid\labels.cache





Plotting labels to D:\KMAI3\EMB-DataWarehouse\runs\detect\train\labels.jpg... 
zero-size array to reduction operation maximum which has no identity
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000769, momentum=0.9) with parameter groups 69 weight(decay=0.0), 76 weight(decay=0.0005), 75 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mD:\KMAI3\EMB-DataWarehouse\runs\detect\train[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G          0        396          0          0        640:  17%|█▋        | 6/36 [00:53<04:28,  8.95s/it]


KeyboardInterrupt: 

In [None]:
# Fine-tune yolov5s.pt with the YOLOv5 training script. The script lives in the
# ../yolov5 checkout (the `%cd yolov5` step above is commented out), so it must be
# referenced by path like the other YOLOv5 commands in this notebook.
# NOTE(review): Step 5 writes 'medical_dataset.yml'; confirm 'ethio_medical_dataset.yml'
# exists in the working directory or point --data at the file created above.
!python ../yolov5/train.py --img 640 --batch 16 --epochs 50 --data ethio_medical_dataset.yml --weights yolov5s.pt

In [None]:
# Fine-tune from a pre-trained YOLOv5 checkpoint using the local ../yolov5 checkout.
# Heavier configurations kept for reference (yolov5s.pt, 640 px images, batch 16):
#!python train.py --img 640 --batch 16 --epochs 50 --data ethio_medical_dataset.yml --weights yolov5s.pt
#!python ../yolov5/train.py --img 640 --batch 16 --epochs 50 --data ../notebooks/ethio_medical_dataset.yml --weights yolov5s.pt --name fine_tuned_model
# Active configuration: the smaller yolov5n.pt checkpoint with 320 px images and batch 8,
# in line with the memory-saving advice above; the run is named 'fine_tuned_model'.
!python ../yolov5/train.py --img 320 --batch 8 --epochs 50 --data ../notebooks/ethio_medical_dataset.yml --weights yolov5n.pt --name fine_tuned_model


**Step 7: Processing the Detection Results**

Extract relevant data from the detection results, such as bounding box coordinates, confidence scores, and class labels.

To process detection results using YOLOv5 and extract relevant data, we can modify the inference script (detect.py) or create a custom script that reads the detection outputs. Below is a step-by-step guide:

**Step 1: Run predictions on the whole image dataset**

Using the model fine-tuned (via transfer learning) on the small labeled subset, run predictions on all of the images.

In [None]:
# Run detect.py with the fine-tuned weights on every image in ../data/photos at 320 px,
# keeping detections with confidence >= 0.25 and saving them (with confidences) as text.
# NOTE(review): 'fine_tuned_model12' is the run directory this was written against;
# presumably the numeric suffix depends on how many runs exist - adjust to your run.
!python ../yolov5/detect.py --weights ../yolov5/runs/train/fine_tuned_model12/weights/best.pt --img 320 --conf 0.25 --save-conf --source ../data/photos --name predictions --save-txt


**Storing detection data to a database table**

In [None]:
# Run the project's store_detection_results.py script from the sibling scripts directory.
# NOTE(review): the script itself is not shown here; confirm which results directory
# and database settings it expects before running.
!python ../scripts/store_detection_results.py


Display the training result plots.

In [None]:
from IPython.display import Image, display

# Directory of the training run whose result images are shown.
# Adjust the run name ('fine_tuned_model12') to match your own training run.
results_dir = '../yolov5/runs/train/fine_tuned_model12'

# Result images to show, in display order (a mix of PNG and JPG files)
result_files = [
    'results.png',
    'labels.jpg',
    'confusion_matrix.png',
    'labels_correlogram.jpg',
    'F1_curve.png',
    'P_curve.png',
    'PR_curve.png',
    'R_curve.png',
]

for file_name in result_files:
    png_file_path = f'{results_dir}/{file_name}'
    # Skip missing files with a message so one absent plot does not stop the rest
    if os.path.exists(png_file_path):
        display(Image(filename=png_file_path))
    else:
        print(f"Result file not found, skipping: {png_file_path}")