In [16]:
#%pip install -r yolov5/requirements.txt --user 
#%pip install scikit-learn scikit-image azureml-core --user

In [1]:
from util import labeledImage, normalize_coordinates, convert_to_yolo_format
from sklearn.model_selection import train_test_split
import os, shutil, yaml

In [2]:
# Set the path to the root folder that contains images and labels

path_to_final_project = r'final_project_data'

source_images_dir = f'{path_to_final_project}/mos2_preprocessed/'
source_labels_dir = f'{path_to_final_project}/mos2_preprocessed/labels/'

labeled_images = []
tag = 'mos2_preprocessed' 

for file in os.listdir(source_images_dir):
    # find all jpeg file and it's ImageJ label
    if file.endswith(".jpeg"):
        image_path = os.path.join(source_images_dir, file)
        label_path = os.path.join(source_labels_dir, file.split('.')[0] + '.txt')
        labeled_images.append(labeledImage(image_path))
        labeled_images[-1].add_labels_from_file(tag, label_path)

In [3]:
train_and_val_set, test_set = train_test_split(labeled_images, test_size=0.1)
train_set, val_set = train_test_split(train_and_val_set, test_size=(2/9))

len(train_set), len(val_set), len(test_set)

(19, 6, 3)

In [4]:
# Create the molecule_images directory if it doesn't exist
output_dir = os.path.join(os.getcwd(),'mos2_preprocessed_images')
if not os.path.exists(output_dir): os.mkdir(output_dir)

train_dir = os.path.join(output_dir, 'train') 
val_dir   = os.path.join(output_dir, 'val') 
test_dir  = os.path.join(output_dir, 'test') 

# Create the sub-directories
for d in [train_dir, val_dir, test_dir]:
    if not os.path.exists(d): os.mkdir(d)
    
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')
    
    for sub_dir in [images_sub_dir, labels_sub_dir]:
        if not os.path.exists(sub_dir): os.mkdir(sub_dir)

In [5]:
# make unified yolo tags 
tags = [tag]

# zip the dataset
dataset = [(train_dir, train_set),(val_dir, val_set),(test_dir, test_set)]

for d, s in dataset:
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')

    # copy over the images
    for img in s:
        shutil.copyfile(img.path, os.path.join(images_sub_dir, img.name))
    
    # covert ImageJ labels to yolo format and save it to labels_sub_dir
    convert_to_yolo_format(s, labels_sub_dir, tags)

successfully generated labels for image  14_processed.jpeg
successfully generated labels for image  2_processed.jpeg
successfully generated labels for image  17_processed.jpeg
successfully generated labels for image  3_processed.jpeg
successfully generated labels for image  28_processed.jpeg
successfully generated labels for image  16_processed.jpeg
successfully generated labels for image  18_processed.jpeg
successfully generated labels for image  7_processed.jpeg
successfully generated labels for image  8_processed.jpeg
successfully generated labels for image  27_processed.jpeg
successfully generated labels for image  9_processed.jpeg
successfully generated labels for image  26_processed.jpeg
successfully generated labels for image  11_processed.jpeg
successfully generated labels for image  4_processed.jpeg
successfully generated labels for image  15_processed.jpeg
successfully generated labels for image  10_processed.jpeg
successfully generated labels for image  24_processed.jpeg
suc

In [6]:
# generate yolo yaml file
yolo_yaml = os.path.join(output_dir, 'mos2_preprocessed_defect_detection_yolov5.yaml')

with open(yolo_yaml, 'w') as yamlout:
    yaml.dump(
        {'train': train_dir,
         'val': val_dir,
         'nc': len(tags),
         'names': tags},
        yamlout,
        default_flow_style=None,
        sort_keys=False
    )

In [7]:
# Run YOLOv5 training with pretrained weights
# YOLOv5 will download weights if not found
%run yolov5/train.py --img 640 --batch 5 --epochs 1 --data ./mos2_preprocessed_images/mos2_preprocessed_defect_detection_yolov5.yaml --weights yolov5s.pt

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=./mos2_preprocessed_images/mos2_preprocessed_defect_detection_yolov5.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=1, batch_size=5, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=yolov5/data/hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, ndjson_console=False, ndjson_file=False
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-419-gcd44191c Python-3.9.21 torch-2.7.0+cu126 CPU

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, 

In [8]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [9]:
yolov5_env = Environment(name="yolov5_env")

# Start from a base docker environments defined by Microsoft
yolov5_env.docker.base_image  = "docker.io/hstirrat/yolov5-env:fixed"

conda_dep = CondaDependencies()
# Indicate which version of python needs to be installed
conda_dep.add_conda_package('python=3.9')

# install all the yolov5 requirement at the image build time
with open('./yolov5/requirements.txt', 'r') as f:
    line = f.readline()
    
    while line != '':    
        # If the line is a comment or empty, skip it    
        if line.startswith('#') or len(line.split()) == 0:
            line = f.readline()
            continue
        # Otherwise add the corresponding package name as a dependency
        conda_dep.add_pip_package(line.split()[0])
        # Then move on to the next line in the requirements.txt file
        line = f.readline()

yolov5_env.python.conda_dependencies=conda_dep

In [10]:
yolov5_env.get_image_details

<bound method Environment.get_image_details of {
    "assetId": null,
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "docker.io/hstirrat/yolov5-env:fixed",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": "2g"
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "yolov5_env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "cha

In [11]:
subscription_id = '6b86c32f-3e1d-4be7-bbd4-dd16443808b2'
resource_group  = 'rg-amlclass-zbyw'
workspace_name  = 'azureml-zbyw'
ws = Workspace(subscription_id, resource_group, workspace_name)

experiment = Experiment(workspace=ws, name='mos2_preprocessed_defect_detection_yolo_training')

In [12]:
# Overall configuration for the script to be run on the compute cluster
config = ScriptRunConfig(source_directory='./deploy_yolo_training/',   ## folder in which the script is located
                         script='training_on_aml.py',       ## script name
                         compute_target='GPU-zbyw1',
                         environment=yolov5_env)   

In [13]:
os.getcwd()

'/home/zbyw/MSE544_project_preprocessed'

In [14]:
run = experiment.submit(config)
aml_url = run.get_portal_url()
print(aml_url)

https://ml.azure.com/runs/mos2_preprocessed_defect_detection_yolo_training_1748743172_771f95fe?wsid=/subscriptions/6b86c32f-3e1d-4be7-bbd4-dd16443808b2/resourcegroups/rg-amlclass-zbyw/workspaces/azureml-zbyw&tid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2
