## **Importing the Libraries**

In [1]:
import os
import shutil
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import seaborn as sns
import random

import torch

## **Downloading the dataset from Kaggle**

In [2]:
import kagglehub

# Kaggle handle of the dataset to fetch.
dataset_handle = "jasonroggy/grazpedwri-dx"

# Download latest version (the handle carries no version pin); the returned
# value is the local path of the downloaded files, reused by later cells.
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/jasonroggy/grazpedwri-dx?dataset_version_number=1...


100%|██████████| 15.1G/15.1G [11:42<00:00, 23.1MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/jasonroggy/grazpedwri-dx/versions/1


## **Moving the downloaded Dataset to /content**

In [5]:
import os
import shutil

source_dir = "/root/.cache/kagglehub/datasets/jasonroggy/grazpedwri-dx/versions/1"
destination_dir = "/content"

# shutil.move places the source directory *inside* an existing destination
# directory, so the dataset ends up at <destination_dir>/<basename of source_dir>
# (here /content/1). Assumes /content already exists, as it does on Colab.
moved_dir = os.path.join(destination_dir, os.path.basename(source_dir))

if os.path.isdir(moved_dir) and not os.path.exists(source_dir):
    # The cell was already executed: the source is gone and the target is in
    # place. Skipping keeps the cell re-runnable instead of raising.
    print(f"Dataset already present at {moved_dir}; nothing to move.")
else:
    # First run (or a genuine conflict, which shutil.move reports by raising).
    shutil.move(source_dir, destination_dir)
    print("Files moved successfully!")


Files moved successfully!


In [9]:
%cd 1

/content/1


## **Combining all the images from each folder into a Single Folder**

In [11]:
import os
import shutil

source_folders = ["images_part1", "images_part2", "images_part3", "images_part4"]
destination_folder = "/content/1/images"

# The part folders live next to the destination folder. Resolving them against
# that fixed root (instead of os.getcwd()) removes the hidden dependency on an
# earlier `%cd` having been executed.
parts_root = os.path.dirname(destination_folder)

os.makedirs(destination_folder, exist_ok=True)

for folder in source_folders:
    folder_path = os.path.join(parts_root, folder)
    if not os.path.isdir(folder_path):
        # Report the gap rather than silently claiming success at the end.
        print(f"Skipping missing folder: {folder_path}")
        continue

    for file_name in os.listdir(folder_path):
        if not file_name.endswith(".png"):
            continue

        source_path = os.path.join(folder_path, file_name)
        destination_path = os.path.join(destination_folder, file_name)

        # Only regular files are moved; a directory that happens to end in
        # ".png" is left where it is.
        if os.path.isfile(source_path):
            shutil.move(source_path, destination_path)

print("All .png images have been moved to the 'images' folder.")


All .png images have been moved to the 'images' folder.


In [19]:
# Sanity check: number of entries now sitting in the merged images folder.
merged_image_entries = os.listdir('/content/1/images')
len(merged_image_entries)

20327

## **Placing the Labels folder at the same level as the Images folder**

In [26]:
import os
import shutil

source_path = "/content/1/yolov5/labels"
destination_path = "/content/1"

# shutil.move puts the folder inside the existing destination directory, so the
# labels end up at <destination_path>/labels, next to the images folder.
moved_labels_dir = os.path.join(destination_path, os.path.basename(source_path))

if os.path.isdir(moved_labels_dir) and not os.path.exists(source_path):
    # Already moved by a previous execution of this cell; skipping keeps the
    # cell re-runnable instead of raising on the missing source.
    print(f"Labels already present at {moved_labels_dir}; nothing to move.")
else:
    # First run (or a genuine conflict, which shutil.move reports by raising).
    shutil.move(source_path, destination_path)
    print("Folder moved successfully!")


Folder moved successfully!


In [27]:
# Sanity check: number of entries in the relocated labels folder.
moved_label_entries = os.listdir('/content/1/labels')
len(moved_label_entries)

20327

## **Creating the train/val/test dataset split for Training**

In [28]:
import os
import shutil
import random

source_images_dir = "/content/1/images"
source_labels_dir = "/content/1/labels"

dataset_dir = "/content/dataset"
splits = ["train", "val", "test"]

# Fixed seed so every run assigns each image to the same split. Without it a
# re-run reshuffles, and because earlier copies are never cleared, the same
# image could end up in more than one of train/val/test.
SPLIT_SEED = 42

# Output layout: <dataset_dir>/images/<split> and <dataset_dir>/labels/<split>.
for split in splits:
    os.makedirs(f"{dataset_dir}/images/{split}", exist_ok=True)
    os.makedirs(f"{dataset_dir}/labels/{split}", exist_ok=True)

# Map file stem -> file name for images and labels, then keep only the images
# that have a label file with the same stem.
image_files = [f for f in os.listdir(source_images_dir) if f.endswith(".png")]
image_filenames = {os.path.splitext(f)[0]: f for f in image_files}
label_files = {os.path.splitext(f)[0]: f for f in os.listdir(source_labels_dir) if f.endswith(".txt")}
valid_images = {name: image_filenames[name] for name in image_filenames if name in label_files}

# os.listdir returns entries in arbitrary order, so sort first to give the
# seeded shuffle a stable starting point.
image_list = sorted(valid_images)
random.Random(SPLIT_SEED).shuffle(image_list)

# 70% train, 20% val; the remainder (about 10%, plus rounding) goes to test.
total_images = len(image_list)
train_split = int(0.7 * total_images)
val_split = int(0.2 * total_images)

train_images = image_list[:train_split]
val_images = image_list[train_split:train_split + val_split]
test_images = image_list[train_split + val_split:]

def copy_files(image_names, split):
    """Copy each named image and its label file into the given split folders.

    Args:
        image_names: Iterable of file stems; each must be a key of both
            ``valid_images`` and ``label_files``.
        split: Name of the split sub-folder under ``dataset_dir`` (the callers
            in this notebook pass "train", "val" or "test").

    The source files are copied with ``shutil.copy2`` and left in place.
    """
    for stem in image_names:
        image_name, label_name = valid_images[stem], label_files[stem]

        # (source, destination) pairs: image first, then its label.
        transfers = (
            (
                os.path.join(source_images_dir, image_name),
                f"{dataset_dir}/images/{split}/{image_name}",
            ),
            (
                os.path.join(source_labels_dir, label_name),
                f"{dataset_dir}/labels/{split}/{label_name}",
            ),
        )
        for src, dst in transfers:
            shutil.copy2(src, dst)

# Populate the split folders in the order train -> val -> test.
for split_name, split_members in (
    ("train", train_images),
    ("val", val_images),
    ("test", test_images),
):
    copy_files(split_members, split_name)

print(f"Dataset successfully split into train ({len(train_images)}), val ({len(val_images)}), and test ({len(test_images)}) sets!")


Dataset successfully split into train (14228), val (4065), and test (2034) sets!


In [30]:
# len(os.listdir('/content/dataset/images/test'))

2034

## **Creating the data.yaml file**

In [35]:
import yaml

# Root of the split dataset; every path written to the config hangs off it.
yolo_dataset_root = "/content/dataset"

# Nine class names; the later cleanup cell keeps class ids 0-8, which matches
# the positions in this list (presumably position == class id — confirm
# against the dataset's label convention).
class_names = [
    "boneanomaly",
    "bonelesion",
    "foreignbody",
    "fracture",
    "metal",
    "periostealreaction",
    "pronatorsign",
    "softtissue",
    "text",
]

data_config = {
    "names": class_names,
    "nc": len(class_names),
    "path": yolo_dataset_root,
    "train": f"{yolo_dataset_root}/images/train",
    "val": f"{yolo_dataset_root}/images/val",
    "test": f"{yolo_dataset_root}/images/test",
}

yaml_file_path = f"{yolo_dataset_root}/data.yaml"

# Serialize in block style (no inline {...} / [...] flow syntax).
with open(yaml_file_path, "w") as yaml_file:
    yaml_file.write(yaml.dump(data_config, default_flow_style=False))

print(f"data.yaml file created successfully at {yaml_file_path}")


data.yaml file created successfully at /content/dataset/data.yaml


## **Removing label entries whose class_id is greater than 8 (and deleting the label file and its image when no valid entries remain)**

In [None]:
import os

base_dir = "/content/dataset"
sets = ["train", "val", "test"]
max_class_index = 8  # highest class id kept in the label files

for dataset in sets:
    labels_dir = os.path.join(base_dir, "labels", dataset)
    images_dir = os.path.join(base_dir, "images", dataset)

    for label_file in os.listdir(labels_dir):
        # Only label text files are parsed; any other entry in the folder is
        # left untouched instead of being read as annotations.
        if not label_file.endswith(".txt"):
            continue

        label_path = os.path.join(labels_dir, label_file)
        # splitext swaps only the final extension; str.replace would also
        # rewrite a ".txt" that occurs earlier in the file name.
        image_name = os.path.splitext(label_file)[0] + ".png"
        image_path = os.path.join(images_dir, image_name)

        with open(label_path, "r") as f:
            lines = f.readlines()

        # Keep annotations whose leading class id is within range. Blank lines
        # are dropped first: they have no first token and would otherwise raise
        # IndexError on line.split()[0].
        corrected_lines = [
            line
            for line in lines
            if line.strip() and int(line.split()[0]) <= max_class_index
        ]

        if corrected_lines:
            # Rewrite only when something was actually filtered out.
            if corrected_lines != lines:
                with open(label_path, "w") as f:
                    f.writelines(corrected_lines)
        else:
            # No valid annotation left: drop the label file and its image.
            os.remove(label_path)
            if os.path.exists(image_path):
                os.remove(image_path)
                print(f"Removed: {image_path}")

print("Invalid class labels removed along with corresponding images from train, val, and test!")


## **Installing Ultralytics**

In [33]:
# !pip install ultralytics




## **Model Training**

In [None]:
from ultralytics import YOLO

# Load the `yolo11n.pt` checkpoint as the starting point for training.
model = YOLO("yolo11n.pt")

# Train on the dataset described by data.yaml: 20 epochs at image size 640.
# All other settings are left at the library defaults.
results = model.train(
    data="/content/dataset/data.yaml",
    epochs=20,
    imgsz=640,
)

Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/content/dataset/data.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=Tr

[34m[1mtrain: [0mScanning /content/dataset/labels/train... 14220 images, 0 backgrounds, 0 corrupt: 100%|██████████| 14220/14220 [01:27<00:00, 161.86it/s]


[34m[1mtrain: [0mNew cache created: /content/dataset/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/dataset/labels/val... 4060 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4060/4060 [00:23<00:00, 173.13it/s]


[34m[1mval: [0mNew cache created: /content/dataset/labels/val.cache
Plotting labels to runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000769, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      2.37G      1.508      2.072      1.289         34        640: 100%|██████████| 889/889 [07:13<00:00,  2.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 127/127 [01:10<00:00,  1.79it/s]


                   all       4060       9553       0.75      0.289      0.307      0.182

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      2.26G      1.368      1.191      1.217         49        640: 100%|██████████| 889/889 [07:34<00:00,  1.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 127/127 [01:12<00:00,  1.75it/s]


                   all       4060       9553      0.522      0.319      0.339      0.192

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      2.24G      1.369      1.081      1.222         44        640:  35%|███▌      | 312/889 [02:37<03:58,  2.42it/s]