In [None]:
import numpy as np
import pandas as pd
from xml.dom.minidom import parse
from shutil import copyfile
import os
from pathlib import Path

In [None]:
# Mount Google Drive at /content/drive; the dataset archive is read from there
from google.colab import drive
drive.mount('/content/drive')

In [None]:

# Unzip the dataset
!unzip /content/drive/MyDrive/BTP/dataset.zip -d /content

In [None]:

# Create necessary directories
!mkdir -p /content/Dataset/labels
!mkdir -p /content/Dataset/images

In [None]:
# Class names in label order: a name's position in this list is the class id
# that save_txt_file writes to the label files (via classes.index).
classes = ['helmet', 'head', 'person']

In [None]:
def convert_annot(size, box):
    """Convert a corner-format pixel box to a normalized centre-format box.

    Args:
        size: Image ``(width, height)`` in pixels. Each entry is passed through
            ``int``, so ints and integer strings are both accepted.
        box: ``(xmin, ymin, xmax, ymax)`` in pixels. Each entry is passed
            through ``int``.

    Returns:
        ``[x_center, y_center, width, height]`` as plain Python floats, each
        divided by the matching image dimension.

    Raises:
        ZeroDivisionError: If the image width or height is 0.
    """
    x1 = int(box[0])
    y1 = int(box[1])
    x2 = int(box[2])
    y2 = int(box[3])
    img_w = int(size[0])
    img_h = int(size[1])

    box_w = x2 - x1
    box_h = y2 - y1
    # Box centre in pixels
    x_center = x1 + box_w / 2
    y_center = y1 + box_h / 2

    # Divide directly in double precision. Scaling by a float32 reciprocal
    # loses precision and makes the result type depend on the NumPy version.
    return [x_center / img_w, y_center / img_h, box_w / img_w, box_h / img_h]

In [None]:
def save_txt_file(img_jpg_file_name, size, img_box, labels_dir='/content/Dataset/labels'):
    """Write the label file for one image, one line per bounding box.

    Each line is ``<class_id> <x_center> <y_center> <width> <height>``, where
    the four box values come from ``convert_annot``.

    Args:
        img_jpg_file_name: Base name of the output file; ``'.txt'`` is
            appended to it.
        size: Image ``(width, height)``, forwarded to ``convert_annot``.
        img_box: Iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
        labels_dir: Directory the label file is written to.

    Raises:
        ValueError: If a class name is not present in ``classes``.
    """
    save_file_name = os.path.join(labels_dir, img_jpg_file_name + '.txt')
    # Open with 'w' rather than 'a+': every call receives all boxes of the
    # image, so appending would duplicate the labels when the cell is re-run.
    with open(save_file_name, 'w') as label_file:
        for box in img_box:
            cls_num = classes.index(box[0])
            x, y, w, h = convert_annot(size, box[1:])
            label_file.write(f"{cls_num} {x} {y} {w} {h}\n")


In [None]:
def get_xml_data(file_path, img_xml_file):
    """Parse one XML annotation file and write the matching label file.

    Reads ``<size>/<width>`` and ``<size>/<height>`` plus, for every
    ``<object>``, its ``<name>`` and ``<xmin>/<ymin>/<xmax>/<ymax>`` values,
    then hands them to ``save_txt_file``.

    Args:
        file_path: Directory that contains the annotation file.
        img_xml_file: Annotation file name without the ``.xml`` extension; it
            is also used as the base name of the label file.

    Raises:
        IndexError: If one of the tags listed above is missing or empty.
        ValueError: If a box coordinate is not an integer string.
    """
    def _first_text(node, tag):
        # Text content of the first <tag> element found under ``node``
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    root = parse(os.path.join(file_path, img_xml_file + '.xml')).documentElement

    # Only width and height are needed; the unused <filename> and <depth>
    # lookups were dropped so their absence no longer aborts the conversion.
    img_size = root.getElementsByTagName("size")[0]
    img_w = _first_text(img_size, "width")
    img_h = _first_text(img_size, "height")

    img_box = []
    for obj in root.getElementsByTagName("object"):
        corners = [int(_first_text(obj, tag)) for tag in ("xmin", "ymin", "xmax", "ymax")]
        img_box.append([_first_text(obj, "name")] + corners)

    save_txt_file(img_xml_file, [img_w, img_h], img_box)

# List and process annotation files
ANNOTATIONS_DIR = '/content/annotations'
for annot_file in sorted(os.listdir(ANNOTATIONS_DIR)):
    # splitext strips only the final extension, so dotted names stay intact
    stem, ext = os.path.splitext(annot_file)
    # Skip anything that is not an XML annotation (hidden files, folders, ...)
    if ext != '.xml':
        continue
    get_xml_data(ANNOTATIONS_DIR, stem)


In [None]:
from sklearn.model_selection import train_test_split

# List and split image files: 80% train, then the remaining 20% is halved
# into val and test.
# os.listdir returns names in arbitrary order, so sort first; otherwise the
# fixed random_state would not produce the same split on every run.
image_list = sorted(os.listdir('/content/images'))
train_list, holdout_list = train_test_split(image_list, test_size=0.2, random_state=42)
val_list, test_list = train_test_split(holdout_list, test_size=0.5, random_state=42)

print('total =', len(image_list))
print('train :', len(train_list))
print('val   :', len(val_list))
print('test  :', len(test_list))


In [None]:
def copy_data(file_list, img_labels_root, imgs_source, mode, dataset_root='/content/Dataset'):
    """Copy the images of one split, and their label files, into the dataset tree.

    Images go to ``<dataset_root>/images/<mode>`` and labels to
    ``<dataset_root>/labels/<mode>``; both directories are created if needed.

    Args:
        file_list: Image file names (e.g. ``'img1.png'``) belonging to the split.
            A name without an extension is treated as a ``.png`` file.
        img_labels_root: Directory holding the ``<image base name>.txt`` labels.
        imgs_source: Directory holding the source images.
        mode: Split name used as the sub-directory, e.g. ``'train'``.
        dataset_root: Root of the output dataset tree.

    Raises:
        FileNotFoundError: If an image or its label file does not exist.
    """
    images_dst = os.path.join(dataset_root, 'images', mode)
    labels_dst = os.path.join(dataset_root, 'labels', mode)
    os.makedirs(images_dst, exist_ok=True)
    os.makedirs(labels_dst, exist_ok=True)

    for file in file_list:
        # splitext removes only the final extension, so any image type works
        # and a '.png' in the middle of a name is left alone.
        img_name, ext = os.path.splitext(file)
        img_file = img_name + (ext or '.png')
        label_file = img_name + '.txt'

        # Copy image
        copyfile(os.path.join(imgs_source, img_file), os.path.join(images_dst, img_file))

        # Copy label
        copyfile(os.path.join(img_labels_root, label_file), os.path.join(labels_dst, label_file))

# Copy every split into its own sub-directory of the dataset tree
for split_name, split_files in (("train", train_list), ("val", val_list), ("test", test_list)):
    copy_data(split_files, '/content/Dataset/labels', '/content/images', split_name)

In [None]:
# List what is now inside the dataset's images directory
!ls /content/Dataset/images

In [None]:
import yaml

# Create the dataset configuration passed to the `yolo` commands (data=data.yaml).
# nc and names are derived from `classes` so the class ids written to the label
# files and the names in the config cannot drift apart.
config = {
    "path": "/content/Dataset/images",
    "train": "train",
    "val": "val",
    "test": "test",
    "nc": len(classes),
    "names": list(classes),
}
with open("data.yaml", "w") as config_file:
    yaml.dump(config, config_file, default_flow_style=False)


In [None]:
# Print data.yaml to check the configuration that was just written
!cat data.yaml

In [None]:
!git clone https://github.com/ultralytics/ultralytics
!pip install ultralytics

In [None]:
pip install wandb

In [None]:
# Turn off Weights & Biases through its command-line tool
!wandb disabled

In [None]:
# Train YOLOv8 model
!yolo task=detect mode=train data=data.yaml model=yolov8s.pt epochs=5 lr0=0.01

In [None]:
# Check the results: list the files in the training run directory
!ls /content/runs/detect/train

In [None]:
from IPython.display import Image, clear_output
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import plotly.express as px
import pandas as pd

# Plot mAP50 per epoch from the training log
df = pd.read_csv("/content/runs/detect/train/results.csv")
# The CSV header may be padded with spaces; strip it so the column lookup
# works whether or not the padding is present.
df = df.rename(columns=str.strip)
fig = px.line(df, x='epoch', y='metrics/mAP50(B)', title='mAP50')
fig.show()

In [None]:
# Show the val_batch0_pred.jpg image from the training run directory
Image(filename='/content/runs/detect/train/val_batch0_pred.jpg', width=1000)

In [None]:
# Validate the model: run the best checkpoint of the training run against the
# dataset described in data.yaml
!yolo task=detect mode=val model=/content/runs/detect/train/weights/best.pt data=data.yaml

In [None]:
# Optional: uncomment the lines below to save the trained model to Google Drive
# model_path = "/content/runs/detect/train/weights/best.pt"
# drive_model_path = "/content/drive/MyDrive/best.pt"
# copyfile(model_path, drive_model_path)

# print(f"Model saved to {drive_model_path}")

In [None]:
# from google.colab import files

# Optional: uncomment these lines to download the trained weights file
# files.download('/content/runs/detect/train/weights/best.pt')
