In [18]:
import os
import glob
import pandas as pd
import io
import shutil
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET
import torch
from google.colab import drive
# Mount Google Drive inside the Colab VM so files under /content/drive can be read
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# here is the dataset https://drive.google.com/uc?id=1h7R5dnsUQNRKt8I7n5PWm2QYXWRJWcxN
!unzip /content/drive/MyDrive/dataset/dataset.zip

In [3]:
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install

Cloning into 'yolov5'...
remote: Enumerating objects: 14997, done.[K
remote: Counting objects: 100% (62/62), done.[K
remote: Compressing objects: 100% (52/52), done.[K
remote: Total 14997 (delta 28), reused 24 (delta 10), pack-reused 14935[K
Receiving objects: 100% (14997/14997), 13.97 MiB | 33.12 MiB/s, done.
Resolving deltas: 100% (10292/10292), done.
/content/yolov5
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.0/184.0 KB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m78.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Folder holding the annotation files; collect and show every .xml file found in it
path = '/content/data'
xml_pattern = '{}/*.xml'.format(path)
allFiles = glob.glob(xml_pattern)
allFiles

In [5]:
def xml_to_pd(path):
    """Gather the object annotations of every .xml file in a directory into one DataFrame.

    Each ``<object>`` element found in a file becomes one row; the image-level
    fields (``filename`` and the ``size`` width/height) are repeated on every
    row that comes from the same file.

    Parameters
    ----------
    path : str
        Directory that is searched (non-recursively) for ``*.xml`` files.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename, width, height, class, xmin, ymin, xmax, ymax``.
        Width, height and the four box coordinates are parsed with ``int``.
        The frame is empty (but keeps these columns) when nothing is found.
    """
    columns = ['filename', 'width', 'height',
               'class', 'xmin', 'ymin', 'xmax', 'ymax']
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')

    records = []
    for annotation_file in glob.glob(path + '/*.xml'):
        root = ET.parse(annotation_file).getroot()

        # Image-level metadata shared by all objects in this file
        image_name = root.find('filename').text
        size_node = root.find('size')
        image_width = int(size_node.find('width').text)
        image_height = int(size_node.find('height').text)

        # One record per labelled object: class name plus box corners
        for obj in root.findall('object'):
            box = obj.find('bndbox')
            label = obj.find('name').text
            corners = [int(box.find(tag).text) for tag in corner_tags]
            records.append((image_name, image_width, image_height, label, *corners))

    return pd.DataFrame(records, columns=columns)

In [6]:
# Parse every .xml annotation under `path` into a dataframe with one row per
# labelled object, then display it
pothole_df = xml_to_pd(path)
pothole_df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,0275.png,1920,1080,low,1,537,510,648
1,0247.png,1920,1080,moderate,52,923,343,1080
2,0247.png,1920,1080,moderate,280,448,852,681
3,0098.png,1920,1080,low,737,580,957,761
4,0098.png,1920,1080,low,1126,547,1337,611
...,...,...,...,...,...,...,...,...
553,0049.png,1920,1080,low,798,563,973,637
554,0049.png,1920,1080,low,937,491,1018,519
555,0049.png,1920,1080,low,823,476,909,514
556,0076.png,1920,1080,low,515,461,637,516


In [7]:
# Distinct class labels present in the annotations. pandas returns them in order of
# first appearance, and that order decides the integer IDs assigned in classMap.
# NOTE(review): the order should agree with the class names listed in Classes.yaml — confirm.
classes = pothole_df['class'].unique().tolist()
classes

['low', 'moderate', 'high']

In [8]:
# Map every class name to an integer ID equal to its position in `classes`
classMap = dict(zip(classes, range(len(classes))))

classMap

{'low': 0, 'moderate': 1, 'high': 2}

In [9]:
# Create the main data folder
!mkdir potholeData
# Create images and labels data folders
!mkdir potholeData/images
!mkdir potholeData/labels
# Create train,val and test data folders for both images and labels
!mkdir potholeData/images/train potholeData/images/val potholeData/images/test  potholeData/labels/train potholeData/labels/val potholeData/labels/test

In [10]:
# Distinct image filenames referenced by the annotation dataframe
imgs = pothole_df['filename'].unique().tolist()
# Walk the dataframe one image at a time. groupby(sort=False) yields the images in
# order of first appearance (the same order as `imgs`) without re-scanning the
# whole dataframe once per image.
for img, boundingInfo in pothole_df.groupby('filename', sort=False):
    boundingDetails = []
    # One output line per annotated object of this image
    for idx, row in boundingInfo.iterrows():
        # Integer class ID for the row's class name
        class_id = classMap[row["class"]]
        # Convert the corner coordinates (xmin, ymin, xmax, ymax) into the
        # centre/size form written to the label file
        bb_width = row['xmax'] - row['xmin']
        bb_height = row['ymax'] - row['ymin']
        bb_xcentre = (row['xmin'] + row['xmax'])/2
        bb_ycentre = (row['ymin'] + row['ymax'])/2
        # Normalise by the image width and height so every value is a fraction of the image
        bb_xcentre /= row['width']
        bb_ycentre /= row['height']
        bb_width /= row['width']
        bb_height /= row['height']
        # Line layout: "<class_id> <x_centre> <y_centre> <width> <height>"
        boundingDetails.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, bb_xcentre, bb_ycentre, bb_width, bb_height))
    # Strip only the final extension, so a filename containing extra dots keeps its
    # full stem and the label file is named after the image it belongs to
    file_name = os.path.join("potholeData/labels", os.path.splitext(img)[0] + ".txt")
    # Write the annotation inside a context manager so the file is flushed and closed
    with open(file_name, "w") as label_file:
        print("\n".join(boundingDetails), file=label_file)

In [None]:
# Collect (and display) the path of every label file written to potholeData/labels
labels_pattern = 'potholeData/labels/*.txt'
annotations = glob.glob(labels_pattern)
annotations

In [None]:
# Collect (and display) the path of every .png image in the data folder
imagePath = '/content/data'
png_pattern = '{}/*.png'.format(imagePath)
images = glob.glob(png_pattern)
images

In [20]:
# Sort both lists so that the i-th image and the i-th label file belong together
images.sort()
annotations.sort()

# train_test_split pairs the two lists purely by position. Check that the file
# stems really line up first; otherwise images would be matched with another
# image's boxes (or the split would fail with an unhelpful length error).
image_stems = [os.path.splitext(os.path.basename(p))[0] for p in images]
label_stems = [os.path.splitext(os.path.basename(p))[0] for p in annotations]
assert image_stems == label_stems, (
    "images and annotations do not pair up one-to-one; unmatched stems (first 10): {}".format(
        sorted(set(image_stems) ^ set(label_stems))[:10]))

# Hold out 20% of the samples, then halve that hold-out into validation and test
# (80/10/10 overall). A fixed random_state keeps the split reproducible.
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.2, random_state = 123)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 123)

In [22]:
# Utility function to copy files into a destination folder
def move_files_to_folder(list_of_files, destination_folder):
    """Copy every file in ``list_of_files`` into ``destination_folder``.

    Despite the name, the files are copied with ``shutil.copy``; the sources
    are left in place.

    Parameters
    ----------
    list_of_files : iterable of str
        Paths of the files to copy.
    destination_folder : str
        Target directory. It is created, including missing parents, when it
        does not exist yet.

    Raises
    ------
    AssertionError
        If a file cannot be copied. The offending path is printed first and
        the underlying error is attached as the cause.
    """
    # Make sure the target really is a directory. If it were missing, shutil.copy
    # would treat a path without a trailing slash as a *file* name and every copy
    # would overwrite that single file.
    os.makedirs(destination_folder, exist_ok=True)
    for f in list_of_files:
        try:
            shutil.copy(f, destination_folder)
        except Exception as err:
            # Report which file failed, then stop. Raising explicitly (rather than
            # `assert False`) keeps the failure under `python -O` and preserves the
            # original error; KeyboardInterrupt is no longer intercepted.
            print(f)
            raise AssertionError(
                "could not copy {!r} to {!r}".format(f, destination_folder)) from err
            
# Copy each split's images and label files into its folder under potholeData
split_targets = [
    (train_images, 'potholeData/images/train'),
    (val_images, 'potholeData/images/val/'),
    (test_images, 'potholeData/images/test/'),
    (train_annotations, 'potholeData/labels/train/'),
    (val_annotations, 'potholeData/labels/val/'),
    (test_annotations, 'potholeData/labels/test/'),
]
for split_files, split_folder in split_targets:
    move_files_to_folder(split_files, split_folder)

# Download the YAML file from here: https://drive.google.com/uc?id=1tJsTYvOod7mJxSlGtqclY9sEL9C00RHA

In [24]:
# Train the YOLOv5m model on the dataset described by Classes.yaml, starting from the
# yolov5m.pt weights: 640-px images, batch size 4, 5 epochs, 4 workers, and the
# hyp.scratch-med.yaml hyperparameter file. The run is named yolo_pothole_det_m3.
!python train.py --img 640 --cfg yolov5m.yaml --hyp data/hyps/hyp.scratch-med.yaml --batch 4 --epochs 5 --data Classes.yaml --weights yolov5m.pt --workers 4 --name yolo_pothole_det_m3

[34m[1mtrain: [0mweights=yolov5m.pt, cfg=yolov5m.yaml, data=Classes.yaml, hyp=data/hyps/hyp.scratch-med.yaml, epochs=5, batch_size=4, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=4, project=runs/train, name=yolo_pothole_det_m3, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-69-g3b6e27a Python-3.8.16 torch-1.13.0+cu116 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.1, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.3, cls_pw=1.0, obj=0.7, obj_pw=1.0, iou_t=0.2,