## 1. Data Preprocessing
#### Preprocess Datasets to extract bounding boxes from segmentation masks

Preprocess CODEBRIM dataset:

In [None]:
import os
from bs4 import BeautifulSoup

# Folder holding the CODEBRIM XML annotation files (one file per image).
DATA_DIR = r'F:\Accumulated_Defect_Segmentation\CODEBRIM_original_images\original_dataset\annotations'

# The YOLO label files go into a sub-folder. Create it up front because
# open(..., "w") does not create missing parent directories.
os.makedirs(os.path.join(DATA_DIR, "YOLO_annotations"), exist_ok=True)


def _is_set(tag):
    """Return True when an XML flag element contains a non-zero integer."""
    return bool(int(tag.string))


# The context manager releases the directory iterator even if parsing fails.
with os.scandir(DATA_DIR) as files:
    for file in files:
        # Sub-folders (including YOLO_annotations itself) are not annotations.
        if not file.is_file():
            continue

        with open(file.path, 'r') as f:
            data = f.read()

        print("Parsing: " + file.name)
        Bs_data = BeautifulSoup(data, "xml")

        # Image resolution, used below to normalise the pixel coordinates.
        img_res = Bs_data.find('size')
        h, w = int(img_res.height.string), int(img_res.width.string)

        # file.name[:-4] drops the last four characters, i.e. an extension
        # such as ".xml".
        label_path = os.path.join(DATA_DIR, "YOLO_annotations", file.name[:-4] + ".txt")
        with open(label_path, "w") as outfile:
            for obj in Bs_data.find_all("object"):
                box = obj.bndbox
                xmin, ymin = int(box.xmin.string), int(box.ymin.string)
                xmax, ymax = int(box.xmax.string), int(box.ymax.string)
                width, height = xmax - xmin, ymax - ymin
                x_centre, y_centre = xmin + width / 2, ymin + height / 2

                # Express centre and size as fractions of the image size.
                x_centre, y_centre = x_centre / w, y_centre / h
                width, height = width / w, height / h

                defect = obj.Defect
                crack = _is_set(defect.Crack)
                spalling = _is_set(defect.Spallation)
                efflorescence = _is_set(defect.Efflorescence)
                # Exposed bars and corrosion stains share one "corrosion"
                # class. ExposedBars has to be read through .string like the
                # other flags: attribute access such as `.int` searches for a
                # child tag named "int" and always yields None here.
                corrosion = _is_set(defect.ExposedBars) or _is_set(defect.CorrosionStain)

                # One label per object; when several flags are set the first
                # match in this order wins: crack, spalling, efflorescence,
                # corrosion.
                if crack:
                    class_id = 0
                elif spalling:
                    class_id = 1
                elif efflorescence:
                    class_id = 3
                elif corrosion:
                    class_id = 2
                else:
                    # No known defect flagged: nothing to write.
                    continue

                outfile.write(f"{class_id} {x_centre} {y_centre} {width} {height}\n")

Preprocess QuakeCity dataset

In [None]:
import os
import numpy as np
import cv2
from skimage.measure import label, regionprops, find_contours
from skimage.morphology import dilation, closing

""" Creating a directory """


def create_dir(path):
    """Create `path` (and any missing parents) unless something already exists there."""
    if os.path.exists(path):
        return
    os.makedirs(path)


""" Convert a mask to border image """


def mask_to_border(mask):
    """Draw the contours of a 2-D mask onto a blank image of the same size.

    Args:
        mask: 2-D array; its contours are obtained with ``find_contours``.

    Returns:
        A float array shaped like ``mask`` that is 0 everywhere except at
        the contour points, which are set to 255.
    """
    n_rows, n_cols = mask.shape
    outline = np.zeros((n_rows, n_cols))

    for contour in find_contours(mask):
        for point in contour:
            # int() truncates each contour coordinate to an array index;
            # the first coordinate indexes axis 0, the second axis 1.
            outline[int(point[0]), int(point[1])] = 255

    return outline


""" Mask to bounding boxes """


def mask_to_bbox(mask):
    """Return one pixel bounding box per connected contour of `mask`.

    The mask is first reduced to its contour image (``mask_to_border``),
    the contour image is labelled into connected regions, and the bounding
    box of every region is collected.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists in pixel coordinates, where the
        x values come from the column entries of ``regionprops``' ``bbox``
        and the y values from its row entries.
    """
    outline = mask_to_border(mask)
    regions = regionprops(label(outline))

    # bbox is indexed as (row_start, col_start, row_end, col_end); reorder it
    # so that x (columns) comes before y (rows).
    return [
        [region.bbox[1], region.bbox[0], region.bbox[3], region.bbox[2]]
        for region in regions
    ]


def parse_mask(mask):
    """Return `mask` repeated three times along a new trailing axis."""
    return np.stack((mask, mask, mask), axis=-1)

def plot_bbox(img, bboxes, c):
    """Draw rectangles on `img` for boxes given as fractions of the image size.

    Args:
        img: 3-D image array (height, width, channels); drawn on in place.
        bboxes: Iterable of boxes indexed as (x_centre, y_centre, width,
            height), each value a fraction of the image width or height.
        c: Colour passed to ``cv2.rectangle``.

    Returns:
        The same ``img`` object with the rectangles drawn (line thickness 2).
    """
    img_h, img_w, _channels = img.shape

    for box in bboxes:
        # Scale the fractional centre and half-extents back to pixels.
        centre_x, centre_y = box[0] * img_w, box[1] * img_h
        half_w, half_h = box[2] * img_w / 2, box[3] * img_h / 2

        top_left = (int(centre_x - half_w), int(centre_y - half_h))
        bottom_right = (int(centre_x + half_w), int(centre_y + half_h))
        cv2.rectangle(img, top_left, bottom_right, c, 2)

    return img

def remove_small_boxes(bboxes, min_size=50):
    """Return the boxes whose width and height are both at least `min_size`.

    A new list is built instead of removing items from `bboxes` while
    iterating over it: in-place removal shifts the remaining items, so the
    box following every removed one is never examined and small boxes slip
    through.

    Args:
        bboxes: Iterable of boxes indexed as (x1, y1, x2, y2).
        min_size: Minimum width and height, in the same units as the boxes.

    Returns:
        A new list with the surviving boxes in their original order; the
        input is left unmodified.
    """
    return [
        box
        for box in bboxes
        if box[2] - box[0] >= min_size and box[3] - box[1] >= min_size
    ]

def write_outfile(bboxes, classification, outfile, w, h):
    """Write one label line per box to `outfile`.

    Each line has the form ``classification x_centre y_centre width height``
    where the four geometry values are divided by the image width `w` or
    height `h`.

    Args:
        bboxes: Iterable of boxes indexed as (xmin, ymin, xmax, ymax).
        classification: Value written as the first field of every line.
        outfile: Object with a ``write`` method that accepts text.
        w: Image width used to scale x values.
        h: Image height used to scale y values.
    """
    for box in bboxes:
        left, top = box[0], box[1]
        box_w = int(box[2] - left)
        box_h = int(box[3] - top)

        fields = (
            classification,
            (left + box_w / 2) / w,
            (top + box_h / 2) / h,
            box_w / w,
            box_h / h,
        )
        outfile.write(" ".join(str(value) for value in fields) + "\n")


def _clean_mask(mask):
    """Smooth a binary mask before box extraction.

    Applies closing twice, dilation five times and a final closing, all with
    the default footprint of the skimage.morphology functions.
    """
    mask = closing(closing(mask))
    for _ in range(5):
        mask = dilation(mask)
    return closing(mask)


def _boxes_from_mask_file(path):
    """Load a grayscale mask file and return its size-filtered bounding boxes.

    Returns an empty list when the file cannot be read.
    """
    mask = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising.
        print("No readable mask at " + path)
        return []
    return remove_small_boxes(mask_to_bbox(_clean_mask(mask)))


if __name__ == "__main__":
    # Dataset folders: per-class masks live in DATA_DIR/<class>/, the
    # photographs in IMG_DIR under the same file names.
    DATA_DIR = r'F:\Accumulated_Defect_Segmentation\UH_QuakeCity\QuakeCity\label'
    IMG_DIR = r'F:\Accumulated_Defect_Segmentation\UH_QuakeCity\QuakeCity\image'

    # Folder receiving one YOLO label file per image.
    out_dir = os.path.join(DATA_DIR, "YOLO_annotations")
    create_dir(out_dir)

    # The crack folder drives the loop; the other classes are looked up by
    # the same file name. The context manager closes the directory iterator
    # inside the guard, so nothing runs (or fails) when the file is imported.
    with os.scandir(os.path.join(DATA_DIR, "crack")) as files:
        for file in files:
            if not file.is_file():
                continue

            print("Processing "+file.name)

            name = file.name.split(".")[0]

            # The image is only needed for its resolution.
            image = cv2.imread(os.path.join(IMG_DIR, file.name), cv2.IMREAD_COLOR)
            if image is None:
                print("Skipping " + file.name + ": image not found or unreadable")
                continue
            h, w, _ = image.shape

            crack_bboxes = _boxes_from_mask_file(os.path.join(DATA_DIR, "crack", file.name))
            rebar_bboxes = _boxes_from_mask_file(os.path.join(DATA_DIR, "rebar", file.name))
            spall_bboxes = _boxes_from_mask_file(os.path.join(DATA_DIR, "spall", file.name))

            # Class ids: 0 = crack, 2 = rebar, 1 = spall.
            with open(os.path.join(out_dir, name+".txt"), "w") as outfile:
                write_outfile(crack_bboxes, 0, outfile, w, h)
                write_outfile(rebar_bboxes, 2, outfile, w, h)
                write_outfile(spall_bboxes, 1, outfile, w, h)

Preprocess S2DS dataset

In [None]:
import sys
import glob
import os
import cv2
import shutil
import numpy as np

# Root of the S2DS dataset; main() looks for train/, test/ and val/ folders here.
DATASET_DIR = '/home/zaid/datasets/defect_segment/s2ds'
# Destination root; main() creates images/, labels/ and check/ inside it.
OUTPUT_DIR = '/home/zaid/datasets/defect_segment/s2ds_out'

# Colour key of the *_lab.png label masks (defect class per colour):
# white: cracks
# red: spalling
# yellow: corrosion
# blue: efflorescence
# NOTE(review): main() matches efflorescence with the cv2 colour (255, 255, 0),
# which is cyan in BGR order rather than pure blue -- confirm against the
# dataset palette.

def main(dataset_dir, output_dir):
    """Export the S2DS images with YOLO labels derived from their colour masks.

    Every image found in the ``train``, ``test`` and ``val`` folders of
    ``dataset_dir`` is copied to ``output_dir/images``. Its ``*_lab.png``
    mask is split into one binary mask per defect colour, boxes are
    extracted with ``mask_to_bbox`` and written to ``output_dir/labels`` as
    ``class x_centre y_centre width height`` lines whose four geometry
    values are fractions of the image size. An overlay with the boxes drawn
    on the image is saved to ``output_dir/check``.

    Args:
        dataset_dir: Root folder of the S2DS dataset.
        output_dir: Folder that receives ``images``, ``labels`` and ``check``.
    """
    # (YOLO class id, colour as loaded by cv2 -- used both to select the
    # class in the mask and to draw its boxes on the check image)
    classes = (
        (0, (255, 255, 255)),  # cracks
        (1, (0, 0, 255)),      # spalling
        (2, (0, 255, 255)),    # corrosion
        (3, (255, 255, 0)),    # efflorescence
    )

    def to_yolo(pixel_boxes, img_w, img_h):
        """Convert pixel [x1, y1, x2, y2] boxes to fractional [xc, yc, w, h].

        mask_to_bbox, as defined earlier in this file, returns pixel corner
        coordinates, whereas plot_bbox and the label lines written below use
        centre/size values expressed as fractions of the image size.
        """
        return [
            [
                (x1 + x2) / 2 / img_w,
                (y1 + y2) / 2 / img_h,
                (x2 - x1) / img_w,
                (y2 - y1) / img_h,
            ]
            for x1, y1, x2, y2 in pixel_boxes
        ]

    for sub in ('images', 'labels', 'check'):
        os.makedirs(os.path.join(output_dir, sub), exist_ok=True)

    images = []
    for split in ("train", "test", "val"):
        images += sorted(glob.glob(os.path.join(dataset_dir, split, "*")))
    # Files carrying "_lab" in their name are label masks, not images to
    # export. Testing the file name only keeps a "_lab" elsewhere in the
    # directory path from hiding every image.
    images = [path for path in images if '_lab' not in os.path.basename(path)]
    n = len(images)

    for i, img in enumerate(images, start=1):
        print(f"Processing image {i} of {n}", end='\r')

        # os.path.basename honours the platform separator, unlike split('/').
        img_name = os.path.basename(img)
        stem = os.path.splitext(img_name)[0]

        image = cv2.imread(img, cv2.IMREAD_COLOR)
        mask = cv2.imread(os.path.splitext(img)[0] + '_lab.png', cv2.IMREAD_COLOR)
        if image is None or mask is None:
            # cv2.imread returns None for missing/unreadable files; without
            # this check the failure would surface later as an obscure error.
            print(f"Skipping {img_name}: image or mask could not be read")
            continue
        img_h, img_w = image.shape[:2]

        shutil.copy(img, os.path.join(output_dir, 'images', img_name))

        detections = []
        for class_id, colour in classes:
            # Equal lower and upper bounds select exactly this colour.
            bound = np.array(colour)
            class_mask = cv2.inRange(mask, bound, bound)
            boxes = to_yolo(mask_to_bbox(class_mask), img_w, img_h)
            detections.append((class_id, colour, boxes))

        with open(os.path.join(output_dir, 'labels', stem + '.txt'), 'w') as f:
            for class_id, _, boxes in detections:
                for box in boxes:
                    f.write(f"{class_id} " + ' '.join(str(x) for x in box) + '\n')

        # Visual sanity check: draw every class in its mask colour.
        for _, colour, boxes in detections:
            image = plot_bbox(image, boxes, colour)

        cv2.imwrite(os.path.join(output_dir, 'check', stem + '_check.png'), image)


if __name__ == "__main__":
    main(DATASET_DIR, OUTPUT_DIR)

Preprocess Zhang dataset

In [None]:
import sys
import glob
import os
import cv2
import shutil
import numpy as np

# Root of the Zhang dataset; main() reads images/ and masks/ below it.
DATASET_DIR = '/home/zaid/datasets/defect_segment/zhang_defect_segmentation'
# Destination root; main() creates images/, labels/ and check/ inside it.
OUTPUT_DIR = '/home/zaid/datasets/defect_segment/zhang_defect_segmentation_out'


def main(dataset_dir, output_dir):
    """Export the Zhang images with YOLO labels derived from per-class masks.

    Every file in ``dataset_dir/images`` is copied to ``output_dir/images``.
    For each class the mask ``dataset_dir/masks/<image id><class>.jpg`` is
    loaded when it exists, boxes are extracted with ``mask_to_bbox`` and
    written to ``output_dir/labels/<image id>.txt`` as
    ``class x_centre y_centre width height`` lines whose four geometry
    values are fractions of the image size. An overlay with the boxes drawn
    on the image is saved to ``output_dir/check``.

    Args:
        dataset_dir: Root folder of the dataset (``images`` and ``masks``).
        output_dir: Folder that receives ``images``, ``labels`` and ``check``.
    """
    # (YOLO class id, mask file-name suffix, overlay colour for cv2)
    classes = (
        (0, 'crack', (255, 255, 255)),
        (1, 'spall', (0, 0, 255)),
        (2, 'rebar', (0, 255, 255)),
    )

    def to_yolo(pixel_boxes, img_w, img_h):
        """Convert pixel [x1, y1, x2, y2] boxes to fractional [xc, yc, w, h].

        mask_to_bbox, as defined earlier in this file, returns pixel corner
        coordinates, whereas plot_bbox and the label lines written below use
        centre/size values expressed as fractions of the image size.
        """
        return [
            [
                (x1 + x2) / 2 / img_w,
                (y1 + y2) / 2 / img_h,
                (x2 - x1) / img_w,
                (y2 - y1) / img_h,
            ]
            for x1, y1, x2, y2 in pixel_boxes
        ]

    for sub in ('images', 'labels', 'check'):
        os.makedirs(os.path.join(output_dir, sub), exist_ok=True)

    images = sorted(glob.glob(os.path.join(dataset_dir, "images", "*")))
    # Skip files carrying "_lab" in their name; only the file name is tested
    # so that a "_lab" elsewhere in the directory path cannot hide every image.
    images = [path for path in images if '_lab' not in os.path.basename(path)]
    n = len(images)

    for i, img in enumerate(images, start=1):
        print(f"Processing image {i} of {n}", end='\r')

        # os.path.basename honours the platform separator, unlike split('/').
        img_name = os.path.basename(img)
        stem, ext = os.path.splitext(img_name)
        # Mask and label files are keyed by the name up to the first dot.
        img_id = img_name.split('.')[0]

        image = cv2.imread(img, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread returns None for missing/unreadable files.
            print(f"Skipping {img_name}: image could not be read")
            continue
        img_h, img_w = image.shape[:2]

        shutil.copy(img, os.path.join(output_dir, 'images', img_name))

        detections = []
        for class_id, suffix, colour in classes:
            mask_path = os.path.join(dataset_dir, 'masks', img_id + suffix + '.jpg')
            mask = None
            if os.path.exists(mask_path):
                mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            # A class without a (readable) mask has no boxes for this image.
            boxes = [] if mask is None else to_yolo(mask_to_bbox(mask), img_w, img_h)
            detections.append((class_id, colour, boxes))

        with open(os.path.join(output_dir, 'labels', img_id + '.txt'), 'w') as f:
            for class_id, _, boxes in detections:
                for box in boxes:
                    f.write(f"{class_id} " + ' '.join(str(x) for x in box) + '\n')

        # Visual sanity check: draw every class in its own colour.
        for _, colour, boxes in detections:
            image = plot_bbox(image, boxes, colour)

        # Keep the source extension so "_check" is appended for any image
        # type, not only for .png files.
        cv2.imwrite(os.path.join(output_dir, 'check', stem + '_check' + ext), image)


if __name__ == "__main__":
    main(DATASET_DIR, OUTPUT_DIR)

## 2. Models Training
#### Train YOLOv8 models on each dataset using Roboflow and Ultralytics

CODEBRIM_baseline.py

In [None]:
import os
import ultralytics
from ultralytics import YOLO
from roboflow import Roboflow

# Remember the launch directory; the working directory changes below.
HOME = os.getcwd()
print(HOME)

# Run the Ultralytics environment checks before doing any work.
ultralytics.checks()

# Prefer a key supplied through the ROBOFLOW_API_KEY environment variable so
# the secret does not have to be written into the file; the placeholder is
# kept as the fallback.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "YOUR API KEY"))

# os.chdir raises FileNotFoundError when the folder is missing, so create it
# first. The dataset is presumably downloaded into the current directory.
os.makedirs('./datasets', exist_ok=True)
os.chdir('./datasets')
project = rf.workspace("cvisslab").project("codebrim-poidd")
dataset = project.version(2155).download("yolov8")


QuakeCity_baseline.py

In [None]:
import os
import ultralytics
from ultralytics import YOLO
from roboflow import Roboflow

# Remember the launch directory; the working directory changes below.
HOME = os.getcwd()
print(HOME)

# Run the Ultralytics environment checks before doing any work.
ultralytics.checks()

# Prefer a key supplied through the ROBOFLOW_API_KEY environment variable so
# the secret does not have to be written into the file; the placeholder is
# kept as the fallback.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "YOUR API KEY"))

# os.chdir raises FileNotFoundError when the folder is missing, so create it
# first. The dataset is presumably downloaded into the current directory.
os.makedirs('./datasets', exist_ok=True)
os.chdir('./datasets')
project = rf.workspace("cvisslab").project("quakecity")
dataset = project.version(1).download("yolov8")

S2DS_baseline.py

In [None]:
import os
import ultralytics
from ultralytics import YOLO
from roboflow import Roboflow

# Remember the launch directory; the working directory changes below.
HOME = os.getcwd()
print(HOME)

# Run the Ultralytics environment checks before doing any work.
ultralytics.checks()

# Prefer a key supplied through the ROBOFLOW_API_KEY environment variable so
# the secret does not have to be written into the file; the placeholder is
# kept as the fallback.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "YOUR API KEY"))

# os.chdir raises FileNotFoundError when the folder is missing, so create it
# first. The dataset is presumably downloaded into the current directory.
os.makedirs('./datasets', exist_ok=True)
os.chdir('./datasets')
project = rf.workspace("cvisslab").project("s2ds")
dataset = project.version(1).download("yolov8")

Zhang.py

In [None]:
import os
import ultralytics
from ultralytics import YOLO
from roboflow import Roboflow

# Remember the launch directory; the working directory changes below.
HOME = os.getcwd()
print(HOME)

# Run the Ultralytics environment checks before doing any work.
ultralytics.checks()

# Prefer a key supplied through the ROBOFLOW_API_KEY environment variable so
# the secret does not have to be written into the file; the placeholder is
# kept as the fallback.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "YOUR API KEY"))

# os.chdir raises FileNotFoundError when the folder is missing, so create it
# first. The dataset is presumably downloaded into the current directory.
os.makedirs('./datasets', exist_ok=True)
os.chdir('./datasets')
project = rf.workspace("cvisslab").project("zhang-3seb8")
dataset = project.version(1).download("yolov8")

Dockerfile

Run Docker Container

Run Training

## 3. Model Validation

#### Validate baseline models and run cross-validation between each model and each dataset

QuakeCity

Zhang

CODEBRIM

S2DS