# DATA PREPARATION

The initial dataset was downloaded following https://www.nature.com/articles/s41597-023-02780-1. The authors created a detailed dataset of over 130,000 above-ground storage tanks (ASTs) in the contiguous United States using high-resolution imagery.


## Extracting the Unique Classes from the Dataset


To understand the dataset composition, the annotation files, which are provided as Extensible Markup Language (XML) files in PASCAL Visual Object Classes (VOC) 2007 format, had to be examined.

This code extracts all the unique classes present

The output will be a list of distinct class labels in the dataset

In [None]:
#To extract the unique classes from the XML file

#importing required libraries
import os
import xml.etree.ElementTree as ET  #module for parsing and creating xml data

def extract_classes_from_xml(xml_folder):
    """Collect the distinct object class names used in Pascal VOC XML files.

    Only files directly inside ``xml_folder`` whose name ends with ``.xml``
    are read. Zero-byte files and files that fail to parse are reported and
    skipped, as are ``<object>`` elements that have no usable ``<name>``.

    Args:
        xml_folder: Directory containing the XML annotation files.

    Returns:
        Alphabetically sorted list of the unique class names found.
    """
    classes = set()  # a set keeps each class name only once
    for xml_file in os.listdir(xml_folder):
        if not xml_file.endswith(".xml"):
            continue
        xml_path = os.path.join(xml_folder, xml_file)
        if os.path.getsize(xml_path) == 0:  # Skip empty files
            print(f"Skipping empty file: {xml_file}")
            continue
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError:  # malformed XML: report it and move on
            print(f"Skipping invalid file: {xml_file}")
            continue
        for obj in root.findall("object"):
            # findtext() gives None when <name> is missing and "" when it is
            # empty; neither is a real class and None would break sorting.
            class_name = obj.findtext("name")
            if class_name is None or not class_name.strip():
                print(f"Skipping object without a class name in: {xml_file}")
                continue
            classes.add(class_name)
    return sorted(classes)

# Directory that holds the Pascal VOC annotation files to scan
xml_folder = "datasets/xml_dataset_tank/xmls"  # Path to XML files
# Gather the distinct class labels and show them
classes = extract_classes_from_xml(xml_folder)
print(f"Unique classes found: {classes}")


## Converting Pascal VOC XML Annotations to YOLO Format

Once the classes present in the dataset are identified, the annotation files in Pascal VOC (XML) format need to be converted to YOLO (TXT) format to make them compatible with the YOLO object detection model.

This code converts each XML annotation file into YOLO format, which consists of text files with object labels and normalised bounding box coordinates.

### Understanding the formats:

- Pascal VOC:
Stores object annotations in an XML file, including class name, bounding box (xmin, ymin, xmax, ymax), image size and other metadata

- YOLO (TXT) Format:
Each image gets a `.txt` file with the following space-separated format per line:

```
class_id center_x center_y width height
```

- `class_id`: Integer ID of the object class
- `center_x, center_y`: Center of bounding box (normalised to image dimensions)
- `width, height`: Bounding box size (normalised to image dimensions)



In [None]:
#To convert XML annotations in Pascal VOC format to YOLO format

#importing required libraries
import os
import xml.etree.ElementTree as ET

#defining the function
def voc_to_yolo(xml_folder, output_folder, classes):
    """Convert Pascal VOC XML annotations into YOLO ``.txt`` label files.

    For every ``.xml`` file directly inside ``xml_folder`` a text file with
    the same base name is written to ``output_folder``. Each kept object
    becomes one line ``class_id x_center y_center width height``, where the
    four box values are divided by the image size read from ``<size>``.

    Files that are empty, unparsable, or lack a positive image size are
    reported and skipped. Objects with an unknown class or an unreadable
    bounding box are reported and skipped.

    Args:
        xml_folder: Directory containing the XML annotation files.
        output_folder: Directory that receives the YOLO label files
            (created if it does not exist).
        classes: Sequence of class names; the position of a name is its
            YOLO class id.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Resolve name -> id once; setdefault keeps the first position of a
    # duplicated name, which is what list.index() would return.
    class_ids = {}
    for index, name in enumerate(classes):
        class_ids.setdefault(name, index)

    for xml_file in os.listdir(xml_folder):
        if not xml_file.endswith(".xml"):
            continue
        xml_path = os.path.join(xml_folder, xml_file)
        if os.path.getsize(xml_path) == 0:  # Skip empty files
            print(f"Skipping empty file: {xml_file}")
            continue
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError:
            print(f"Skipping invalid file: {xml_file}")
            continue

        # Image size. float() accepts both "640" and "640.0"; a missing tag
        # yields None, which float() rejects with TypeError.
        try:
            width = float(root.findtext("size/width"))
            height = float(root.findtext("size/height"))
        except (TypeError, ValueError):
            print(f"Skipping file with missing or invalid image size: {xml_file}")
            continue
        if width <= 0 or height <= 0:  # would divide by zero below
            print(f"Skipping file with missing or invalid image size: {xml_file}")
            continue

        label_lines = []
        for obj in root.findall("object"):
            class_name = obj.findtext("name")
            if class_name not in class_ids:
                print(f"Skipping unknown class: {class_name}")
                continue
            class_id = class_ids[class_name]

            # Extract the bounding box corners
            try:
                xmin, ymin, xmax, ymax = (
                    float(obj.findtext(f"bndbox/{tag}"))
                    for tag in ("xmin", "ymin", "xmax", "ymax")
                )
            except (TypeError, ValueError):
                print(f"Skipping object with missing or invalid bounding box in: {xml_file}")
                continue

            # Normalize centre and size by the image dimensions
            x_center = ((xmin + xmax) / 2) / width
            y_center = ((ymin + ymax) / 2) / height
            bbox_width = (xmax - xmin) / width
            bbox_height = (ymax - ymin) / height

            label_lines.append(
                f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n"
            )

        # The label file is written even when no object was kept, so an
        # image without usable objects still gets an (empty) label file.
        yolo_file = os.path.join(output_folder, os.path.splitext(xml_file)[0] + ".txt")
        with open(yolo_file, "w") as f:
            f.writelines(label_lines)

    print(f"Conversion completed! YOLO labels saved in: {output_folder}")

# Function call
# The annotations are read from the "xmls" sub-folder (the same folder the
# class-extraction cell scans) and the labels are written to the "labels"
# sub-folder, which is where split_dataset() looks for them.
voc_to_yolo(
    xml_folder="datasets/xml_dataset_tank/xmls",  # Path to XML files
    output_folder="datasets/xml_dataset_tank/labels",  # Path to save YOLO labels
    classes=[
        'closed_roof_tank',
        'external_floating_roof_tank',
        'narrow_closed_roof_tank',
        'sedimentation_tank',
        'spherical_tank',
        'undefined_object',
        'water_tower'
    ]  # Unique classes; the list position is the YOLO class id
)

## Downloading Slurry Tank Imagery and Annotations

After preparing the initial dataset, the next step was to collect specific imagery for slurry tanks. The approach involved:

1. Identifying Locations: Determined existing slurry tank locations using Open Street Map Datasets for England, Denmark and Wales

2. Downloading Imagery: High-resolution imagery was downloaded for the identified locations

3. Annotating Slurry Tank: Using Label Studio, the slurry tanks were manually labeled and annotations were exported directly in YOLO format to align with training requirements


## Data Augmentation: Expanding the Slurry Tank Dataset

After collecting and annotating the slurry tank images, it was observed that the dataset was too small for effective training. To improve model performance and to prevent overfitting, data augmentation techniques were applied to artificially increase the number of training samples.

The code provided below was used to perform augmentations

The *albumentations* library was used to perform the augmentation


In [None]:
#To create augmented images from slurry tank dataset

import os
import cv2   #OpenCV library for image processing
import albumentations as A  #Library for data augmentation
from albumentations.core.composition import BboxParams  #used to define parameters for bounding box augmentations
from tqdm import tqdm #library for showing progress bars

#Loading YOLO labels
def load_yolo_labels(label_file):
    """Read the bounding boxes stored in a YOLO label file.

    Blank lines are ignored. The class id is parsed through ``float`` so
    that ids written as ``"7.0"`` are accepted as well as ``"7"``. Only the
    four values after the class id are read, so a trailing extra column
    does not break parsing.

    Args:
        label_file: Path to the YOLO ``.txt`` label file.

    Returns:
        List of ``[x_center, y_center, width, height, class_id]`` entries,
        with the class id as the last element.
    """
    bboxes = []
    with open(label_file, "r") as file:
        for line in file:
            parts = line.split()
            if not parts:
                continue  # Skip empty lines
            class_id = int(float(parts[0]))
            x_center, y_center, width, height = map(float, parts[1:5])
            bboxes.append([x_center, y_center, width, height, class_id])
    return bboxes

#Saving Augmented Labels
def save_augmented_label(label_file, augmented_bboxes):
    """Write bounding boxes to a YOLO label file.

    Args:
        label_file: Path of the ``.txt`` file to create or overwrite.
        augmented_bboxes: Iterable of
            ``[class_id, x_center, y_center, width, height]`` entries.
    """
    with open(label_file, "w") as file:
        for class_id, x_center, y_center, width, height in augmented_bboxes:
            # The class id can arrive as a float (e.g. 7.0); YOLO labels
            # need an integer, so it is written as "7", never "7.0".
            file.write(
                f"{int(float(class_id))} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
            )

#Augmenting and Saving Images
def augment_and_save_images(input_images_dir, input_labels_dir, output_images_dir, output_labels_dir, augmentations, num_augmentations=3):
    """Create augmented copies of every labelled image in a folder.

    For each ``.jpg``/``.png``/``.jpeg`` image with a matching ``.txt``
    label file, the augmentation pipeline is run ``num_augmentations``
    times. Each result is saved as ``<name>_aug<i>.jpg`` together with a
    ``<name>_aug<i>.txt`` label file. Images without a label file, and
    images that cannot be read, are reported and skipped.

    Args:
        input_images_dir: Folder with the source images.
        input_labels_dir: Folder with the YOLO label files.
        output_images_dir: Folder for the augmented images (created if missing).
        output_labels_dir: Folder for the augmented labels (created if missing).
        augmentations: Callable pipeline invoked with ``image``, ``bboxes``
            and ``class_labels`` keyword arguments; it must return a mapping
            with ``"image"`` and ``"bboxes"`` entries.
        num_augmentations: Number of augmented variants per image.
    """
    os.makedirs(output_images_dir, exist_ok=True)
    os.makedirs(output_labels_dir, exist_ok=True)

    image_files = [f for f in os.listdir(input_images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]

    for image_file in tqdm(image_files, desc="Processing images"):
        base_name = os.path.splitext(image_file)[0]
        image_path = os.path.join(input_images_dir, image_file)
        label_file = os.path.join(input_labels_dir, base_name + ".txt")

        if not os.path.exists(label_file):
            print(f"Skipping {image_file}: No corresponding label file.")
            continue

        # cv2.imread signals an unreadable or corrupt file by returning None
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping {image_file}: Image could not be read.")
            continue

        # Each entry is [x_center, y_center, width, height, class_id]
        bboxes = load_yolo_labels(label_file)
        class_labels = [bbox[4] for bbox in bboxes]

        for i in range(num_augmentations):
            augmented = augmentations(
                image=image,
                bboxes=bboxes,
                class_labels=class_labels
            )

            # Save augmented image
            aug_image_path = os.path.join(output_images_dir, f"{base_name}_aug{i}.jpg")
            cv2.imwrite(aug_image_path, augmented["image"])

            # Save augmented labels. The class id travels as the fifth box
            # value and may come back as a float, so it is cast to int here.
            aug_label_path = os.path.join(output_labels_dir, f"{base_name}_aug{i}.txt")
            save_augmented_label(aug_label_path, [
                [int(class_id), x_center, y_center, width, height]
                for x_center, y_center, width, height, class_id in augmented["bboxes"]
            ])

    print(f"Augmentation complete. Augmented data saved to {output_images_dir} and {output_labels_dir}.")

#Function Call
# Runs only when the file is executed as a script: builds the augmentation
# pipeline and applies it to the slurry tank images.
if __name__ == "__main__":
    # Define paths
    input_images_dir = "datasets/slurry_tank/images"  # Update with images directory
    input_labels_dir = "datasets/slurry_tank/labels"  # Update with labels directory
    output_images_dir = "datasets/slurry_tank/augmented_data/images"
    output_labels_dir = "datasets/slurry_tank/augmented_data/labels"

    # Define augmentations
    # NOTE(review): argument names such as `var_limit` and
    # `quality_lower`/`quality_upper` depend on the installed albumentations
    # release - confirm them against the version in use.
    augmentations = A.Compose(
        [
            # Geometric
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Affine(scale=(0.9, 1.1), translate_percent=(0.05, 0.05), rotate=(-15, 15), p=0.5),  # Small shifts and rotations

            # Photometric
            A.RandomBrightnessContrast(p=0.3),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.3),
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.2),
            A.MotionBlur(blur_limit=5, p=0.2),

            # Compression Artifacts
            A.ImageCompression(quality_lower=50, quality_upper=100, p=0.2),
        ],
        # Bounding boxes are passed in YOLO format; "class_labels" is the
        # keyword that augment_and_save_images() supplies alongside them.
        bbox_params=BboxParams(
            format="yolo",  # Specify YOLO format
            label_fields=["class_labels"],  # Ensure class labels are linked
        )
    )

    # Run augmentation: three augmented variants per source image
    augment_and_save_images(
        input_images_dir,
        input_labels_dir,
        output_images_dir,
        output_labels_dir,
        augmentations,
        num_augmentations=3
    )

In [None]:
# Note: the image augmentation step writes the class id of the label files as
# "7.0" instead of "7". This cell rewrites every label file in place so that
# the class id is "7" again.
import os

labels_dir = 'datasets/slurry_tank/augmented_data/labels'

for file_name in os.listdir(labels_dir):
    if not file_name.endswith(".txt"):
        continue  # only label files are rewritten
    file_path = os.path.join(labels_dir, file_name)

    # Read the whole file before reopening it for writing
    with open(file_path, "r") as file:
        lines = file.readlines()

    updated_lines = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue  # drop empty lines
        if parts[0] == "7.0":  # class id was written as a float
            parts[0] = "7"  # store it as an integer instead
        updated_lines.append(" ".join(parts))  # rebuild the line

    # Write the cleaned lines back, terminated by a single newline
    with open(file_path, "w") as file:
        file.write("\n".join(updated_lines) + "\n")

print("Class IDs updated successfully!")

## Creating Image Mosaics for Slurry Tanks

After applying basic augmentations, image mosaics were introduced to further enhance the dataset complexity. Mosaic augmentation combines multiple images into a single image, creating more varied backgrounds and object placement.

The code below is used to create mosaiced images. Its main steps include:
- Selecting images randomly
- Resizing and arranging it in a grid
- Adjusting the Bounding Box Labels

In [None]:
#To create mosaiced images from the augmented slurry tank images
import cv2
import os
import random
import numpy as np

def load_image_and_labels(image_path, labels_path):
    """
    Load an image and its corresponding labels.

    Blank lines in the label file are ignored, and only the four values
    after the class id are read, so a trailing extra column does not break
    parsing. A missing label file yields an empty label list.

    Args:
        image_path (str): Path to the image.
        labels_path (str): Path to the label file.

    Returns:
        image (ndarray): Result of ``cv2.imread`` (``None`` when the image
            cannot be read).
        labels (list): ``[class_id, x_center, y_center, width, height]``
            entries, one per non-blank label line.
    """
    image = cv2.imread(image_path)
    labels = []
    if os.path.exists(labels_path):
        with open(labels_path, "r") as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue  # a blank line would otherwise raise IndexError
                class_id = int(float(parts[0]))  # accepts "7" and "7.0"
                x_center, y_center, width, height = map(float, parts[1:5])
                labels.append([class_id, x_center, y_center, width, height])
    return image, labels

def adjust_labels(labels, x_offset, y_offset, scale_x, scale_y):
    """
    Map bounding boxes from a single tile into mosaic coordinates.

    Centres are scaled and then shifted by the tile offset; widths and
    heights are only scaled. The input list is left untouched.

    Args:
        labels (list): Boxes as [class_id, x_center, y_center, width, height].
        x_offset (float): Horizontal offset added to each scaled x centre.
        y_offset (float): Vertical offset added to each scaled y centre.
        scale_x (float): Horizontal scale factor.
        scale_y (float): Vertical scale factor.

    Returns:
        list: New boxes in the same layout as the input.
    """
    return [
        [
            cls,
            (cx * scale_x) + x_offset,
            (cy * scale_y) + y_offset,
            box_w * scale_x,
            box_h * scale_y,
        ]
        for cls, cx, cy, box_w, box_h in labels
    ]

def create_mosaic(images_dir, labels_dir, output_images_dir, output_labels_dir, grid_size, num_samples):
    """
    Create mosaic images from training data.

    Each mosaic is a ``grid_size`` x ``grid_size`` grid of randomly chosen
    images. Every tile is resized to one common size (taken from the first
    image of the mosaic), so the rows can always be stacked and each tile
    covers exactly ``1 / grid_size`` of the mosaic in both directions. The
    label offsets rely on that.

    Args:
        images_dir (str): Directory containing images.
        labels_dir (str): Directory containing label files.
        output_images_dir (str): Directory to save mosaiced images.
        output_labels_dir (str): Directory to save mosaiced labels.
        grid_size (int): Number of rows and columns in the grid.
        num_samples (int): Number of mosaiced images to create.

    Raises:
        ValueError: If fewer than ``grid_size * grid_size`` images are
            available, or if a selected image cannot be read.
    """
    os.makedirs(output_images_dir, exist_ok=True)
    os.makedirs(output_labels_dir, exist_ok=True)

    image_files = [f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]

    tiles_per_mosaic = grid_size * grid_size
    # random.sample() would fail with a generic message; say what is missing.
    if num_samples > 0 and len(image_files) < tiles_per_mosaic:
        raise ValueError(
            f"A {grid_size}x{grid_size} mosaic needs {tiles_per_mosaic} images, "
            f"but only {len(image_files)} were found in {images_dir}"
        )

    scale = 1 / grid_size  # fraction of the mosaic covered by one tile

    for i in range(num_samples):
        selected_files = random.sample(image_files, tiles_per_mosaic)
        tiles = []
        all_labels = []
        tile_size = None  # (width, height) shared by every tile of this mosaic

        for j, file in enumerate(selected_files):
            image_path = os.path.join(images_dir, file)
            label_path = os.path.join(labels_dir, f"{os.path.splitext(file)[0]}.txt")
            image, labels = load_image_and_labels(image_path, label_path)
            if image is None:
                raise ValueError(f"Could not read image: {image_path}")

            if tile_size is None:
                h, w = image.shape[:2]
                # max(1, ...) keeps the size valid for very small images
                tile_size = (max(1, int(w * scale)), max(1, int(h * scale)))

            # Column and row of tile j, expressed in normalised mosaic units
            x_offset = (j % grid_size) * scale
            y_offset = (j // grid_size) * scale

            tiles.append(cv2.resize(image, tile_size))
            all_labels.extend(adjust_labels(labels, x_offset, y_offset, scale, scale))

        # Create mosaic: stack each row horizontally, then the rows vertically
        rows = [
            np.hstack(tiles[r * grid_size:(r + 1) * grid_size])
            for r in range(grid_size)
        ]
        mosaic = np.vstack(rows)

        # Save mosaic image
        mosaic_image_path = os.path.join(output_images_dir, f"mosaic_{grid_size}x{grid_size}_{i}.jpg")
        cv2.imwrite(mosaic_image_path, mosaic)

        # Save mosaic labels
        mosaic_label_path = os.path.join(output_labels_dir, f"mosaic_{grid_size}x{grid_size}_{i}.txt")
        with open(mosaic_label_path, "w") as f:
            for label in all_labels:
                f.write(" ".join(map(str, label)) + "\n")

    print(f"{grid_size}x{grid_size} Mosaiced images saved in {output_images_dir}")
    print(f"{grid_size}x{grid_size} Mosaiced labels saved in {output_labels_dir}")

# User inputs
# NOTE(review): the augmentation cell writes its output to
# "datasets/slurry_tank/augmented_data/..."; confirm that the input paths
# below point at the data you intend to mosaic.
images_dir = "datasets/augmented_data/images"  # Path to image directory
labels_dir = "datasets/augmented_data/labels"  # Path to label directory
output_images_dir = "datasets/mosaiced_data/images"  # Path to save mosaiced images
output_labels_dir = "datasets/mosaiced_data/labels"  # Path to save mosaiced labels
num_samples = 50  # Number of mosaiced images to create per grid size

# Run mosaic creation for different grid sizes
for grid_size in [2, 3, 4, 5, 6, 7, 8]:
    create_mosaic(
        images_dir,
        labels_dir,
        output_images_dir,
        output_labels_dir,
        grid_size,
        num_samples=num_samples  # use the setting above instead of a second literal
    )

## Splitting the Dataset

The next step is to split the dataset into train, validation and test sets. This ensures that the model is trained on one subset, validated on another and finally evaluated on an unseen test set.

The code below splits the dataset and copies the images and their corresponding label files to their respective folders.

First, split the above-ground storage tank dataset into train, val and test sets. Then copy the slurry tank images and labels for England and Denmark into the corresponding train folders. Run the augmentations on the Wales dataset, and split the Wales slurry tank data between validation and testing.

In [None]:
#To split the data for training, testing and validating

#importing libraries

import os
import shutil
from sklearn.model_selection import train_test_split #function from sklearn to split datasets into training and testing sets

def split_dataset(dataset_dir, output_dir, train_ratio, val_ratio, test_ratio, random_state=None):
    """Copy a YOLO dataset into train/val/test sub-folders.

    Images are read from ``<dataset_dir>/images`` and labels from
    ``<dataset_dir>/labels``. Each split is written to
    ``<output_dir>/<split>/images`` and ``<output_dir>/<split>/labels``.
    The source files are copied, not moved. An image without a label file
    is still copied and a warning is printed.

    Args:
        dataset_dir: Dataset root containing 'images' and 'labels' folders.
        output_dir: Root folder that receives the three splits.
        train_ratio: Fraction of the images used for training.
        val_ratio: Fraction of the images used for validation.
        test_ratio: Fraction of the images used for testing.
        random_state: Optional seed forwarded to ``train_test_split``. With
            a fixed value the same files land in the same split on every
            run; ``None`` (default) keeps the split random.
    """
    images_dir = os.path.join(dataset_dir, "images")
    labels_dir = os.path.join(dataset_dir, "labels")

    # Validate directories
    if not os.path.exists(images_dir) or not os.path.exists(labels_dir):
        print(f"Error: 'images' or 'labels' subdirectory not found in {dataset_dir}")
        return

    # List all image files. Sorting removes the dependence on the arbitrary
    # order of os.listdir(), so a fixed seed gives the same split everywhere.
    image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.jpeg', '.png')))
    if len(image_files) == 0:
        print("No image files found in the 'images' folder. Please check your dataset.")
        return

    # Split dataset: first carve off the training set, then divide the
    # remainder between validation and test in proportion to their ratios.
    train_files, temp_files = train_test_split(
        image_files, test_size=(1 - train_ratio), random_state=random_state
    )
    val_files, test_files = train_test_split(
        temp_files, test_size=test_ratio / (val_ratio + test_ratio), random_state=random_state
    )

    # Define output subdirectories
    splits = {'train': train_files, 'val': val_files, 'test': test_files}
    for split, files in splits.items():
        split_images_dir = os.path.join(output_dir, split, "images")
        split_labels_dir = os.path.join(output_dir, split, "labels")
        os.makedirs(split_images_dir, exist_ok=True)
        os.makedirs(split_labels_dir, exist_ok=True)

        # Copy files to their split directory
        for file in files:
            # Copy image
            shutil.copy(os.path.join(images_dir, file), split_images_dir)

            # Copy corresponding label
            label_file = os.path.splitext(file)[0] + ".txt"
            label_path = os.path.join(labels_dir, label_file)
            if os.path.exists(label_path):
                shutil.copy(label_path, split_labels_dir)
            else:
                print(f"Warning: Label file for {file} not found. Skipping.")

    print(f"Dataset split completed. Train: {len(train_files)}, Val: {len(val_files)}, Test: {len(test_files)}")
    print(f"Data saved to {output_dir}")

# Define paths and ratios
dataset_dir = "datasets/xml_dataset_tank"  # Path to the dataset containing 'images' and 'labels' folders
output_dir = "datasets/processed_data"  # Root folder that receives the train/val/test sub-folders
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# Ensure ratios sum to 1
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-6, "Ratios must sum to 1.0!"
# The small tolerance (1e-6) accounts for minor floating-point arithmetic errors that might occur when adding the ratios


# Call the function
split_dataset(dataset_dir, output_dir, train_ratio, val_ratio, test_ratio)

## Dataset Configuration: Generating a YAML File for YOLO Training

A YAML configuration file is required for training the YOLO model. It provides the essential information about the dataset including class names and file paths.

In [None]:
#creating a data.yaml file for YOLO dataset configuration

#importing libraries
import os

#Constructs YAML content with paths and number of classes
#writes YAML content to data.yaml
def create_data_yaml(output_dir, train_path, val_path, test_path, classes, dataset_root="./processed_data"):
    """Write the ``data.yaml`` dataset description used for YOLO training.

    The file lists the dataset root, the train/val/test image paths, the
    number of classes and the class names. ``output_dir`` is created when
    it does not exist yet.

    Args:
        output_dir: Folder in which ``data.yaml`` is written.
        train_path: Path to the training images, written as ``train:``.
        val_path: Path to the validation images, written as ``val:``.
        test_path: Path to the test images, written as ``test:``.
        classes: Class names; the position of a name is its class id.
        dataset_root: Value written as ``path:``. Defaults to the
            previously hard-coded ``./processed_data``.
    """
    # list() makes any sequence (e.g. a tuple) render with square brackets,
    # which is what the YAML list syntax needs.
    class_names = list(classes)
    yaml_lines = [
        "# YOLO Dataset Configuration",
        f"path: {dataset_root}",
        f"train: {train_path}",
        f"val: {val_path}",
        f"test: {test_path}",
        "# Number of classes",
        f"nc: {len(class_names)}",
        f"names: {class_names}",
    ]

    os.makedirs(output_dir, exist_ok=True)
    yaml_path = os.path.join(output_dir, 'data.yaml')
    with open(yaml_path, 'w') as f:
        f.write("\n".join(yaml_lines))
    print(f"`data.yaml` file created at {yaml_path}")

# Function call
if __name__ == "__main__":
    create_data_yaml(
        output_dir="datasets/processed_data",  # Path to processed dataset
        train_path="./train/images",    # Path to training images
        val_path="./val/images",        # Path to validation images
        test_path="./test/images",      # Path to testing images
        classes=[
            'closed_roof_tank',
            'external_floating_roof_tank',
            'narrow_closed_roof_tank',
            'sedimentation_tank',
            'spherical_tank',
            'undefined_object',
            'water_tower',
            'slurry_tank'
        ]  # Unique class names
    )

# MODEL TRAINING

The YOLOv8l model was used in training, with 300 epochs. A relatively small batch size was chosen based on available GPU memory. The learning rate schedule and optimizer were selected for stable convergence.


In [None]:
#training the data

from ultralytics import YOLO

model = YOLO("yolov8l.pt")      # Start from the YOLOv8-large checkpoint

# Train the model with the chosen hyperparameters
# NOTE(review): the configuration cell writes data.yaml to
# "datasets/processed_data"; confirm that the working directory makes the
# relative path below resolve to that file.
model.train(
    data="./processed_data/data.yaml",  # Path to dataset YAML
    epochs=300,  # Upper bound; early stopping (patience) may end training sooner
    batch=8,  # Adjust based on GPU memory
    imgsz=640,  # Training image size
    lr0=0.001,  # Initial learning rate
    lrf=0.0001,  # NOTE(review): Ultralytics documents lrf as a fraction of lr0 (final LR = lr0 * lrf) - confirm this is the intended value
    optimizer="AdamW",  # Use AdamW for stability
    weight_decay=0.0005,  # Regularization
    patience=20,  # Stops if no improvement after 20 epochs
    hsv_h=0.015,  # Hue augmentation
    hsv_s=0.7,  # Saturation augmentation
    hsv_v=0.4,  # Brightness augmentation
    translate=0.1,  # Image translation
    scale=0.5,  # Image scaling
    flipud=0.1,  # Vertical flip probability
    fliplr=0.5,  # Horizontal flip probability
    mosaic=1.0,  # Mosaic augmentation
    mixup=0.1,  # Mixup augmentation
    dropout=0.05,  # NOTE(review): documented by Ultralytics for classification tasks - confirm it has an effect for detection
    multi_scale=True,  # Train with different image sizes
    workers=16,  # Data loading worker processes
    device=0,  # Use GPU
)

# Detection runs are stored under runs/detect (the inference code loads
# its weights from runs/detect/train/weights/best.pt).
print("Training complete! Check the 'runs/detect' folder for results.")

# USING THE TRAINED MODEL FOR INFERENCE

This script runs object detection on a folder of JPEG or PNG images using a trained YOLOv8 model. It saves detection results as:

- Annotated images with bounding boxes
- YOLO-format text label files
- Confidence scores

The detections are saved in a specified output directory for easy review or further processing.

In [None]:
#for jpeg, png images

from ultralytics import YOLO
import os

def detect_objects(
    weights_path="runs/detect/train/weights/best.pt",
    source_path="runs/detect/predictions/results9/0cb8e0d7-tile18_127992_86433.jpg",
    output_dir="runs/detect/predictions",
):
    """Run the trained model on an image (or folder) and save the detections.

    Annotated images, YOLO-format label files and confidence scores are
    saved below ``output_dir``. Only class id 7 is reported. All arguments
    default to the values that were previously hard-coded, so calling the
    function without arguments behaves as before.

    Args:
        weights_path: Path to the trained weights.
        source_path: Image file or folder to run detection on.
        output_dir: Folder below which the results are saved (created if
            it does not exist).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Initialize the model
    model = YOLO(weights_path)

    # Run detection
    results = model.predict(
        source=source_path,
        save=True,         # Save predictions
        save_txt=True,     # Save results as YOLO format labels
        save_conf=True,    # Save confidence scores in results
        conf=0.5,          # Confidence threshold for predictions
        iou=0.5,           # IoU threshold for non-max suppression
        project=output_dir, # Folder to save the results
        name="results",    # Subfolder name
        line_width =1,     # Width of bounding box lines
        classes =[7],      #Restricts detection to class ID 7
        device =0          #GPU
    )

    # The run folder may get a numeric suffix (the default source path above
    # points into "results9"), so report the folder recorded on the results
    # when available and fall back to the requested name otherwise.
    save_dir = getattr(results[0], "save_dir", None) if results else None
    if not save_dir:
        save_dir = os.path.join(output_dir, 'results')
    print(f"Detection complete. Results saved to {save_dir}")

if __name__ == "__main__":
    detect_objects()
