In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Skip this cell if the .tar.gz archives have already been extracted
import tarfile

# Locations of the two dataset archives on Google Drive
file_path_images = '/content/drive/MyDrive/Colab Notebooks/dataset/images.tar.gz'
file_path_bounding_boxes = '/content/drive/MyDrive/Colab Notebooks/dataset/bounding_boxes.tar.gz'

# Unpack the images archive first, then the bounding-box archive, into the dataset folder
for archive_path in (file_path_images, file_path_bounding_boxes):
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(path='/content/drive/MyDrive/Colab Notebooks/dataset/')

print("Extraction complete for both files!")



Extraction complete for both files!


**Write a function that converts the annotation format of the given training labels to the YOLO
annotation format (the code for Step 1)**

In [None]:
import pandas as pd
import os
from tqdm import tqdm

In [None]:
# CSV files holding the original bounding-box annotations for each split
train_csv = '/content/drive/MyDrive/Colab Notebooks/dataset/Bounding_boxes/train_labels.csv'
test_csv = '/content/drive/MyDrive/Colab Notebooks/dataset/Bounding_boxes/test_labels.csv'


# Folders containing the images of each split
train_img = '/content/drive/MyDrive/Colab Notebooks/dataset/images/train'
test_img = '/content/drive/MyDrive/Colab Notebooks/dataset/images/test'

# Folders that receive the generated YOLO label (.txt) files
train_labels = '/content/drive/MyDrive/Colab Notebooks/dataset/labels/train'
test_labels = '/content/drive/MyDrive/Colab Notebooks/dataset/labels/test'

# Class name (as written in the CSV 'class' column) -> YOLO class id
class_mapping = {'Graffiti': 0}

In [None]:
def yolo_annotation_convert(csv_file, images_dir, output_dir, class_mapping):
    """Convert corner-format box annotations from a CSV into YOLO label files.

    The CSV is read with pandas and must provide the columns ``filename``,
    ``width``, ``height``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
    For every image named in the CSV that also exists in ``images_dir``, one
    ``<image stem>.txt`` file is written to ``output_dir`` with one line per
    box: ``class_id x_center y_center width height``, all four box values
    divided by the image width/height taken from the first CSV row of that image.

    Args:
        csv_file: Path of the annotation CSV.
        images_dir: Folder that holds the annotated images.
        output_dir: Folder that receives the label files (created if missing).
        class_mapping: Dict mapping class names in the CSV to integer class ids.

    Raises:
        KeyError: If a row's class is not a key of ``class_mapping``.
    """
    annotations = pd.read_csv(csv_file)
    per_image = annotations.groupby('filename')  # one group of rows per image

    # Make sure the label folder is there before any file is written
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for image_name, rows in tqdm(per_image, desc=f'Processing annotations for {csv_file}'):
        # Images listed in the CSV but missing on disk get no label file
        if not os.path.exists(os.path.join(images_dir, image_name)):
            continue

        # The image size is read from the first annotation row of the group
        first_row = rows.iloc[0]
        image_width = first_row['width']
        image_height = first_row['height']

        label_lines = []
        for _, row in rows.iterrows():
            class_id = class_mapping[row['class']]
            xmin, ymin, xmax, ymax = (row[key] for key in ('xmin', 'ymin', 'xmax', 'ymax'))

            # Corner coordinates -> normalized centre / size
            x_center = ((xmin + xmax) / 2) / image_width
            y_center = ((ymin + ymax) / 2) / image_height
            bbox_width = (xmax - xmin) / image_width
            bbox_height = (ymax - ymin) / image_height

            label_lines.append(f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}" + '\n')

        # The label file shares the image's base name
        label_name = os.path.splitext(image_name)[0] + '.txt'
        with open(os.path.join(output_dir, label_name), 'w') as label_file:
            label_file.writelines(label_lines)

In [None]:
# Generate YOLO label files for the training split, then for the test split
for csv_path, image_dir, label_dir in (
    (train_csv, train_img, train_labels),
    (test_csv, test_img, test_labels),
):
    yolo_annotation_convert(csv_path, image_dir, label_dir, class_mapping)

Processing annotations for /content/drive/MyDrive/Colab Notebooks/dataset/Bounding_boxes/train_labels.csv: 100%|██████████| 813/813 [00:05<00:00, 144.07it/s]
Processing annotations for /content/drive/MyDrive/Colab Notebooks/dataset/Bounding_boxes/test_labels.csv: 100%|██████████| 209/209 [00:01<00:00, 111.11it/s]


In [None]:
# count the files
def files_count(dir_path):
    """Return the number of regular files directly inside ``dir_path``.

    Sub-directories are not counted and not descended into. A path that does
    not exist yields 0.
    """
    if os.path.exists(dir_path):
        return sum(
            1
            for entry in os.listdir(dir_path)
            if os.path.isfile(os.path.join(dir_path, entry))
        )
    return 0

# Count the image files and the generated label files of each split
train_img_count, train_labels_count = files_count(train_img), files_count(train_labels)
test_img_count, test_labels_count = files_count(test_img), files_count(test_labels)

# Report the four counts
print(f"Number of files in train_img: {train_img_count}")
print(f"Number of files in train_labels: {train_labels_count}")
print(f"Number of files in test_img: {test_img_count}")
print(f"Number of files in test_labels: {test_labels_count}")

Number of files in train_img: 813
Number of files in train_labels: 813
Number of files in test_img: 209
Number of files in test_labels: 209


**Train a YOLO model that can detect graffiti in an image, using 400 images randomly
selected from the training data**

In [None]:
import random
import shutil

In [None]:
# Folders that hold the randomly selected subset of images and the matching label files.
# The folders themselves are created later by image_selecting() / anno_copy().
selected_train_img_dir = '/content/drive/MyDrive/Colab Notebooks/dataset/images/selected_train'
selected_train_labels_dir = '/content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train'
selected_test_img_dir = '/content/drive/MyDrive/Colab Notebooks/dataset/images/selected_test'
selected_test_labels_dir = '/content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test'


In [None]:
def image_selecting(source_directory, destination_directory, num_img, img_used=None):
    """Randomly copy up to ``num_img`` not-yet-used JPG images into a clean folder.

    Candidates are the files in ``source_directory`` whose name ends in
    ``.jpg`` or ``.JPG`` and is not in ``img_used``. The destination folder is
    created if needed and emptied (files and sub-folders) before the selected
    images are copied into it.

    Args:
        source_directory: Folder to draw images from.
        destination_directory: Folder that will contain exactly the selection.
        num_img: Maximum number of images to select; fewer are taken when
            fewer unused candidates remain.
        img_used: Set of file names selected by earlier calls. It is updated
            in place with the new selection. When omitted, a fresh empty set
            is used for this call.

    Returns:
        The ``img_used`` set, now including the newly selected file names.
    """
    # A `set()` default would be created once and shared by every call that
    # omits the argument, silently excluding images chosen by earlier calls.
    if img_used is None:
        img_used = set()

    # os.listdir() returns names in arbitrary order; sorting makes the result of
    # random.sample() depend only on the seed and the folder contents.
    img = sorted(
        file
        for file in os.listdir(source_directory)
        if file.endswith(('.jpg', '.JPG')) and file not in img_used
    )
    img_selected = random.sample(img, min(num_img, len(img)))
    img_used.update(img_selected)

    os.makedirs(destination_directory, exist_ok=True)

    # Clear existing files in destination directory so it holds only this selection
    for file in os.listdir(destination_directory):
        file_path = os.path.join(destination_directory, file)
        if os.path.isfile(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)

    for i in img_selected:
        shutil.copy(os.path.join(source_directory, i), os.path.join(destination_directory, i))
    return img_used


In [None]:
def anno_copy(img_directory, label_destination_directory, label_source_directory):
    """Copy the label file of every JPG image in a folder into a clean label folder.

    The destination folder is created if it is missing and then emptied (files
    and sub-folders). For each file in ``img_directory`` whose name ends in
    ``.jpg`` or ``.JPG``, the ``<stem>.txt`` file is copied from
    ``label_source_directory`` when it exists; images without a label file are
    skipped.

    Args:
        img_directory: Folder whose images decide which labels are copied.
        label_destination_directory: Folder that receives the label files.
        label_source_directory: Folder holding all available label files.
    """
    if not os.path.exists(label_destination_directory):
        os.makedirs(label_destination_directory)

    # Empty the destination so labels from an earlier selection do not linger
    for entry in os.listdir(label_destination_directory):
        entry_path = os.path.join(label_destination_directory, entry)
        if os.path.isfile(entry_path):
            os.unlink(entry_path)
        elif os.path.isdir(entry_path):
            shutil.rmtree(entry_path)

    for image_file in os.listdir(img_directory):
        if not image_file.endswith(('.jpg', '.JPG')):
            continue
        label_name = os.path.splitext(image_file)[0] + '.txt'
        label_src = os.path.join(label_source_directory, label_name)
        if not os.path.exists(label_src):
            continue
        shutil.copy(label_src, os.path.join(label_destination_directory, label_name))


In [None]:
# Select 400 images from the training set
random.seed(42)
selected_train_img = set()
selected_train_img = image_selecting(train_img, selected_train_img_dir, 400, selected_train_img)
# Copy labels only for the images that were selected: anno_copy() copies one label per
# image found in its first argument, so it must be given the selected folder, not the full split.
anno_copy(selected_train_img_dir, selected_train_labels_dir, train_labels)

# Select 40 images from the test set
random.seed(42)
selected_test_img = set()
selected_test_img = image_selecting(test_img, selected_test_img_dir, 40, selected_test_img)
anno_copy(selected_test_img_dir, selected_test_labels_dir, test_labels)

In [None]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.78-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

**Train YOLO model with Ultralytics**

In [None]:
from ultralytics import YOLO
import yaml

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
# Build the dataset description file that is passed to model.train()
# Absolute image folders of the training and validation subsets
train_img_path = os.path.abspath(selected_train_img_dir)
val_img_path = os.path.abspath(selected_test_img_dir)

# Destination of the YAML file
yaml_file_path = '/content/drive/MyDrive/Colab Notebooks/dataset/graffiti.yaml'

# One class ('Graffiti'); 'train' / 'val' point at the image folders
data_dict = dict(
    train=train_img_path,
    val=val_img_path,
    nc=1,
    names=['Graffiti'],
)

# Serialize the description to disk
with open(yaml_file_path, 'w') as yaml_out:
    yaml.dump(data_dict, yaml_out, indent=2)

print("YAML file created and saved at:", yaml_file_path)


YAML file created and saved at: /content/drive/MyDrive/Colab Notebooks/dataset/graffiti.yaml


In [None]:
# Dataset description written by the previous cell
yaml_path = '/content/drive/MyDrive/Colab Notebooks/dataset/graffiti.yaml'

# Start from the yolo11n.pt checkpoint
model = YOLO("yolo11n.pt")

# Train on the selected subset for 5 epochs
train = model.train(
    data=yaml_path,
    epochs=5,
    imgsz=640,
    batch=16,
    name='graffiti_detection',
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|██████████| 5.35M/5.35M [00:00<00:00, 266MB/s]


Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/content/drive/MyDrive/Colab Notebooks/dataset/graffiti.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=graffiti_detection, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, sa

100%|██████████| 755k/755k [00:00<00:00, 89.0MB/s]


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      6640  ultralytics.nn.modules.block.C3k2            [32, 64, 1, False, 0.25]      
  3                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
  4                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  5                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  6                  -1  1     87040  ultralytics.nn.modules.block.C3k2            [128, 128, 1, True]           
  7                  -1  1    295424  ultralytics

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:03<00:00, 130.95it/s]


[34m[1mtrain: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<00:00, 105.58it/s]


[34m[1mval: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test.cache
Plotting labels to runs/detect/graffiti_detection/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/graffiti_detection[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5      2.32G      1.505      3.001       1.43         89        640: 100%|██████████| 25/25 [00:10<00:00,  2.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:03<00:00,  1.72s/it]

                   all         40        118     0.0065      0.661      0.145     0.0728






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5      2.22G       1.52      2.281      1.382         45        640: 100%|██████████| 25/25 [00:08<00:00,  3.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.20it/s]

                   all         40        118      0.335      0.111      0.125     0.0574






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5      2.27G      1.483      2.212      1.407         66        640: 100%|██████████| 25/25 [00:10<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.76it/s]

                   all         40        118      0.316      0.136      0.134     0.0523






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5      2.22G      1.461      2.018      1.378         67        640: 100%|██████████| 25/25 [00:09<00:00,  2.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.74it/s]

                   all         40        118      0.401      0.254      0.251      0.117






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5      2.23G      1.342      1.856      1.303         49        640: 100%|██████████| 25/25 [00:09<00:00,  2.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.97it/s]

                   all         40        118       0.48      0.352      0.356      0.215






5 epochs completed in 0.018 hours.
Optimizer stripped from runs/detect/graffiti_detection/weights/last.pt, 5.5MB
Optimizer stripped from runs/detect/graffiti_detection/weights/best.pt, 5.5MB

Validating runs/detect/graffiti_detection/weights/best.pt...
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.61it/s]


                   all         40        118      0.478      0.347      0.355      0.214
Speed: 0.2ms preprocess, 3.1ms inference, 0.0ms loss, 2.6ms postprocess per image
Results saved to [1mruns/detect/graffiti_detection[0m


In [None]:
import torch
from torchvision.ops import box_iou
from PIL import Image
import csv

In [None]:
def cal_bb_IoU(pred_bboxeses, gt_bboxeses):
    """Return the Intersection-over-Union of two boxes as a Python float.

    Args:
        pred_bboxeses: Predicted box as ``[x1, y1, x2, y2]``.
        gt_bboxeses: Ground-truth box as ``[x1, y1, x2, y2]``.

    Returns:
        Intersection area divided by union area, or ``0.0`` when the union
        area is zero.
    """
    pred = torch.tensor(pred_bboxeses)
    gt = torch.tensor(gt_bboxeses)

    # Overlap extent along each axis; negative extents (disjoint boxes) clamp to 0
    overlap_w = (torch.min(pred[2], gt[2]) - torch.max(pred[0], gt[0])).clamp(min=0)
    overlap_h = (torch.min(pred[3], gt[3]) - torch.max(pred[1], gt[1])).clamp(min=0)
    overlap = overlap_w * overlap_h

    def _area(box):
        # Width times height of an [x1, y1, x2, y2] box
        return (box[2] - box[0]) * (box[3] - box[1])

    union = _area(pred) + _area(gt) - overlap

    # Two zero-area boxes: report no overlap instead of dividing by zero
    if union == 0:
        return 0.0

    return (overlap / union).item()

In [None]:
def model_evaluation(model, images_dir, labels_dir, output_img_dir = None, IoU_threshold=0.5, conf_threshold=None):
    """Run the model on every JPG in a folder and score it against YOLO labels.

    For each image the ground-truth boxes are read from ``<stem>.txt`` in
    ``labels_dir`` (YOLO normalized format, converted to pixel corners), the
    model is run on the image, and the best IoU over all
    (predicted box, ground-truth box) pairs is recorded together with the
    confidence of the predicted box that achieved it.

    Args:
        model: Object with a ``predict(path, conf=..., verbose=...)`` method
            returning results that expose ``boxes.xyxy`` and ``boxes.conf``.
        images_dir: Folder with the images to evaluate (``.jpg``, any case).
        labels_dir: Folder with the matching YOLO label files.
        output_img_dir: Optional folder; it is created when at least one image
            produced a prediction. This function writes nothing into it.
        IoU_threshold: Used as the prediction confidence cut-off when
            ``conf_threshold`` is not given (the original behavior of this
            function, despite the name).
        conf_threshold: Confidence cut-off passed to ``model.predict``.
            Defaults to ``IoU_threshold``.

    Returns:
        pandas.DataFrame with one row per image and the columns
        ``image_name``, ``confidence value`` and ``IoU value``. Images with no
        prediction get 0.0 for both values; images with predictions but no
        ground truth get the highest prediction confidence and an IoU of 0.0.
    """
    if conf_threshold is None:
        conf_threshold = IoU_threshold

    eva_results = []  # One dict per evaluated image

    img_files = [f for f in os.listdir(images_dir) if f.lower().endswith('.jpg')]

    for img_file in tqdm(img_files, desc="Evaluating....."):
        img_path = os.path.join(images_dir, img_file)
        label_file = os.path.splitext(img_file)[0] + '.txt'
        label_path = os.path.join(labels_dir, label_file)

        ground_truth_boxes = []

        if os.path.exists(label_path):
            # Read the image size once per image and release the file handle right away
            with Image.open(img_path) as img:
                img_width, img_height = img.size

            with open(label_path, 'r') as f:
                for line in f:
                    components = line.strip().split()
                    # Expect exactly: class, x_center, y_center, width, height
                    if len(components) != 5:
                        continue
                    _cls, x_center, y_center, width, height = map(float, components)
                    # Normalized centre/size -> pixel corner coordinates
                    x1 = (x_center - width / 2) * img_width
                    y1 = (y_center - height / 2) * img_height
                    x2 = (x_center + width / 2) * img_width
                    y2 = (y_center + height / 2) * img_height
                    ground_truth_boxes.append([x1, y1, x2, y2])

        predictions = model.predict(img_path, conf = conf_threshold, verbose = False)
        predicted_boxes = []
        confs = []

        # Keep every detection of every result, not just the first box,
        # so an image with several boxes is scored on its best match.
        for pred in predictions:
            if len(pred.boxes) > 0:
                predicted_boxes.extend(pred.boxes.xyxy.tolist())
                confs.extend(pred.boxes.conf.tolist())

        if predicted_boxes and ground_truth_boxes:
            best_IoU = 0.0
            best_confidence = 0.0
            # Best IoU over all prediction / ground-truth pairs
            for pred_box, conf in zip(predicted_boxes, confs):
                for b in ground_truth_boxes:
                    IoU = cal_bb_IoU(pred_box, b)
                    if IoU > best_IoU:
                        best_IoU = IoU
                        best_confidence = conf
            eva_results.append({
                'image_name': img_file,
                'confidence value': best_confidence,
                'IoU value': best_IoU
            })

        # Predictions but no ground truth (false positive case)
        elif predicted_boxes and not ground_truth_boxes:
            eva_results.append({
                'image_name': img_file,
                'confidence value': max(confs),  # Most confident detection
                'IoU value': 0.0  # No ground truth to compare against
            })

        # No predictions at all
        else:
            eva_results.append({
                'image_name': img_file,
                'confidence value': 0.0,
                'IoU value': 0.0
            })

        # Only makes sure the output folder exists; no image is written here
        if output_img_dir and predicted_boxes:
            os.makedirs(output_img_dir, exist_ok=True)

    df = pd.DataFrame(eva_results)
    return df


In [None]:
# Folder that receives the evaluation outputs
eva_image = '/content/drive/MyDrive/Colab Notebooks/dataset/eva_img'

# Create the output folder if it doesn't exist
if not os.path.exists(eva_image):
    os.makedirs(eva_image)

# Score the trained model on the selected test images and store the per-image results
df_results = model_evaluation(model, selected_test_img_dir, selected_test_labels_dir, eva_image)
evaluation_csv = f'{eva_image}/evaluation_results.csv'
df_results.to_csv(evaluation_csv)
print("The CSV file is saved to: ", evaluation_csv)


Evaluating.....: 100%|██████████| 40/40 [00:01<00:00, 28.64it/s]

The CSV file is saved to:  /content/drive/MyDrive/Colab Notebooks/dataset/eva_img/evaluation_results.csv





**Iteratively train and test the model with a new
set of 400 training and 40 test images**

In [None]:
import cv2

acc = 0.9 # target fraction of test images that must reach the IoU cut-off to stop iterating
iteration = 1  # 1-based counter used in run names, YAML names and result file names

# Re-seed so the sequence of random selections in the loop below is repeatable
random.seed(42)
# File names already drawn in earlier iterations (updated in place by image_selecting)
new_selected_train = set()
new_selected_test = set()

# Specify model save directory in Google Drive
model_save_dir = '/content/drive/MyDrive/Colab Notebooks/dataset/model'

# Create the output folder if it doesn't exist
if not os.path.exists(model_save_dir):
     os.makedirs(model_save_dir)




In [None]:
# Repeatedly: draw a fresh subset of unused images, continue training the current model on it,
# evaluate on a fresh test subset, and stop once the target accuracy is reached or images run out.
while True:
    print(f"----Iterating...... {iteration}----")

    # Remember how many images were already consumed so an exhausted pool can be detected
    used_train_before = len(new_selected_train)
    used_test_before = len(new_selected_test)

    new_selected_train_img = image_selecting(train_img, selected_train_img_dir, 400, new_selected_train)
    # Labels are copied for the selected images only (anno_copy is driven by its first folder)
    anno_copy(selected_train_img_dir, selected_train_labels_dir, train_labels)
    new_selected_test_img = image_selecting(test_img, selected_test_img_dir, 40, new_selected_test)
    anno_copy(selected_test_img_dir, selected_test_labels_dir, test_labels)

    # Training or validating on an empty folder would fail; stop when nothing new was drawn
    if len(new_selected_train) == used_train_before or len(new_selected_test) == used_test_before:
        print("No unused images are left for a new iteration.")
        break

    # Define YAML file for the current iteration
    yaml_file = f'/content/drive/MyDrive/Colab Notebooks/dataset/graffiti_{iteration}.yaml'

    # Prepare data to be written into the YAML file
    yaml_data = {
        'train': os.path.abspath(selected_train_img_dir),
        'val': os.path.abspath(selected_test_img_dir),
        'nc': 1,  # Number of classes
        'names': ['Graffiti']  # Class names
    }

    # Write the data into the YAML file
    with open(yaml_file, 'w') as file:
        yaml.dump(yaml_data, file, indent=2)

    print("--------Train model part----------")

    train_model = model.train(
            data = yaml_file,
            epochs = 4,
            imgsz = 640,
            batch = 16,
            name = f'graffiti_detection_iter_{iteration}',
    )

    # Ultralytics appends a numeric suffix to the run folder when the name already exists
    # (e.g. when this cell is re-run), so prefer the path the trainer reports and only fall
    # back to the expected default location.
    temporary_path = f'runs/detect/graffiti_detection_iter_{iteration}'
    trainer_best = getattr(getattr(model, 'trainer', None), 'best', None)
    if trainer_best:
        path_for_best_pt = str(trainer_best)
    else:
        path_for_best_pt = os.path.join(temporary_path, 'weights', 'best.pt')

    # Check before copying so a missing checkpoint produces this explicit error
    if not os.path.exists(path_for_best_pt):
        raise FileNotFoundError(f"The best model is not found at {path_for_best_pt}")

    best_pt_iteration_path = os.path.join(model_save_dir, f'graffiti_detection_iter_{iteration}.pt')
    os.makedirs(os.path.dirname(best_pt_iteration_path), exist_ok=True)

    shutil.copy(path_for_best_pt, best_pt_iteration_path)
    print(f"The best.pt is saved for iteration {iteration} at {best_pt_iteration_path}")

    # Load the best.pt of the current iteration for the next iteration
    model = YOLO(best_pt_iteration_path)

    df_results = model_evaluation(model, selected_test_img_dir, selected_test_labels_dir, eva_image)
    df_results.to_csv(f'{eva_image}/eva_results_iter_{iteration}.csv')

    best_result = f'{eva_image}/eval_{iteration}'
    os.makedirs(best_result, exist_ok=True)

    # Identify the two images with the highest confidence values
    best_two_results = df_results.nlargest(2, 'confidence value')

    # Save the images with the predicted bounding boxes to the designated folder
    for idx, row in best_two_results.iterrows():
        image_name = row['image_name']
        image_path = os.path.join(selected_test_img_dir, image_name)

        # Make predictions using the model
        predictions = model.predict(image_path, conf = 0.25)

        # Draw the predictions of the first result onto the image
        annotated_frame = predictions[0].plot()

        # Save the image with bounding boxes
        output_path = os.path.join(best_result, image_name)
        cv2.imwrite(output_path, annotated_frame)

    # Compute the accuracy for images with IoU above the threshold of 0.8
    accuracy = (df_results['IoU value'] >= 0.8).mean()
    print(f"Iteration {iteration} with the accuracy (IoU >= 0.8): {accuracy * 100:.2f}%")

    iteration += 1
    # Verify if the performance target is met or if all images have been processed
    if accuracy >= acc or (len(new_selected_train) == files_count(train_img) or len(new_selected_test) == files_count(test_img)):
        print(f"Requirement satisfied.")
        break

# Results of the last evaluated iteration
df_results.to_csv(f'{eva_image}/eva_results.csv', index=False)
print("Complete the iterative training and test")


----Iterating...... 1----
--------Train model part----------
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/content/drive/MyDrive/Colab Notebooks/dataset/graffiti_1.yaml, epochs=4, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=graffiti_detection_iter_1, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=No

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train.cache... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<00:00, 70.75it/s]

[34m[1mval: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test.cache





Plotting labels to runs/detect/graffiti_detection_iter_1/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/graffiti_detection_iter_1[0m
Starting training for 4 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/4      2.35G      2.537      2.926      2.226         89        640: 100%|██████████| 25/25 [00:10<00:00,  2.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:01<00:00,  1.81it/s]

                   all         40         98      0.235      0.112     0.0789     0.0416






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/4      2.24G      1.914      2.337      1.699         45        640: 100%|██████████| 25/25 [00:08<00:00,  3.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.68it/s]

                   all         40         98      0.423      0.245      0.229      0.127






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/4      2.28G      1.743      2.141      1.566         66        640: 100%|██████████| 25/25 [00:09<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.46it/s]

                   all         40         98      0.393      0.367      0.318      0.185






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/4      2.23G       1.64      2.015       1.49         67        640: 100%|██████████| 25/25 [00:09<00:00,  2.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.88it/s]


                   all         40         98      0.598      0.357      0.401      0.234

4 epochs completed in 0.015 hours.
Optimizer stripped from runs/detect/graffiti_detection_iter_1/weights/last.pt, 5.5MB
Optimizer stripped from runs/detect/graffiti_detection_iter_1/weights/best.pt, 5.5MB

Validating runs/detect/graffiti_detection_iter_1/weights/best.pt...
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.51it/s]


                   all         40         98      0.598      0.357      0.401      0.235
Speed: 0.2ms preprocess, 2.5ms inference, 0.0ms loss, 2.6ms postprocess per image
Results saved to [1mruns/detect/graffiti_detection_iter_1[0m
The best.pt is saved for iteration 1 at /content/drive/MyDrive/Colab Notebooks/dataset/model/graffiti_detection_iter_1.pt


Evaluating.....: 100%|██████████| 40/40 [00:01<00:00, 24.28it/s]


Iteration 1 with the accuracy (IoU >= 0.8): 37.50%
----Iterating...... 2----
--------Train model part----------
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/content/drive/MyDrive/Colab Notebooks/dataset/model/graffiti_detection_iter_1.pt, data=/content/drive/MyDrive/Colab Notebooks/dataset/graffiti_2.yaml, epochs=4, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=graffiti_detection_iter_2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_str

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:02<00:00, 160.22it/s]


[34m[1mtrain: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<00:00, 74.02it/s]


[34m[1mval: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test.cache
Plotting labels to runs/detect/graffiti_detection_iter_2/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/graffiti_detection_iter_2[0m
Starting training for 4 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/4      2.35G      1.818      2.217      1.584         76        640: 100%|██████████| 25/25 [00:11<00:00,  2.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.48it/s]

                   all         40         72      0.401      0.472      0.404      0.239






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/4      2.24G      1.729      2.056      1.516         82        640: 100%|██████████| 25/25 [00:10<00:00,  2.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.40it/s]

                   all         40         72      0.579      0.375      0.409      0.235






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/4      2.25G      1.673      1.992      1.461         65        640: 100%|██████████| 25/25 [00:10<00:00,  2.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.06it/s]

                   all         40         72      0.498      0.444      0.454      0.261






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/4      2.28G      1.624      1.937      1.478         62        640: 100%|██████████| 25/25 [00:08<00:00,  2.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.13it/s]

                   all         40         72      0.542      0.486      0.483      0.294






4 epochs completed in 0.015 hours.
Optimizer stripped from runs/detect/graffiti_detection_iter_2/weights/last.pt, 5.5MB
Optimizer stripped from runs/detect/graffiti_detection_iter_2/weights/best.pt, 5.5MB

Validating runs/detect/graffiti_detection_iter_2/weights/best.pt...
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.76it/s]


                   all         40         72       0.54      0.486      0.483      0.294
Speed: 0.2ms preprocess, 3.4ms inference, 0.0ms loss, 3.6ms postprocess per image
Results saved to [1mruns/detect/graffiti_detection_iter_2[0m
The best.pt is saved for iteration 2 at /content/drive/MyDrive/Colab Notebooks/dataset/model/graffiti_detection_iter_2.pt


Evaluating.....: 100%|██████████| 40/40 [00:01<00:00, 35.03it/s]


Iteration 2 with the accuracy (IoU >= 0.8): 35.00%
----Iterating...... 3----
--------Train model part----------
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/content/drive/MyDrive/Colab Notebooks/dataset/model/graffiti_detection_iter_2.pt, data=/content/drive/MyDrive/Colab Notebooks/dataset/graffiti_3.yaml, epochs=4, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=graffiti_detection_iter_3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_str

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train... 13 images, 0 backgrounds, 0 corrupt: 100%|██████████| 13/13 [00:00<00:00, 171.96it/s]

[34m[1mtrain: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<00:00, 45.55it/s]


[34m[1mval: [0mNew cache created: /content/drive/MyDrive/Colab Notebooks/dataset/labels/selected_test.cache
Plotting labels to runs/detect/graffiti_detection_iter_3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/graffiti_detection_iter_3[0m
Starting training for 4 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/4      1.86G      1.757      1.993       1.42         79        640: 100%|██████████| 1/1 [00:00<00:00,  2.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.76it/s]

                   all         40         73      0.531      0.325      0.323      0.172






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/4      1.88G      1.565      1.844      1.508         42        640: 100%|██████████| 1/1 [00:00<00:00,  4.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.16it/s]

                   all         40         73      0.535      0.342      0.353      0.197






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/4      1.86G       1.84      2.071      1.416         64        640: 100%|██████████| 1/1 [00:00<00:00,  4.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:01<00:00,  1.69it/s]

                   all         40         73      0.531      0.342      0.375      0.211






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/4      1.87G       1.76      1.897      1.463         77        640: 100%|██████████| 1/1 [00:00<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  2.56it/s]

                   all         40         73      0.568       0.37      0.398      0.226






4 epochs completed in 0.004 hours.
Optimizer stripped from runs/detect/graffiti_detection_iter_3/weights/last.pt, 5.5MB
Optimizer stripped from runs/detect/graffiti_detection_iter_3/weights/best.pt, 5.5MB

Validating runs/detect/graffiti_detection_iter_3/weights/best.pt...
Ultralytics 8.3.78 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.61it/s]


                   all         40         73      0.567       0.37      0.397      0.226
Speed: 0.2ms preprocess, 3.3ms inference, 0.0ms loss, 3.0ms postprocess per image
Results saved to [1mruns/detect/graffiti_detection_iter_3[0m
The best.pt is saved for iteration 3 at /content/drive/MyDrive/Colab Notebooks/dataset/model/graffiti_detection_iter_3.pt


Evaluating.....: 100%|██████████| 40/40 [00:01<00:00, 25.31it/s]


Iteration 3 with the accuracy (IoU >= 0.8): 17.50%
Requirement satisfied.
Complete the iterative training and test


**Use your final model to detect graffiti in real-time video data.**

In [None]:
# Load the final model for detection.
# The weights file is the checkpoint saved after the third training iteration.
# NOTE(review): the training log above reports accuracy (IoU >= 0.8) of
# 37.50% / 35.00% / 17.50% for iterations 1 / 2 / 3, so this "final" checkpoint
# is the lowest-scoring of the three -- confirm iter_3 is really the one to use.
model_path = '/content/drive/MyDrive/Colab Notebooks/dataset/model/graffiti_detection_iter_3.pt'
model = YOLO(model_path)

In [None]:
# Google Drive folder where the five input videos are stored
video_directory = '/content/drive/MyDrive/Colab Notebooks/dataset/videos'

In [None]:
# List all the .mp4 video files in the directory.
# os.listdir() returns entries in arbitrary order, so sort by name to make the
# processing order (and the resulting log) reproducible between runs.
video_files = sorted(f for f in os.listdir(video_directory) if f.endswith('.mp4'))

In [None]:
# Function to process and detect graffiti in video frames
def process_video(video_path, model, conf=0.25, show_every=1, fallback_fps=20.0):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_path: Path to the input video file.
        model: Detector exposing ``predict(frame, conf=...)``; the first element
            of its result must provide ``plot()`` returning the annotated frame.
        conf: Confidence threshold forwarded to ``model.predict``.
        show_every: Display every N-th annotated frame inline with matplotlib.
            The default (1) shows every frame; pass 0 or None to disable the
            display, which keeps the cell output small for long videos.
        fallback_fps: Frame rate for the output file when the input does not
            report a usable one.

    Returns:
        Path of the annotated video, ``<input name>_processed.avi``, written
        next to the input file.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    if show_every:
        # Imported once here rather than on every loop iteration
        from matplotlib import pyplot as plt

    # Swap only the real extension; str.replace('.mp4', ...) would also rewrite
    # '.mp4' occurring elsewhere in the path, and would return the input path
    # unchanged (overwriting the source) when the extension is not '.mp4'.
    output_path = os.path.splitext(video_path)[0] + '_processed.avi'

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f'Could not open video: {video_path}')

        # Keep the source frame rate so the output plays at the original speed
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps and fps > 0):  # also rejects NaN
            fps = fallback_fps

        # Output uses the same resolution as the input
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            raise IOError(f'Could not create output video: {output_path}')

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Make predictions on the current frame
            results = model.predict(frame, conf=conf)

            # Plot the results on the frame (bounding boxes)
            annotated_frame = results[0].plot()

            # Write the processed frame to the output video
            out.write(annotated_frame)

            # Optionally display the frame inline (OpenCV frames are BGR)
            if show_every and frame_index % show_every == 0:
                fig, ax = plt.subplots()
                ax.imshow(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
                ax.axis('off')
                plt.show()
                plt.close(fig)  # avoid accumulating one open figure per frame

            frame_index += 1
    finally:
        # Always release the handles, even if prediction or writing fails.
        # No cv2.destroyAllWindows() is needed: no HighGUI window is created.
        cap.release()
        if out is not None:
            out.release()

    return output_path  # Return path to processed video

In [None]:
# Paths of the annotated videos, collected so they can be downloaded afterwards
download_links = []

# Run detection on each discovered video in turn
for video_file in video_files:
    print(f'Processing video: {video_file}')
    video_path = os.path.join(video_directory, video_file)
    output_video = process_video(video_path, model)
    print(f'Processed video saved as: {output_video}')

    # Remember where the annotated copy was written
    download_links += [output_video]

# Trigger a browser download for every annotated video
from google.colab import files
for processed_path in download_links:
    files.download(processed_path)

[1;30;43mThis cell output is too large and can only be displayed while logged in.[0m
