#Introduction
This notebook is used for training YOLOv7 network for a crack detection on the concrete surfaces. It is trained on the custom dataset that consist of 11 298 pictures of cracks.

#Install Dependencies

_(Remember to choose GPU in Runtime if not already selected. Runtime --> Change Runtime Type --> Hardware accelerator --> GPU)_

In [None]:
!pip install wandb

#Connect to Google Drive where our dataset is located
from google.colab import drive
drive.mount('/content/drive')


# Download YOLOv7 repository and install requirements
!git clone https://github.com/WongKinYiu/yolov7
%cd yolov7
!pip install -r requirements.txt



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.15.4-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.25.1-py2.py3-none-any.whl (206 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m206.7/206.7 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools (from wandb)
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting

In [None]:
#This opens file explorer in Google Colab
from google.colab import files
files.view('.')


<IPython.core.display.Javascript object>

# Split dataset into train/val/test subsets




Check if all images have their corresponding txt labels in dataset folder.

In [None]:
import os

# Path to the data folder
data_path = "/content/drive/MyDrive/CracksDsBig"

# Count the number of images and label files
num_images = 0
num_labels = 0
num_missing_labels = 0

# Loop through the images in the data folder
for image_file in os.listdir(data_path):
    if image_file.endswith(".jpg"):
        num_images += 1
        label_file = os.path.join(data_path, image_file[:-4] + ".txt")
        if os.path.isfile(label_file):
            num_labels += 1
        else:
            num_missing_labels += 1
            print(f'Missing label: {label_file}')

# Print the results
print("Number of images:", num_images)
print("Number of label files:", num_labels)
print("Number of missing label files:", num_missing_labels)

KeyboardInterrupt: ignored

Separate data from a main dataset folder to a train/val/test folders, as YOLOv7 network requires.

In [None]:
#Set split ratio, remaining ratio is for test subset
train_ratio, val_ratio = 0.8, 0.10
limit_number_of_images_to = None # Set to None if you want to use all images
seed = 1

In [None]:
import os
import shutil
import random
import math

# Function to move files to a folder
def move_files_to_folder(list_of_files, destination_folder):
    for f in list_of_files:
        try:
            dest_path = os.path.join(destination_folder, os.path.basename(f))
            shutil.copy(f, dest_path)
        except Exception as e:
            print(f"Error copying {f}: {e}")
            assert False



# Path to the data folder on Google Drive
data_path = "/content/drive/MyDrive/CracksDsBig"

# Paths to the train, validation, and test folders
main_path = "/content/drive/MyDrive/CracksDsColab"
train_images_path = main_path + "/images/train"
val_images_path = main_path + "/images/val"
test_images_path = main_path + "/images/test"

train_labels_path = main_path + "/labels/train"
val_labels_path = main_path + "/labels/val"
test_labels_path = main_path + "/labels/test"

# Remove all files from the train, validation, and test folders
if os.path.exists(main_path):
  shutil.rmtree(main_path)

# Create folders for train, validation, and test sets on your Google Drive, skips if they already exist
os.makedirs(train_images_path, exist_ok=True)
os.makedirs(val_images_path, exist_ok=True)
os.makedirs(test_images_path, exist_ok=True)

os.makedirs(train_labels_path, exist_ok=True)
os.makedirs(val_labels_path, exist_ok=True)
os.makedirs(test_labels_path, exist_ok=True)

# Read images and annotations
images = [os.path.join(data_path, x) for x in os.listdir(data_path) if x[-3:] == "jpg"]
annotations = [os.path.join(data_path, x) for x in os.listdir(data_path) if x[-3:] == "txt"]

images.sort()
annotations.sort()

# Split the dataset into train-valid-test splits
random.seed(seed)

indices = list(range(len(images)))
random.shuffle(indices)



if limit_number_of_images_to is not None:
    indices = indices[:limit_number_of_images_to]
    num_images = limit_number_of_images_to
else:
    num_images = len(images)

num_train = math.floor(num_images * train_ratio)
num_val = math.floor(num_images * val_ratio)

train_indices = indices[:num_train]
val_indices = indices[num_train:num_train+num_val]
test_indices = indices[num_train+num_val:]

# Get corresponding images and annotations for each split
train_images = [images[i] for i in train_indices]
train_annotations = [annotations[i] for i in train_indices]

val_images = [images[i] for i in val_indices]
val_annotations = [annotations[i] for i in val_indices]

test_images = [images[i] for i in test_indices]
test_annotations = [annotations[i] for i in test_indices]

# Move the images and annotations to the train, validation, and test folders
move_files_to_folder(train_images, train_images_path)
move_files_to_folder(train_annotations, train_labels_path)
move_files_to_folder(val_images, val_images_path)
move_files_to_folder(val_annotations, val_labels_path)
move_files_to_folder(test_images, test_images_path)
move_files_to_folder(test_annotations, test_labels_path)


In [None]:
import os

# Path to the data folder
main_path = "/content/drive/MyDrive/CracksDsColab"
images_path = main_path + "/images/test"
labels_path = main_path + "/labels/test"

# Count the number of images and label files
num_images = 0
num_labels = 0
num_missing_labels = 0

# Loop through the images in the data folder
for image_file in os.listdir(images_path):
    num_images += 1
    label_file = os.path.join(labels_path, image_file[:-4] + ".txt")
    if os.path.isfile(label_file):
        num_labels += 1
    else:
        num_missing_labels += 1
        print(f'Missing: {label_file}')

# Print the results
print("Number of images:", num_images)
print("Number of label files:", num_labels)
print("Number of missing label files:", num_missing_labels)



Number of images: 1131
Number of label files: 1131
Number of missing label files: 0


# Begin Custom Training

Before we are ready to start training, we must first create data.yaml file where we store some basic informations about our custom dataset.

In [None]:
train_path = "/content/drive/MyDrive/CracksDsColab/images/train"
test_path = "/content/drive/MyDrive/CracksDsColab/images/test"
valid_path = "/content/drive/MyDrive/CracksDsColab/images/val"

nc = 1
names = ["crack"]

print(
    f"train: {train_path}\n"
    f"test: {test_path}\n"
    f"val: {valid_path}\n\n"
    f"nc: {nc}\n"
    f"names: {names}",
)

train: /content/drive/MyDrive/CracksDsColab/images/train
test: /content/drive/MyDrive/CracksDsColab/images/test
val: /content/drive/MyDrive/CracksDsColab/images/val

nc: 1
names: ['crack']


In [None]:
import yaml
with open("data.yaml", "w") as file:
    yaml.dump({
        "train": train_path,
        "test": test_path,
        "val": valid_path,
        "nc": nc,
        "names": [f'{name}' for name in names]
    }, stream=file, default_flow_style=None)

Now, we are ready to start training!

In [None]:
# run this cell to begin training3
%cd /content/yolov7
!python train.py --img-size 448 --cfg cfg/training/yolov7-tiny.yaml --hyp data/hyp.scratch.custom.yaml --batch 32 --epochs 25 --data data.yaml --workers 24 --name yolo_crack_det_v2 --weights yolov7.pt  #--weights ' '


/content/yolov7
2023-06-12 13:19:03.625797: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
YOLOR 🚀 v0.1-126-g84932d7 torch 2.0.1+cu118 CUDA:0 (Tesla T4, 15101.8125MB)

Namespace(weights='yolov7.pt', cfg='cfg/training/yolov7-tiny.yaml', data='data.yaml', hyp='data/hyp.scratch.custom.yaml', epochs=25, batch_size=32, img_size=[448, 448], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=24, project='runs/train', entity=None, name='yolo_crack_det_v2', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, a

After the training is over, download generated files (including trained weights) locally:

In [None]:
from google.colab import files
files.download('/content/yolov7/runs/train/yolo_crack_det_v2/weights/best.pt')
files.download('/content/yolov7/runs/train/yolo_crack_det_v2/weights/last.pt')
files.download('/content/yolov7/runs/train/yolo_crack_det_v2/F1_curve.png')
files.download('/content/yolov7/runs/train/yolo_crack_det_v2/PR_curve.png')
files.download('/content/yolov7/runs/train/yolo_crack_det_v2/P_curve.png')
files.download('/content/yolov7/runs/train/yolo_crack_det_v2/R_curve.png')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!zip -r /content/yolo_gotovo_tren.zip /content/yolov7/runs/train/yolo_crack_det_v22

  adding: content/yolov7/runs/train/yolo_crack_det_v25/ (stored 0%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/test_batch0_pred.jpg (deflated 8%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/test_batch2_labels.jpg (deflated 9%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/test_batch1_labels.jpg (deflated 4%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/train_batch5.jpg (deflated 0%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/events.out.tfevents.1680191348.9d0067626b74.9088.0 (deflated 71%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/P_curve.png (deflated 20%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/test_batch0_labels.jpg (deflated 7%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/opt.yaml (deflated 47%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/train_batch2.jpg (deflated 1%)
  adding: content/yolov7/runs/train/yolo_crack_det_v25/train_batch1.jpg (deflated 0%)
  adding: content/yolov7/r

In [None]:
from google.colab import files
files.download("/content/yolo_gotovo_tren.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#Save progress and weights after training is completed

import wandb
api = wandb.Api()

sweep = api.sweep("diplomski/YOLOR/z9zpmzda")
runs = sorted(sweep.runs,
  key=lambda run: run.summary.get("val_acc", 0), reverse=True)
val_acc = runs[0].summary.get("val_acc", 0)
print(f"Best run {runs[0].name} with {val_acc}% validation accuracy")

runs[0].file("model.h5").download(replace=True)
print("Best model saved to model-best.h5")

IndexError: ignored

# Evaluation

We can evaluate the performance of our custom training using the provided evalution script.

Note we can adjust the below custom arguments. For details, see [the arguments accepted by detect.py](https://github.com/WongKinYiu/yolov7/blob/main/detect.py#L154).

In [None]:
!python test.py --img-size 448 --weights /content/drive/MyDrive/last_full_150epochs_split_0.8_0.1_0.1.pt --data data.yaml --task test --name yolo_det

Namespace(weights=['/content/drive/MyDrive/last_full_150epochs_split_0.8_0.1_0.1.pt'], data='data.yaml', batch_size=32, img_size=448, conf_thres=0.001, iou_thres=0.65, task='test', device='', single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project='runs/test', name='yolo_det', exist_ok=False, no_trace=False, v5_metric=False)
YOLOR 🚀 v0.1-126-g84932d7 torch 2.0.1+cu118 CUDA:0 (Tesla T4, 15101.8125MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Model Summary: 314 layers, 36481772 parameters, 6194944 gradients, 103.2 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

[34m[1mtest: [0mScanning '/content/drive/MyDrive/CracksDsColab/labels/test.cache' images and labels... 1131 found, 0 missing, 137 empty, 0 corrupted: 100% 1131/1131 [00:00<?, ?it/

In [None]:
# Run evaluation
#!python detect.py --weights /content/drive/MyDrive/CracksDataset3/weights/best.pt --conf 0.2 --source /content/drive/MyDrive/CracksDataset3/nepoznatiprimjeri

!python detect.py --weights /content/drive/MyDrive/CracksDataset3/weights/best.pt --conf 0.2 --source /content/drive/MyDrive/CracksDataset3/images/test


Namespace(weights=['/content/drive/MyDrive/CracksDataset3/weights/best.pt'], source='/content/drive/MyDrive/CracksDataset3/images/test', img_size=640, conf_thres=0.2, iou_thres=0.45, device='', view_img=False, save_txt=False, save_conf=False, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False, no_trace=False)
YOLOR 🚀 v0.1-122-g3b41c2c torch 2.0.0+cu118 CUDA:0 (Tesla T4, 15101.8125MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Model Summary: 314 layers, 36481772 parameters, 6194944 gradients, 103.2 GFLOPS
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

Done. (22.1ms) Inference, (11.8ms) NMS
 The image with the result is saved in: runs/detect/exp/CFD_018.jpg
2 cracks, Done. (22.1ms) Inference, (17.6ms) NMS
 The image with the result is saved

In [1]:
#display inference on ALL test images

import glob
from IPython.display import Image, display

i = 0
limit = 1000 # max images to print
for imageName in glob.glob('/content/yolov7/runs/detect/exp/*.jpg'): #assuming JPG
    if i < limit:
      display(Image(filename=imageName))
      print("\n")
    i = i + 1
