# Initialize constants

Set BASE_MODEL according to [Pretrained Checkpoints](https://github.com/ultralytics/yolov5/releases)

In [1]:
# Output folder name passed to train.py / val.py / detect.py via --project.
PROJECT_NAME = "yolov5_train"
# Pretrained checkpoint passed to train.py via --weights.
BASE_MODEL = "yolov5m6.pt"
# Batch size and number of epochs passed to train.py.
TRAIN_BATCH = 80
TRAIN_EPOCHS = 100
# Batch size passed to val.py.
VAL_BATCH = 64
print('Initializtion Done!')

Initializtion Done!


# Clone yolov5 repo

In [4]:
# Start from a clean checkout: delete any previous clone, then clone the YOLOv5 repository.
!rm -rf /kaggle/working/yolov5
!git clone https://github.com/ultralytics/yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 16525, done.[K
remote: Total 16525 (delta 0), reused 0 (delta 0), pack-reused 16525[K
Receiving objects: 100% (16525/16525), 15.00 MiB | 25.56 MiB/s, done.
Resolving deltas: 100% (11361/11361), done.


In [5]:
%cd /kaggle/working/yolov5
!pip install -r requirements.txt

/kaggle/working/yolov5
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.0.232 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.1.43-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.1.43-py3-none-any.whl (749 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m749.5/749.5 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.1.43


# Import libraries

This notebook walks through the steps to train and evaluate a YOLOv5 model on custom data, from scratch.

Steps to reproduce:
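1. Collect lots of images.
2. Label the images using a labeling tool.
3. Convert the labeled data to the YOLOv5 PyTorch format and split it into train/valid/test sets.
4. Train the model and get the weights file.
5. Initialize the model with the weights file and use it.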

In [6]:
import torch
from yolov5 import utils
import torch
from IPython import display
from IPython.display import clear_output
from pathlib import Path
import yaml
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
import io
import os
import cv2
import json
import shutil
import numpy as np
from sklearn.model_selection import train_test_split

%matplotlib inline

# Convert data to yolov5 Pytorch format

Convert the data exported from Label Studio (YOLOv5 Darknet format) to the YOLOv5 PyTorch format.

In [7]:
# Input dataset locations: the image files, the per-image label files,
# and the notes.json description file that lists the categories.
IMAGES_PATH = "/kaggle/input/plant-disease-dataset/images"
LABELS_PATH = "/kaggle/input/plant-disease-dataset/labels"
NOTES_PATH = "/kaggle/input/plant-disease-dataset/notes.json"

In [8]:
# Read labels
# Read labels. os.listdir returns entries in arbitrary order, so sort them to make
# the input of the split independent of the filesystem.
labels = sorted(os.listdir(LABELS_PATH))

# Fixed seed so that train/valid/test membership is reproducible across runs.
SPLIT_SEED = 42

# Split data: 85% train; the held-out 15% is then split 80/20 into valid/test.
train, test = train_test_split(labels, test_size=0.15, shuffle=True, random_state=SPLIT_SEED)
valid, test = train_test_split(test, test_size=0.2, random_state=SPLIT_SEED)

print(f"train: {len(train)}; valid: {len(valid)}; test: {len(test)}")

train: 6784; valid: 958; test: 240


Make dirs for pytorch dataset format

In [9]:
# Create the images/ and labels/ folders of every split.
# exist_ok=True keeps the cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as a folder already exists.
for split_name in ("test", "train", "valid"):
    for content_kind in ("images", "labels"):
        os.makedirs(f"{split_name}/{content_kind}", exist_ok=True)

In [10]:
def move_files_to_dir(files, dirname):
    """Copy label files and their matching .jpg images into a split folder.

    Despite the name, the files are copied (shutil.copy), not moved; the
    source dataset is left untouched.

    Args:
        files: label filenames, relative to LABELS_PATH.
        dirname: destination folder that already contains ``images/`` and
            ``labels/`` subfolders.
    """
    for label_filename in files:
        # Derive the image name from the label's stem; splitext handles any
        # extension length instead of assuming exactly four characters.
        stem, _ = os.path.splitext(label_filename)
        image_filename = f"{stem}.jpg"
        shutil.copy(f"{IMAGES_PATH}/{image_filename}", f"{dirname}/images/{image_filename}")
        shutil.copy(f"{LABELS_PATH}/{label_filename}", f"{dirname}/labels/{label_filename}")

# Fill each split folder with its files (same order as before: train, test, valid).
for split_files, split_dir in ((train, "train"), (test, "test"), (valid, "valid")):
    move_files_to_dir(split_files, split_dir)

Convert the yolov5-darknet description file to the yolov5-pytorch description file (`data.yaml`).

In [11]:
# Load the description file; the context manager closes the file handle
# (a bare json.load(open(...)) leaves it open).
with open(NOTES_PATH) as notes_file:
    descr_darknet = json.load(notes_file)

# Image folders of each split, as they will be written to data.yaml.
train_path = "../train/images"
test_path = "../test/images"
valid_path = "../valid/images"

# Number of classes and their names, in the order listed under "categories".
nc = len(descr_darknet["categories"])
names = [category["name"] for category in descr_darknet["categories"]]

# Preview of the values that go into data.yaml.
print(
    f"train: {train_path}\n"
    f"test: {test_path}\n"
    f"val: {valid_path}\n\n"
    f"nc: {nc}\n"
    f"names: {names}",
)

train: ../train/images
test: ../test/images
val: ../valid/images

nc: 20
names: ['chilli antracnose', 'chilli bacterial leaf spot', 'chilli mosaic leaf virus', 'eggplant cercospora leaf spot', 'eggplant colorado potato beetle', 'eggplant fruit rot', 'eggplant fuit rot', 'eggplant healthy fruit', 'eggplant healthy leaf', 'healthy chilli leaf', 'healthy chilli', 'potato alternaria solani leaf', 'potato common scab fruit', 'potato healthy fruit', 'potato healthy leaf', 'potato pythopthora infestans leaf', 'tomato antracnose', 'tomato bacterial spot', 'tomato healthy', 'tomato late blight leaf']


In [12]:
# Collect the dataset description first, then serialize it to data.yaml.
dataset_config = {
    "train": train_path,
    "test": test_path,
    "val": valid_path,
    "nc": nc,
    "names": [f'{name}' for name in names],
}

with open("data.yaml", "w") as file:
    yaml.dump(dataset_config, stream=file, default_flow_style=None)

In [13]:
# List the working directory to check that data.yaml and the train/valid/test folders exist.
print("Now we are ready to train yolov5 model")
! ls 

Now we are ready to train yolov5 model
CITATION.cff	 benchmarks.py	export.py	  segment	  utils
CONTRIBUTING.md  classify	hubconf.py	  test		  val.py
LICENSE		 data		models		  train		  valid
README.md	 data.yaml	pyproject.toml	  train.py
README.zh-CN.md  detect.py	requirements.txt  tutorial.ipynb


# Train yolov5

In [14]:
# Train starting from the BASE_MODEL weights on the dataset described by data.yaml.
# Results are written under PROJECT_NAME/feature_extraction; the validation cell
# reads weights/best.pt from that folder.
# NOTE(review): --freeze 12 presumably freezes the first 12 layers and --cache
# presumably caches images for faster loading — confirm against train.py.
!python train.py --batch-size $TRAIN_BATCH --epochs $TRAIN_EPOCHS --data "data.yaml" --weights $BASE_MODEL --project $PROJECT_NAME --name 'feature_extraction' --cache --freeze 12

2024-04-06 14:56:07.292932: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-06 14:56:07.293055: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-06 14:56:07.427858: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5m6.pt, cfg=, data=data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epo

# Validation

In [15]:
# Best checkpoint of the 'feature_extraction' training run.
WEIGHTS_BEST = f"{PROJECT_NAME}/feature_extraction/weights/best.pt"
# Evaluate that checkpoint on the "test" entry of data.yaml (--task test);
# results are written under PROJECT_NAME/validation_on_test_data.
# NOTE(review): --augment presumably enables test-time augmentation — confirm against val.py.
! python val.py --weights $WEIGHTS_BEST --batch $VAL_BATCH --data 'data.yaml' --task test --project $PROJECT_NAME --name 'validation_on_test_data' --augment

[34m[1mval: [0mdata=data.yaml, weights=['yolov5_train/feature_extraction/weights/best.pt'], batch_size=64, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=test, device=, workers=8, single_cls=False, augment=True, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=yolov5_train, name=validation_on_test_data, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-295-gac6c4383 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla P100-PCIE-16GB, 16276MiB)

Fusing layers... 
Model summary: 276 layers, 35358588 parameters, 0 gradients, 49.1 GFLOPs
[34m[1mtest: [0mScanning /kaggle/working/yolov5/test/labels... 240 images, 0 backgrounds, [0m
[34m[1mtest: [0mNew cache created: /kaggle/working/yolov5/test/labels.cache
                 Class     Images  Instances          P          R      mAP50   
                   all        240        321      0.924      0.959      0.967      0.909
     chilli antracnose        240         29      0.917      0

# Test detection

In [16]:
# Run detection on the test images with the best checkpoint, using a confidence
# threshold of 0.6; the output images are written to PROJECT_NAME/detect_test.
# --line=3 is parsed as line_thickness=3 (see the logged arguments).
! python detect.py --weights $WEIGHTS_BEST --conf 0.6 --source 'test/images' --project $PROJECT_NAME --name 'detect_test' --augment --line=3

[34m[1mdetect: [0mweights=['yolov5_train/feature_extraction/weights/best.pt'], source=test/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.6, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=True, visualize=False, update=False, project=yolov5_train, name=detect_test, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-295-gac6c4383 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla P100-PCIE-16GB, 16276MiB)

Fusing layers... 
Model summary: 276 layers, 35358588 parameters, 0 gradients, 49.1 GFLOPs
image 1/240 /kaggle/working/yolov5/test/images/-117-edit_jpg.rf.8a4db31a2e26142a19d8939599271fb7.jpg: 640x640 1 eggplant colorado potato beetle, 152.7ms
image 2/240 /kaggle/working/yolov5/test/images/-131-edit_jpg.rf.cce2afe11f4db1ff4ab768767f29bdee.jpg: 640x640 1 eggplant colorado potato bee

In [17]:
def read_images(dirpath):
  """Load every file in ``dirpath`` as an image array.

  Files are read with ``mpimg.imread`` in the order returned by ``os.listdir``.

  Args:
    dirpath: directory that contains only image files.

  Returns:
    List with one image array per directory entry.
  """
  return [mpimg.imread(f"{dirpath}/{entry}") for entry in os.listdir(dirpath)]

In [18]:
def label_test_images(test_images_path, test_labels_path, classes):
  """Draw the ground-truth boxes and class names on every test image.

  Args:
    test_images_path: directory with the test images.
    test_labels_path: directory with one ``<image stem>.txt`` label file per
      image. Each non-empty line is read as
      ``class_idx x_center y_center width height``, with the four box values
      given as fractions of the image width/height.
    classes: sequence mapping a class index to its display name.

  Returns:
    List of annotated images as loaded by ``cv2.imread``, in the order
    returned by ``os.listdir(test_images_path)``.
  """
  labeled_images = []

  for test_image_filename in os.listdir(test_images_path):
    image = cv2.imread(f"{test_images_path}/{test_image_filename}")

    # image.shape is (height, width, ...).
    y_shape, x_shape = image.shape[0], image.shape[1]

    # The label file shares the image's stem; splitext handles any extension length.
    stem, _ = os.path.splitext(test_image_filename)

    with open(f"{test_labels_path}/{stem}.txt", "r") as f:
      for line in f:
        box = line.split()
        if not box:
          # Skip blank lines instead of failing with an IndexError.
          continue

        # Look the name up in the `classes` argument rather than a global.
        class_name = classes[int(box[0])]

        # Scale the relative box to pixels: x/width by the image width,
        # y/height by the image height. The height is the fifth field (box[4]).
        x_center = int(float(box[1]) * x_shape)
        y_center = int(float(box[2]) * y_shape)
        box_w = int(float(box[3]) * x_shape)
        box_h = int(float(box[4]) * y_shape)

        # Convert centre/size to the two opposite corners.
        x1, y1 = x_center - int(box_w / 2), y_center - int(box_h / 2)
        x2, y2 = x_center + int(box_w / 2), y_center + int(box_h / 2)

        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 3)
        cv2.putText(image, class_name, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 3)

    labeled_images.append(image)

  return labeled_images

In [19]:
# Folder written by the detect.py run, and the ground-truth test data.
detect_path = f"{PROJECT_NAME}/detect_test"
test_images_path = "test/images"
test_labels_path = "test/labels"

detected_images = read_images(detect_path)
test_labeled_images = label_test_images(test_images_path, test_labels_path, classes=names)

# Put prediction (left) and ground truth (right) side by side.
# read_images loads RGB arrays via matplotlib, while label_test_images returns
# OpenCV arrays in BGR channel order; convert the latter to RGB so that
# plt.imshow renders both halves with correct colours.
# NOTE(review): pairing by position assumes os.listdir yields the same filename
# order for both folders — confirm.
stacked_images = [
    np.hstack([detected, cv2.cvtColor(labeled, cv2.COLOR_BGR2RGB)])
    for detected, labeled in zip(detected_images, test_labeled_images)
]

In [None]:
# Show each prediction / ground-truth pair in its own figure.
for image in stacked_images:
  fig = plt.figure(figsize=(40, 15))
  ax1 = fig.add_subplot(2,2,1)
  ax1.imshow(image)
  # Render the figure now and release it, so that hundreds of large figures
  # do not stay open in memory at the same time.
  plt.show()
  plt.close(fig)

# Save model

To save your model, download the best.pt file from the project folder (named by PROJECT_NAME): PROJECT_NAME -> feature_extraction (your best run) -> weights -> best.pt

The best.pt file can then be loaded in your own project to run predictions.