# **How to Train YOLOv7 on a Custom Dataset**

# **1. Setting up Dependencies and paths**

In [1]:
!pip install roboflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting roboflow
  Downloading roboflow-0.2.21-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 136 kB/s 
[?25hCollecting wget
  Downloading wget-3.2.zip (10 kB)
Collecting pyparsing==2.4.7
  Downloading pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 3.4 MB/s 
Collecting certifi==2021.5.30
  Downloading certifi-2021.5.30-py2.py3-none-any.whl (145 kB)
[K     |████████████████████████████████| 145 kB 65.8 MB/s 
Collecting python-dotenv
  Downloading python_dotenv-0.21.0-py3-none-any.whl (18 kB)
Collecting requests-toolbelt
  Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 961 kB/s 
Collecting chardet==4.0.0
  Downloading chardet-4.0.0-py2.py3-none-any.whl (178 kB)
[K     |████████████████████████████████| 178 kB 70.4 MB/s 
Collecting cycler==0.10.0

In [2]:
import os
from pathlib import Path
import sys
from google.colab import drive

In [3]:
# Mount Google Drive at /content/drive; the project directories configured
# below are located under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# configure the project directory
project_dir = "project_research"
# Anchor on the Drive mount point (/content/drive, see drive.mount above)
# instead of Path.cwd(): later cells change the working directory with %cd,
# so a cwd-relative base would resolve to a different folder if this cell
# were re-run afterwards.
BASE_DIR = Path("/content/drive/MyDrive/demolabs", project_dir)

In [5]:
# Pretrained checkpoint file names to download from the YOLOv7 release page.
# The first entry is the one picked up later for fine-tuning.
# Other file names that can be listed here:
#     "yolov7.pt", "yolov7x.pt", "yolov7-w6.pt", "yolov7-e6.pt", "yolov7-d6.pt", "yolov7-e6e.pt"
PRETRAINED_MODEL_NAME = ["yolov7.pt"]

In [6]:
# Map each logical location used by the notebook to a directory under BASE_DIR.
# Every value is a pathlib.Path built from BASE_DIR plus the listed sub-folders.
paths = {
    key: Path(BASE_DIR, *parts)
    for key, parts in (
        ('WORKSPACE_PATH', ('workspace',)),
        ('SCRIPTS_PATH', ('scripts',)),
        ('ANNOTATION_PATH', ('workspace', 'annotations')),
        ('IMAGE_PATH', ('workspace', 'images')),
        ('RESULTS_PATH', ('workspace', 'results')),
        ('DETECTOR_PATH', ('workspace', 'yolov7')),
    )
}

In [7]:
# Create every configured directory, including missing parents.
# pathlib is used instead of shelling out to `mkdir`: it behaves the same on
# every OS, is a no-op for directories that already exist, and is not affected
# by spaces or shell metacharacters in the path.
for path in paths.values():
    Path(path).mkdir(parents=True, exist_ok=True)

## 1.1 Clone repo Yolov7

In [8]:
!git clone https://github.com/WongKinYiu/yolov7.git {paths["DETECTOR_PATH"]}

fatal: destination path '/content/drive/MyDrive/demolabs/project_research/workspace/yolov7' already exists and is not an empty directory.


In [9]:
%%bash
cd {paths["DETECTOR_PATH"]}
wget https://raw.githubusercontent.com/WongKinYiu/yolov7/u5/requirements.txt
pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting thop
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi, thop
Successfully installed jedi-0.18.2 thop-0.1.1.post2209072238


bash: line 1: cd: {paths[DETECTOR_PATH]}: No such file or directory
--2022-12-08 17:27:09--  https://raw.githubusercontent.com/WongKinYiu/yolov7/u5/requirements.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1032 (1.0K) [text/plain]
Saving to: ‘requirements.txt’

     0K .                                                     100% 49.7M=0s

2022-12-08 17:27:10 (49.7 MB/s) - ‘requirements.txt’ saved [1032/1032]



In [10]:
# Pretrained weights are kept in a `weights` folder inside the detector repo.
paths["WEIGHTS_PATH"] = paths["DETECTOR_PATH"] / "weights"

# (Re)create any configured directory that is still missing. pathlib replaces
# the OS-specific shell `mkdir` calls: same behaviour on every platform, no-op
# for existing directories, and safe for paths containing spaces.
for path in paths.values():
    Path(path).mkdir(parents=True, exist_ok=True)

In [11]:
# Add the detector directory to the module search path so that modules located
# in that directory can be imported by later cells.
sys.path.append(str(paths["DETECTOR_PATH"]))

## 1.2 Download pre-trained models

In [12]:
for model in PRETRAINED_MODEL_NAME:
  !wget -P {paths["WEIGHTS_PATH"]} {"https://github.com/WongKinYiu/yolov7/releases/download/v0.1/" + model}

--2022-12-08 17:28:03--  https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/b0243edf-9fb0-4337-95e1-42555f1b37cf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20221208%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20221208T172803Z&X-Amz-Expires=300&X-Amz-Signature=636d84720751c4a3bb1669a76e8a0473023ac244820c7b874bcdb5bd520ccf58&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=511187726&response-content-disposition=attachment%3B%20filename%3Dyolov7.pt&response-content-type=application%2Foctet-stream [following]
--2022-12-08 17:28:03--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/b0243edf-9fb0-4337-95e1-42555f1b37cf?X-Amz-Algorithm=A

# **2. Getting Our Dataset**

If you haven't followed the link to dataset given in description, here it is again [waste segregation](https://universe.roboflow.com/dark-mqa4m/waste-segregation-3ykjs)

- Follow the link and sign in to your Roboflow account. If you haven't signed up before, first sign up and then sign in
- Once you are logged in, click the **Download this Dataset** tab in the top right corner
- A dialogue box will open, select the YOLOv7 format, check the **Show download code** option and press continue.
- A download code will appear 

In [None]:
%cd {paths["DETECTOR_PATH"]}
#### ROBOFLOW DATASET DOWNLOAD CODE #####

from roboflow import Roboflow
rf = Roboflow(api_key="")
project = rf.workspace("dark-mqa4m").project("waste-segregation-3ykjs")
dataset = project.version(3).download("yolov7")

/content/drive/MyDrive/demolabs/project_research/workspace/yolov7
loading Roboflow workspace...
loading Roboflow project...
Downloading Dataset Version Zip in waste-segregation-3 to yolov7pytorch: 100% [291827498 / 291827498] bytes


Extracting Dataset Version Zip to waste-segregation-3 in yolov7pytorch::  46%|████▋     | 5399/11646 [49:44<1:08:37,  1.52it/s]

# **3. Run YOLOv7 Training**

# 3.1 Getting our pretrained model, you can choose any model from below to fine-tune


In [None]:
# Use the first entry of PRETRAINED_MODEL_NAME as the checkpoint to fine-tune.
pretrained_model = PRETRAINED_MODEL_NAME[0]
# Note: despite the `_dir` suffix, this is the path of the weights *file*
# inside WEIGHTS_PATH, not a directory.
pretrained_model_dir = paths["WEIGHTS_PATH"] / pretrained_model


# 3.2 Start Training

**Note**

[To get the full list of training arguments follow the link](https://github.com/WongKinYiu/yolov7/blob/main/train.py)

Some important arguments to know
- **configuration**: In the main yolov7 folder go to cfg/training folder and select the path of appropriate configuration file. Give the relative path to the file in **--cfg** argument
- **data**: path to the dataset's `data.yaml`, given by the **--data** argument; the training cell below fills it in automatically from `dataset.location`
- **weights** path to pretrained weights given by **--weights** argument


<br><br>

**Note for resuming training from checkpoint** <br>
By default, the checkpoints for each epoch are stored under the folder yolov7/runs/train. To resume training, give the relative path to the checkpoint of the last epoch.

In [None]:
# Show where the downloaded dataset is located; the training cell passes
# `{dataset.location}/data.yaml` to train.py.
dataset.location

In [None]:
%cd {paths["DETECTOR_PATH"]}
!python train.py --batch 16 --cfg cfg/training/yolov7.yaml --epochs 10 --data {dataset.location}/data.yaml --weights 'weights/yolov7.pt' --device 0 

# **4. Evaluation**

- Note the checkpoints from training will be stored by default in runs/train/exp. Take the path of the latest checkpoint

We can evaluate the performance of our custom training using the provided evaluation script.

Note we can adjust the below custom arguments. For details, see [the arguments accepted by detect.py](https://github.com/WongKinYiu/yolov7/blob/main/detect.py#L154).

## 4.1 F1 and Precision Recall Curve

In [None]:
from IPython.display import Image, display

# Plots written by train.py for the first training run (runs/train/exp).
train_run_dir = paths["DETECTOR_PATH"] / "runs/train/exp"

# (file name, display size in pixels) for each plot to show.
# The file is passed as `filename=str(...)` so that it is always read from
# disk; a pathlib.Path given positionally is the `data` argument, and not
# every IPython version converts it to a file name.
for plot_name, size in (
    ("F1_curve.png", 400),
    ("PR_curve.png", 400),
    ("confusion_matrix.png", 500),
):
    display(Image(filename=str(train_run_dir / plot_name), width=size, height=size))

## 5.1.1 Run the below cell to evaluate on test images

In [None]:
# Run evaluation
!python detect.py --weights {PATHS["WEIGHTS_PATH"] / "epoch_054.pt"} --conf 0.1 --source {paths["DETECTOR_PATH"]/ "waste-segregation-3/test/images"}

## 5.1.2 Display Inference on Folder of Test Images

**Note**: From the output displayed above, copy the full path of the folder where the test images are stored

In [None]:
#display inference on ALL test images

import glob
from IPython.display import Image, display

limit = 100  # max images to print

# Folder holding the detect.py results. Replace "exp1" with the run folder
# reported in the detect.py output above.
# Fixes over the previous version:
#   * the mapping is `paths`, not `path` (the leftover loop variable);
#   * the joined segment must not start with "/", otherwise pathlib discards
#     everything to its left and the pattern points at the filesystem root;
#   * glob.glob expects a string pattern, so the Path is converted with str().
detect_run_dir = paths["DETECTOR_PATH"] / "runs/detect/exp1"
image_files = glob.glob(str(detect_run_dir / "*.jpg"))  # Assuming JPG

for imageName in image_files[:limit]:
    display(Image(filename=imageName))
    print("\n")

#display(Image("/content/gdrive/MyDrive/yolov7/runs/detect/exp3/52_jpg.rf.c3931652d0d6e62034543e92ec110c0b.jpg", width=400, height=400))

# **5.2 Now it's time to Infer on Custom Images**

## 5.2.1 Helper Code For Inference

In [None]:
import os
import sys
# Make the cloned YOLOv7 repo importable for the `models.*` / `utils.*` imports
# below. Use the configured DETECTOR_PATH: Drive is mounted at /content/drive,
# so a hardcoded /content/gdrive/... entry does not match this notebook's layout.
detector_dir = str(paths["DETECTOR_PATH"])
if detector_dir not in sys.path:  # avoid duplicate entries on re-run
    sys.path.append(detector_dir)


import argparse
import time
from pathlib import Path
import cv2
import torch
import numpy as np
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
    scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``img`` to fit ``new_shape`` and pad the remainder with ``color``.

    Args:
        img: image array; ``img.shape[:2]`` is read as (height, width).
        new_shape: target (height, width), or a single int for a square target.
        color: border value handed to ``cv2.copyMakeBorder``.
        auto: when true, the padding is reduced modulo ``stride`` (minimum rectangle).
        scaleFill: when true (and ``auto`` is false), stretch to ``new_shape`` with no padding.
        scaleup: when false, the image is only ever scaled down, never up.
        stride: modulus used for the padding when ``auto`` is true.

    Returns:
        ``(img, ratio, (dw, dh))``: the resized and padded image, the
        (width, height) scale ratios, and the padding per side along width and height.
    """
    src_h, src_w = img.shape[:2]  # current size
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Single scale factor (new / old) that fits both dimensions.
    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        # Only shrink, never enlarge.
        scale = min(scale, 1.0)

    ratio = (scale, scale)  # width, height ratios
    new_unpad = (int(round(src_w * scale)), int(round(src_h * scale)))  # (w, h) before padding
    dw = dst_w - new_unpad[0]  # total width padding
    dh = dst_h - new_unpad[1]  # total height padding
    if auto:
        # Minimum rectangle: keep only the padding remainder modulo stride.
        dw = np.mod(dw, stride)
        dh = np.mod(dh, stride)
    elif scaleFill:
        # Stretch to the exact target size; no padding at all.
        dw = dh = 0.0
        new_unpad = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)

    # Split the padding evenly between the two sides.
    dw = dw / 2
    dh = dh / 2

    if (src_w, src_h) != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets send an odd leftover pixel to the bottom/right side.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, ratio, (dw, dh)

## 5.2.2 Configuration Parameters

Change the path of both **weights** and **yaml** file

**weights** will be in yolov7 main folder -> runs -> train and then select the appropriate weight

**yaml** yolov7 main folder -> Trash-5, there you will find yaml file

In [None]:
# Optional list of class *names* to keep in the results, e.g. ['train', 'person'].
# None keeps every class.
classes_to_filter = None


opt = {
    # Path to the fine-tuned checkpoint. Built from DETECTOR_PATH so it follows
    # the notebook's Drive layout (mounted at /content/drive) instead of a
    # hardcoded /content/gdrive/... location. Change the file name to the
    # checkpoint produced by your own training run.
    "weights": str(paths["DETECTOR_PATH"] / "runs/train/exp/weights/epoch_024.pt"),
    "yaml": "Trash-5/data.yaml",
    "img-size": 640,  # default image size
    "conf-thres": 0.25,  # confidence threshold for inference.
    "iou-thres": 0.45,  # NMS IoU threshold for inference.
    "device": '0',  # device to run our model i.e. 0 or 0,1,2,3 or cpu
    "classes": classes_to_filter,  # list of classes to filter or None
}

# **5.3. Inference on Single Image**


In [None]:
%cd /content/gdrive/MyDrive/yolov7
!gdown https://drive.google.com/uc?id=1c96hId8WNsOASKHcAxsQeM4N-N2wuwy9
#This does not work in Safari Browser

In [None]:
# Image to run inference on. Built from DETECTOR_PATH so it matches the
# notebook's Drive layout (/content/drive) rather than a hardcoded
# /content/gdrive/... location.
source_image_path = str(paths["DETECTOR_PATH"] / 'trash.jpg')
#Change the Path Name to your file name.

In [None]:
# Run the detector on `source_image_path` and draw the detections onto `img0`.
# Give path of source image.
#%cd /content/gdrive/MyDrive/yolov7
#source_image_path = '/content/trash.png'

with torch.no_grad():
  weights, imgsz = opt['weights'], opt['img-size']
  set_logging()
  device = select_device(opt['device'])
  half = device.type != 'cpu'  # use half precision whenever not running on CPU
  model = attempt_load(weights, map_location=device)  # load FP32 model
  stride = int(model.stride.max())  # model stride
  imgsz = check_img_size(imgsz, s=stride)  # check img_size
  if half:
    model.half()

  names = model.module.names if hasattr(model, 'module') else model.names
  colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]  # one random colour per class
  if device.type != 'cpu':
    # One forward pass on a zero tensor before the real image.
    model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

  img0 = cv2.imread(source_image_path)
  if img0 is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than inside letterbox.
    raise FileNotFoundError(f"Could not read image: {source_image_path}")
  img = letterbox(img0, imgsz, stride=stride)[0]
  img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channel order (BGR to RGB), HWC to CHW
  img = np.ascontiguousarray(img)
  img = torch.from_numpy(img).to(device)
  img = img.half() if half else img.float()  # uint8 to fp16/32
  img /= 255.0  # 0 - 255 to 0.0 - 1.0
  if img.ndimension() == 3:
    img = img.unsqueeze(0)  # add batch dimension

  # Inference
  t1 = time_synchronized()
  pred = model(img, augment= False)[0]

  # Apply NMS
  classes = None
  if opt['classes']:
    # Translate the requested class names into the model's class ids.
    # The lookup must go through `names`: indexing into opt['classes'] itself
    # only yields the position inside the filter list (0, 1, 2, ...), not the
    # id the model uses for that class.
    classes = [names.index(class_name) for class_name in opt['classes']]

  pred = non_max_suppression(pred, opt['conf-thres'], opt['iou-thres'], classes= classes, agnostic= False)
  t2 = time_synchronized()
  for i, det in enumerate(pred):
    s = ''
    s += '%gx%g ' % img.shape[2:]  # print string
    gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
    if len(det):
      # Rescale boxes from the letterboxed size back to the original image size.
      det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

      for c in det[:, -1].unique():
        n = (det[:, -1] == c).sum()  # detections per class
        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

      for *xyxy, conf, cls in reversed(det):
        label = f'{names[int(cls)]} {conf:.2f}'
        plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)



In [None]:
# Show `img0` (the image passed to plot_one_box in the previous cell) inline,
# using the cv2_imshow helper provided by google.colab.patches.
from google.colab.patches import cv2_imshow
cv2_imshow(img0)