# How to Train YOLOv7 on a Custom Dataset

This tutorial is based on the [YOLOv7 repository](https://github.com/WongKinYiu/yolov7) by WongKinYiu. This notebook shows training on **your own custom objects**. Many thanks to WongKinYiu and AlexeyAB for putting this repository together.

### **Steps Covered in this Tutorial**
To train our detector we take the following steps:
* Install YOLOv7 dependencies
* Load the trained model
* Run detection on real video

# Step 1 - Install the YOLOv7 Dependencies

_(Remember to choose GPU in Runtime if not already selected. Runtime --> Change Runtime Type --> Hardware accelerator --> GPU)_


In [None]:

# Download the YOLOv7 repository and install its requirements.
!git clone https://github.com/WongKinYiu/yolov7
# Work inside the cloned repository for the remaining commands of this cell.
%cd yolov7
!pip install -r requirements.txt
#!pip install wandb
# Install the pinned torchvision build (0.11.3+cu111) from the PyTorch wheel index.
!pip install torchvision==0.11.3+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html
import torch
from IPython.display import Image, clear_output  # to display images
#from utils.google_utils import gdrive_download  # to download models/datasets
# Fetch the released yolov7_training.pt weights file into the current folder.
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt 
# Wipe the verbose install/download logs so only the confirmation line remains.
clear_output()
print('Setup completed')

Setup completed


# Step 2 - Access the Training Data from the Shared Drive Folder

In [None]:
# Commented out IPython magic to ensure Python compatibility.
from google.colab import drive
# %cd /content/RSDD2022
!mkdir /content/yolov7/runs
!mkdir /content/yolov7/runs/exp
drive.mount('/content/drive')
#!cp -av "/content/drive/MyDrive/DEVRDD/GRSDD2022/data.yaml" -d "/content/yolov7/"
#!unzip -u "/content/drive/MyDrive/DEVRDD/GRSDD2022/Datasources/AdditionalSource.zip" -d "/content/RSDD2022/dataset/"
#!unzip -u "/content/drive/MyDrive/DEVRDD/TNRSDD2023.zip" -d "/content/yolov7/dataset/Data"
!cp -av "/content/drive/MyDrive/DEVRDD/TNRSDD2023/ImageAugmentRSDD2023_trained_model.pt" -d "/content/yolov7/runs/exp"
!unzip -u "/content/drive/MyDrive/DEVRDD/TNRSDD2023/TNRSDD2023_model.zip" -d "/content/yolov7/runs/exp"

drive.flush_and_unmount()
clear_output()

# Step 3 - Create the Folder Structure, Parameter Changes
## The dataset is split 80% for training and 20% for testing

In [None]:
"""
## Split the Data's 80, 20  -- Training
"""
# Commented out IPython magic to ensure Python compatibility.
import os,glob
%cd /content/yolov7/dataset
#!mkdir dataset
!mkdir train
!mkdir test
!mkdir train/images
!mkdir train/labels
!mkdir test/images
!mkdir test/labels


base_path = '/content/yolov7/dataset/Data'

No_of_files = len(glob.glob(base_path+'/*.txt'))

# print('total Number of files :',No_of_files)
# print('80% percent :',round((No_of_files/100)*97))
# print('20% percent :',round((No_of_files/100)*3))

part1=round((No_of_files/100)*80)
part2=round((No_of_files/100)*20)
#part3=round((No_of_files/100)*5)


fcntr=1
file_list = [filename for filename in glob.glob(base_path+'/*.txt')]
for file in file_list:
  if len(glob.glob(file.replace('.txt','*'))) <2 :
    print ('Jpeg file not found',file)
    break
  if fcntr<= part1 :   #  80% files copied in Train files
    try:
      #shutil.copyfile( file, '/content/RSDD2022/dataset/train/labels')
      os.system('cp '+file+ ' /content/yolov7/dataset/train/labels/')
      file=file.replace('.txt','.jpg')
      #shutil.copyfile( file, '/content/RSDD2022/dataset/train/images' )
      os.system('cp '+file+ ' /content/yolov7/dataset/train/images/')
    except IOError:
      print('file not found')
  else:
    try:
      os.system('cp '+file+ ' /conten/yolov7/dataset/test/labels/')
      file=file.replace('.txt','.jpg')
      os.system('cp '+file+ ' /content/yolov7/dataset/test/images/')
    except IOError:
      print('file not found  ' + file)
  fcntr=fcntr+1

print('Total No. of Train Image: ',len(glob.glob('/content/yolov7/dataset/train/images/*.jpg')))
print('Total No. of Test Image: ',len(glob.glob('/content/yolov7/dataset/test/images/*.jpg')))


/content/yolov7/dataset
Total No. of Train Image:  1507
Total No. of Test Image:  377


## Step 4 - (Optional) Plot the Class Distribution of the Input Data

In [None]:
"""## Model Training -- Training"""

# Commented out IPython magic to ensure Python compatibility.
import collections
import os
import glob
import matplotlib.pyplot as plt
import matplotlib as matplot
import seaborn as sns


base_path = '/content/yolov7/dataset/Data'
damageTypes=['D00','D10','D20','D40']

# govs corresponds to municipality name.
#govs = ['Sunny', 'Raining', 'Winter', 'Darkness']


def count_label_classes(label_text, num_classes):
    """Count annotations per class id in the text of one label file.

    Assumes the YOLO label layout: one annotation per line, with the integer
    class id as the first whitespace-separated field. Only that first field is
    inspected, so coordinate values such as ``0.20`` can no longer be mistaken
    for a class id (a plain substring count of ``"0 "`` matched them).

    Args:
        label_text: full text content of a label file.
        num_classes: number of classes; ids outside ``0..num_classes-1`` are ignored.

    Returns:
        A list of ``num_classes`` integers, indexed by class id.
    """
    counts = [0] * num_classes
    for line in label_text.splitlines():
        fields = line.split()
        if not fields:
            continue  # blank line
        try:
            class_id = int(fields[0])
        except ValueError:
            continue  # malformed line: first field is not an integer id
        if 0 <= class_id < num_classes:
            counts[class_id] += 1
    return counts


# Number of annotations per class, summed over every label file.
# Initialised to zeros so the chart still works when no label file is found.
cls_count = [0] * len(damageTypes)
total_images=0
file_list = sorted(glob.glob(base_path+'/*.txt'))
for file in file_list:
  total_images = total_images + 1
  # `with` closes the file even if reading fails.
  with open(file,'r') as f:
    per_file = count_label_classes(f.read(), len(damageTypes))
  cls_count = [running + found for running, found in zip(cls_count, per_file)]

# Per-class totals kept under their original names.
cl01,cl02,cl03,cl04 = cls_count

# Annotate each bar of the chart with its numeric value.
def addlabels(x,y):
    """Write every value of ``y`` as centred text at (index, value), one per entry of ``x``."""
    bar_positions = range(len(x))
    for position in bar_positions:
        height = y[position]
        plt.text(position, height, height, ha = 'center')
  
if __name__ == '__main__':

    # Chart inputs: class names along the x axis, annotation counts as bar heights.
    x, y = damageTypes, cls_count

    # One fixed colour per damage class.
    bar_colours = ['black', 'red', 'green', 'blue']

    # 10x5 inch canvas, then the bars followed by their value labels.
    plt.figure(figsize = (10, 5))
    plt.bar(x, y, color=bar_colours)
    addlabels(x, y)

    # Title and axis captions.
    plt.title("Road Surface Damage Classes - Training Data")
    plt.xlabel("Object Class")
    plt.ylabel("Class counts - Training")

    # Render the figure.
    plt.show()

# Step 5 - Train the Model

In [None]:
# Training is disabled in this notebook: both train.py invocations below are
# commented out. Uncomment one of them to train from yolov7_training.pt.
# NOTE(review): the first command pairs cfg/training/yolov7x.yaml with the
# yolov7_training.pt weights - confirm that combination is intended.
# %cd /content/yolov7
# !python train.py  --weights /content/yolov7/yolov7_training.pt --data "data/custom.yaml" --workers 4 --batch-size 8 --img 416 --cfg cfg/training/yolov7x.yaml --name RSDD2023_Jan_Normal --hyp data/hyp.scratch.p5.yaml --epochs 50

# #!python train.py --workers 8 --device 0 --batch-size 8 --data data/custom.yaml --img 1280 720 --cfg cfg/training/yolov7.yaml --weights yolov7_training.pt --name TNRSDD2023 --hyp data/hyp.scratch.custom.yaml --epochs 50




# Step 6 - Real-Time Evaluation

We can evaluate the performance of our custom training using the provided evaluation script.

In [None]:
''' Training model download from GDrive'''

# Mount Google Drive, copy data.yaml and unpack the trained-model archive into
# the repository, then detach Drive again.
from google.colab import drive
# Run from the repository root.
%cd /content/yolov7
# !mkdir /content/yolov7/runs
# !mkdir /content/yolov7/runs/exp
drive.mount('/content/drive')
# Copy data.yaml from Drive into the repository root.
!cp -av "/content/drive/MyDrive/DEVRDD/GRSDD2022/data.yaml" -d "/content/yolov7/"
#!unzip -u "/content/drive/MyDrive/DEVRDD/GRSDD2022/Datasources/AdditionalSource.zip" -d "/content/RSDD2022/dataset/"
#!unzip -u "/content/drive/MyDrive/DEVRDD/TNRSDD2023.zip" -d "/content/yolov7/dataset/Data"
# Unpack the model archive into runs/ (-u updates existing files and creates missing ones).
!unzip -u "/content/drive/MyDrive/DEVRDD/TNRSDD2023/TNRSDD2023_model.zip" -d "/content/yolov7/runs/"
# Detach Drive, then clear the verbose copy/unzip output.
# clear_output is the name imported from IPython.display in the setup cell.
drive.flush_and_unmount()
clear_output()

In [None]:
import shutil,glob

# WARNING: destructive clean-up. This removes /content/yolov7/runs, which is
# where the preceding cell unpacks the trained-model archive - re-run that
# cell afterwards if the model is still needed.
RUNS_DIR = '/content/yolov7/runs'
try:
    shutil.rmtree(RUNS_DIR)
except FileNotFoundError:
    # Nothing to clean on a fresh runtime; report it instead of aborting "Run all".
    print('Nothing to remove, folder not found:', RUNS_DIR)
#shutil.rmtree('/content/yolov7/dataset/train')
#shutil.rmtree('/content/RSDD2022/dataset/Data')
#shutil.rmtree('/content/RSDD2022/runs')

In [None]:
# code for displaying multiple images in one figure

# #import libraries
# import cv2
# from matplotlib import pyplot as plt

# # create figure
# fig = plt.figure(figsize=(10, 7))

# # setting values to rows and column variables
# rows = 2
# columns = 3

# # reading images
# Image1 = cv2.imread('/content/yolov7/dataset/train/images/00001_Crack038.jpg')
# Image2 = cv2.imread('/content/yolov7/dataset/train/images/00001_Damage034.jpg')
# Image3 = cv2.imread('/content/yolov7/dataset/train/images/00001_Long023.jpg')
# Image4 = cv2.imread('/content/yolov7/dataset/train/images/00001_Long039.jpg')
# Image5 = cv2.imread('/content/yolov7/dataset/train/images/IRSDD_23042022_114.jpg')
# Image6 = cv2.imread('/content/yolov7/dataset/train/images/00001_Damage019.jpg')


# # Adds a subplot at the 1st position
# fig.add_subplot(rows, columns, 1)

# # showing image
# plt.imshow(Image1)
# plt.axis('off')
# plt.title("First")

# # Adds a subplot at the 2nd position
# fig.add_subplot(rows, columns, 2)

# # showing image
# plt.imshow(Image2)
# plt.axis('off')
# plt.title("Second")

# # Adds a subplot at the 3rd position
# fig.add_subplot(rows, columns, 3)

# # showing image
# plt.imshow(Image3)
# plt.axis('off')
# plt.title("Third")

# # Adds a subplot at the 4th position
# fig.add_subplot(rows, columns, 4)

# # showing image
# plt.imshow(Image4)
# plt.axis('off')
# plt.title("Fourth")
# fig.add_subplot(rows, columns, 5)
# # showing image
# plt.imshow(Image5)
# plt.axis('off')
# plt.title("Fifth")
# fig.add_subplot(rows, columns, 6)
# # showing image
# plt.imshow(Image6)
# plt.axis('off')
# plt.title("Sixth")

In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# OPTIONAL: Move Deployment Files to Local Disk
To deploy, you'll need to export your weights and save them to use later.

In [None]:
# ''' 
# Trained folder zip it and download
# '''
# from google.colab import files
# %cd /content
# #!zip -r /content/RSDD2023_14.zip /content/yolov7/runs/train/RSDD2023_15/weights/best.pt
# files.download('/content/RSDD2023_14.zip')  # Python call, not a shell command: no leading "!"

In [None]:
# # example upload: if prediction is below a given confidence threshold, upload it 

# confidence_interval = [10,70]                                   # [lower_bound_percent, upper_bound_percent]

# for prediction in predictions:                                  # predictions list to loop through
#   if(prediction['confidence'] * 100 >= confidence_interval[0] and 
#           prediction['confidence'] * 100 <= confidence_interval[1]):
        
#           # upload on success!
#           print(' >> image uploaded!')
#           upload_project.upload(image, num_retry_uploads=3)     # upload image in question

# Next steps

Congratulations, you've trained a custom YOLOv7 model! Next, start thinking about deploying and [building an MLOps pipeline](https://docs.roboflow.com) so your model gets better the more data it sees in the wild.