# Train using OID for YOLOv7.

### By-Pravar Kochar

## Get the Data in YOLOv7 labeled format.

Download the dataset to train the model on, using the OIDv4 toolkit.

In [None]:
%cd /content
!git clone https://github.com/EscVM/OIDv4_ToolKit.git
# Install requirements.
%cd /content/OIDv4_ToolKit
!pip install -r requirements.txt

In [4]:
# Define classes to download (change the classes.txt file)
!echo -e 'Man' > /content/OIDv4_ToolKit/classes.txt
!echo -e 'Woman' >> /content/OIDv4_ToolKit/classes.txt

Start the download with conditions:
*   Using classes as defined in classes.txt
*   CSV_type: train
*   Allow multiclass
*   Photo limit: 500



In [None]:
# Download the files.
# main.py is given as a relative path, so the kernel must still be in
# /content/OIDv4_ToolKit (set by the earlier %cd).
# Flags used: class list read from classes.txt, train CSV, multiclass enabled,
# and a limit of 500 images.
!python main.py downloader -y --classes /content/OIDv4_ToolKit/classes.txt --type_csv train --multiclass 1 --limit 500

The OID annotation format differs from the YOLOv7 format, so create the label .txt files from the provided CSV file.

Pre-process: Reading, filtering, and preparing the data from the CSV file.

In [6]:
# Create yolo txt labels from csv file.
import pandas as pd
import os

# Class-description table (no header row): column 1 is matched against the
# readable class name below, and column 0 is the ID used in 'LabelName'.
classes_data = pd.read_csv('/content/OIDv4_ToolKit/OID/csv_folder/class-descriptions-boxable.csv', header=None)

# Get the classes.
# The context manager closes the file even if reading fails.
with open('/content/OIDv4_ToolKit/classes.txt', 'r') as cl_fl:
  dt = cl_fl.read()

# Keep every non-blank line. This does not rely on the file ending with a
# newline, so the last class is never dropped by accident.
classes = [line.strip() for line in dt.splitlines() if line.strip()]

# Get the Class string ID.
class_string = []
for i in classes:
  req_classes = classes_data.loc[classes_data[1] == i]
  if req_classes.empty:
    # Fail with a readable message instead of an IndexError from iloc.
    raise ValueError(f"Class '{i}' was not found in class-descriptions-boxable.csv")
  class_string.append(req_classes.iloc[0, 0])

# Get columns from the annotation csv file.
annotation_data = pd.read_csv('/content/OIDv4_ToolKit/OID/csv_folder/train-annotations-bbox.csv',
                              usecols=['ImageID', 'LabelName',
                                        'XMin', 'XMax',
                                        'YMin', 'YMax'])

# Filter the classes.
filtered_class_data = annotation_data.loc[annotation_data['LabelName'].isin(class_string)].copy()

# Assign a class number: the position of the label ID in class_string
# (i.e. the order of the names in classes.txt).
class_index = {label: idx for idx, label in enumerate(class_string)}
filtered_class_data['classNumber'] = filtered_class_data['LabelName'].map(class_index).astype(int)

# Calc x-center, y-center, width, height.
# The box coordinates are used as-is.
# NOTE(review): presumably they are already normalised to [0, 1], as YOLO expects - confirm.
filtered_class_data['center x'] = (filtered_class_data['XMax'] + filtered_class_data['XMin']) / 2
filtered_class_data['center y'] = (filtered_class_data['YMax'] + filtered_class_data['YMin']) / 2
filtered_class_data['width'] = filtered_class_data['XMax'] - filtered_class_data['XMin']
filtered_class_data['height'] = filtered_class_data['YMax'] - filtered_class_data['YMin']

# Columns needed to write one YOLO label file per image.
YOLO_values = filtered_class_data.loc[:, ['ImageID', 'classNumber', 'center x', 'center y', 'width', 'height']].copy()

Create the YOLO format label files.

In [9]:
# Change current dir to images.
img_path = '/content/OIDv4_ToolKit/OID/Dataset/train/Man_Woman'
os.chdir(img_path)

# loop through all files in dir.
for curr_dir, dirs, files in os.walk('.'):
  for f in files:
    if f.endswith('.jpg'):
      img_title = f[:-4]  # Get name of img.
      YOLO_file = YOLO_values.loc[YOLO_values['ImageID'] == img_title]

      # Create copy.
      df = YOLO_file.loc[:, ['classNumber', 'center x', 'center y', 'width', 'height']].copy()

      # Path to save on.
      save_path = img_path + '/' + img_title + '.txt'

      # Generate file.
      df.to_csv(save_path, header=False, index=False, sep=' ')

# Remove the old labels to clear up space.
!rm -r /content/OIDv4_ToolKit/OID/Dataset/train/Man_Woman/Label

In [None]:
# Verification to check if labels are successfully created.
!echo 'All files: ' | ls -l /content/OIDv4_ToolKit/OID/Dataset/train/Man_Woman/*.* | grep -v ^l | wc -l
!echo 'JPG files: ' | ls -l /content/OIDv4_ToolKit/OID/Dataset/train/Man_Woman/*.jpg | grep -v ^l | wc -l
!echo 'TXT files: ' | ls -l /content/OIDv4_ToolKit/OID/Dataset/train/Man_Woman/*.txt | grep -v ^l | wc -l

After correct labels are created, split data to train, val, test data. (Move the files using split-folders library)

In [None]:
# To split data in train and test.
!pip install split-folders

In [None]:
# Split data.
# Reads /content/OIDv4_ToolKit/OID/Dataset/train and writes the result to
# /content/gender_data; later cells expect train/, val/ and test/ sub-folders,
# each containing a Man_Woman directory.
# --move is passed, so files are relocated rather than copied.
# NOTE(review): --group_prefix 2 presumably keeps each image together with its
# label file in the same split - confirm against the split-folders docs.
!splitfolders --output /content/gender_data --move --group_prefix 2 --ratio .8 .1 .1 -- /content/OIDv4_ToolKit/OID/Dataset/train

Split the JPG and TXT files to be in images and labels under the respective type of data.

In [12]:
# train folder correction.
!cd /content/gender_data/train
!mkdir /content/gender_data/train/images
!mkdir /content/gender_data/train/labels
!mv /content/gender_data/train/Man_Woman/*.jpg /content/gender_data/train/images
!mv /content/gender_data/train/Man_Woman/*.txt /content/gender_data/train/labels
!rm -r /content/gender_data/train/Man_Woman

# test folder correction.
!cd /content/gender_data/test
!mkdir /content/gender_data/test/images
!mkdir /content/gender_data/test/labels
!mv /content/gender_data/test/Man_Woman/*.jpg /content/gender_data/test/images
!mv /content/gender_data/test/Man_Woman/*.txt /content/gender_data/test/labels
!rm -r /content/gender_data/test/Man_Woman

# validation folder correction.
!mv /content/gender_data/val /content/gender_data/valid
!cd /content/gender_data/valid
!mkdir /content/gender_data/valid/images
!mkdir /content/gender_data/valid/labels
!mv /content/gender_data/valid/Man_Woman/*.jpg /content/gender_data/valid/images
!mv /content/gender_data/valid/Man_Woman/*.txt /content/gender_data/valid/labels
!rm -r /content/gender_data/valid/Man_Woman

Generate the YAML file that points the YOLOv7 training script to the dataset.

In [13]:
# Make yaml file.
# The file is written from Python in one step. This avoids the non-portable
# `echo -e`, and opening with mode 'w' recreates the file on every run.
yaml_lines = [
  'names:',
  '- Man',
  '- Woman',
  'nc: 2',
  'train: /content/gender_data/train/images',
  'val: /content/gender_data/valid/images',
  'test: /content/gender_data/test/images',
]
with open('/content/gender_data/gender_yolo.yaml', 'w') as yaml_file:
  yaml_file.write('\n'.join(yaml_lines) + '\n')

## Get the YOLOv7 repository prepped for training on the OID dataset.

Clone the YOLOv7 repo to host and install the requirements for the model.

In [None]:
!git clone https://github.com/WongKinYiu/yolov7.git
%cd yolov7
!pip install -r requirements.txt

In the cloned repository, download the weights file for the YOLOv7 model you want to train. (Here the pretrained YOLOv7-tiny weights are chosen as the starting point.)

In [None]:
# Download the yolov7-tiny checkpoint into the repo root; the training cell
# passes it as --weights /content/yolov7/yolov7-tiny.pt.
%cd /content/yolov7/
!wget "https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-tiny.pt"

Train the model with the given dataset with the following parameters.
*   Batch size: 82
*   Epochs: 200
*   CFG: /content/yolov7/cfg/training/yolov7-tiny.yaml
*   Using the downloaded weights, data, and the train.py.

In [14]:
# train.py is given as a relative path, so switch to the repo root first.
%cd /content/yolov7/
# Train with the yolov7-tiny model config, starting from the downloaded
# yolov7-tiny.pt weights, on the dataset described by gender_yolo.yaml
# (batch 82, 200 epochs).
!python train.py --batch 82 --cfg /content/yolov7/cfg/training/yolov7-tiny.yaml --epochs 200 --data /content/gender_data/gender_yolo.yaml --weights /content/yolov7/yolov7-tiny.pt

/content/yolov7
2023-08-20 20:07:45.609192: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
YOLOR 🚀 v0.1-126-g84932d7 torch 2.0.1+cu118 CUDA:0 (Tesla T4, 15101.8125MB)

Namespace(weights='/content/yolov7/yolov7-tiny.pt', cfg='/content/yolov7/cfg/training/yolov7-tiny.yaml', data='/content/gender_data/gender_yolo.yaml', hyp='data/hyp.scratch.p5.yaml', epochs=2, batch_size=64, img_size=[640, 640], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=8, project='runs/train', entity=None, name='exp', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_datas

## After successful training, test the best weights.

In [None]:
# Testing the model (Check the exp# to be the correct one).
# test.py is given as a relative path, so the kernel must still be in
# /content/yolov7 (set by the training cell).
# NOTE(review): no --task flag is passed; confirm which split (val or test)
# test.py evaluates by default.
!python test.py --data /content/gender_data/gender_yolo.yaml --img 640 --batch 64 --conf 0.01 --weights /content/yolov7/runs/train/exp/weights/best.pt --name OID_yolo_test

Save the best.pt and every other relevant file to Drive.

In [None]:
!cp /content/yolov7/runs/test/OID_yolo_test /content/drive/

In [None]:
# If want to run detection on random 10 samples.
%cd /content/yolov7
!python detect.py --weights /content/yolov7/custom_face_yolo.pt --conf 0.4 --source /content/yolov7/data/images

import glob
from IPython.display import Image, display

i = 0
limit = 10
# Check if the detect run to be printed is exp or exp1/2/...
for imageName in glob.glob('/content/yolov7/runs/detect/exp/*.jpg'):
  if i < limit:
    display(Image(filename=imageName))
    print("-"*15)
  i = i + 1