<a href="https://colab.research.google.com/github/4rn3/ai_howest_project-/blob/main/yolov7_bloodcell_detection_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import os, sys, random, shutil
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from shutil import copyfile
import pandas as pd
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import patches
import numpy as np
# Print the interpreter and PyTorch versions in use for this run.
for version_line in (
  f"Python version: {sys.version}, {sys.version_info} ",
  f"Pytorch version: {torch.__version__} ",
):
  print(version_line)

### Importing dataset

In [None]:
# Fetch the BCCD blood-cell dataset. The destination is spelled out because the
# preprocessing cells read from /content/BCCD_Dataset regardless of the current
# working directory at the time this cell runs.
!git clone 'https://github.com/Shenggan/BCCD_Dataset.git' '/content/BCCD_Dataset'

### Data preprocessing

In [None]:
# Flatten the XML annotation files into one CSV row per annotated bounding box.
annotations = sorted(glob('/content/BCCD_Dataset/BCCD/Annotations/*.xml'))

columns = ['prev_filename', 'filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax']
bbox_tags = ('xmin', 'xmax', 'ymin', 'ymax')

records = []
for index, file in enumerate(annotations):
  # basename/splitext keep a stem that itself contains dots intact, which the
  # chained str.split('.') approach would truncate at the first dot.
  prev_filename = os.path.splitext(os.path.basename(file))[0] + '.jpg'
  # Sequential alias for the image: position of its annotation in the sorted list.
  filename = str(index) + '.jpg'

  for node in ET.parse(file).getroot().iter('object'):
    blood_cells = node.find('name').text
    xmin, xmax, ymin, ymax = (int(node.find('bndbox/' + tag).text) for tag in bbox_tags)
    records.append([prev_filename, filename, blood_cells, xmin, xmax, ymin, ymax])

# Passing `columns` keeps the schema stable even when no annotation was found.
data = pd.DataFrame(records, columns=columns)
data.to_csv('/content/blood_cell_detection.csv', index=False)
data.head(10)

In [None]:
# Image dimensions (pixels) used as the divisors when normalising coordinates.
img_width = 640
img_height = 480


def width(df):
  """Return the box width (xmax - xmin) of one row as an int."""
  span = df.xmax - df.xmin
  return int(span)


def height(df):
  """Return the box height (ymax - ymin) of one row as an int."""
  span = df.ymax - df.ymin
  return int(span)


def x_center(df):
  """Return the horizontal box center of one row, truncated to an int."""
  half_width = df.width / 2
  return int(df.xmin + half_width)


def y_center(df):
  """Return the vertical box center of one row, truncated to an int."""
  half_height = df.height / 2
  return int(df.ymin + half_height)


def w_norm(df):
  """Divide a horizontal pixel value by img_width."""
  normalised = df / img_width
  return normalised


def h_norm(df):
  """Divide a vertical pixel value by img_height."""
  normalised = df / img_height
  return normalised

# Reload the per-box table from the CSV written by the parsing cell.
df = pd.read_csv('/content/blood_cell_detection.csv')

# Map the cell-type strings to integer class ids and show the fitted class order.
le = preprocessing.LabelEncoder()
le.fit(df['cell_type'])
print(le.classes_)
labels = le.transform(df['cell_type'])
df['labels'] = labels

# Pixel-space columns, computed row by row. Sizes come first because the
# center helpers read the 'width' / 'height' columns.
for column, row_fn in (
  ('width', width),
  ('height', height),
  ('x_center', x_center),
  ('y_center', y_center),
):
  df[column] = df.apply(row_fn, axis=1)

# Normalised columns: horizontal values go through w_norm, vertical through h_norm.
for column, source, scale_fn in (
  ('x_center_norm', 'x_center', w_norm),
  ('width_norm', 'width', w_norm),
  ('y_center_norm', 'y_center', h_norm),
  ('height_norm', 'height', h_norm),
):
  df[column] = df[source].apply(scale_fn)

df.head(30)

In [None]:
# Split by image, not by bounding-box row. `df` holds one row per box, so a
# row-level split scatters the boxes of a single image across both sets; that
# image would then be exported to train AND valid, each time with only part of
# its labels.
image_names = df['prev_filename'].unique()
train_names, valid_names = model_selection.train_test_split(image_names, test_size=0.1, random_state=13, shuffle=True)
df_train = df[df['prev_filename'].isin(train_names)]
df_valid = df[df['prev_filename'].isin(valid_names)]
print(df_train.shape, df_valid.shape)

# makedirs(exist_ok=True) creates the parent folders as needed and lets this
# cell be re-run without raising FileExistsError.
for folder in (
  '/content/bc/images/train/',
  '/content/bc/images/valid/',
  '/content/bc/labels/train/',
  '/content/bc/labels/valid/',
):
  os.makedirs(folder, exist_ok=True)

def segregate_data(df, img_path, label_path, train_img_path, train_label_path):
  """Export one split: write a label .txt per image and copy the image file.

  Args:
    df: per-box table with columns 'filename', 'prev_filename', 'labels',
      'x_center_norm', 'y_center_norm', 'width_norm' and 'height_norm'.
    img_path: directory holding the source images named by 'prev_filename'.
    label_path: unused; kept so existing callers keep working.
    train_img_path: destination directory for the copied images.
    train_label_path: destination directory for the generated label files.

  Each label file has one line per box: "<label> <x_center> <y_center> <width> <height>".
  """
  yolo_columns = ['labels', 'x_center_norm', 'y_center_norm', 'width_norm', 'height_norm']

  # One groupby pass replaces re-filtering the whole frame for every image, and
  # the image name is read from the group itself instead of from a loop
  # variable left over after the inner loop finished.
  for _, boxes in df.groupby('filename', sort=False):
    # Assumes every 'filename' maps to a single 'prev_filename' — verify against caller.
    source_name = boxes['prev_filename'].iloc[0]
    # splitext strips only the final extension, so dotted stems stay intact.
    stem = os.path.splitext(source_name)[0]

    txt_filename = os.path.join(train_label_path, stem + ".txt")
    yolo_rows = boxes[yolo_columns].to_numpy(dtype=float)
    # Save the .img & .txt files to the corresponding train and validation folders
    np.savetxt(txt_filename, yolo_rows, fmt=["%d", "%f", "%f", "%f", "%f"])
    shutil.copyfile(os.path.join(img_path, source_name), os.path.join(train_img_path, source_name))
 
## Apply function ##
# Source folders inside the cloned dataset.
src_img_path = "/content/BCCD_Dataset/BCCD/JPEGImages/"
src_label_path = "/content/BCCD_Dataset/BCCD/Annotations/"

# Destination folders, one image/label pair per split.
train_img_path, train_label_path = "/content/bc/images/train", "/content/bc/labels/train"
valid_img_path, valid_label_path = "/content/bc/images/valid", "/content/bc/labels/valid"

# Export the training split first, then the validation split.
for split_df, split_img_path, split_label_path in (
  (df_train, train_img_path, train_label_path),
  (df_valid, valid_img_path, valid_label_path),
):
  segregate_data(split_df, src_img_path, src_label_path, split_img_path, split_label_path)

# Report how many files ended up in each destination folder.
print("No. of Training images", len(os.listdir(train_img_path)))
print("No. of Training labels", len(os.listdir(train_label_path)))

print("No. of valid images", len(os.listdir(valid_img_path)))
print("No. of valid labels", len(os.listdir(valid_label_path)))

### Clone YOLOv7

In [None]:
# Download the YOLOv7 code. Absolute paths keep a re-run of this cell from
# cloning a nested yolov7/yolov7 copy when the working directory is already
# /content/yolov7 from a previous execution.
%cd /content
!git clone https://github.com/WongKinYiu/yolov7
%cd /content/yolov7

### Download weights

In [None]:
# Download the pretrained weights into the current directory. -nc (no-clobber)
# skips the download when yolov7.pt is already present, so re-running the cell
# does not pile up yolov7.pt.1, yolov7.pt.2, ... copies.
!wget -nc https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt

### Install requirements

In [None]:
# Install YOLOv7's dependencies. %pip (rather than !pip) installs into the
# environment of the running kernel.
%pip install -qr '/content/yolov7/requirements.txt'

### Make a custom YAML file
<p>The file should have the following content:</p>

train: /content/bc/images/train<br>
val: /content/bc/images/valid<br>
<br>
nc: 3<br>
names: ['Platelets', 'RBC', 'WBC']

In [None]:
# Write the dataset description to ./data/bc.yaml (relative to the current
# working directory): train/val image folders, class count and class names.
# NOTE(review): the order of `names` has to line up with the integer ids in the
# 'labels' column produced by the LabelEncoder cell — compare with the
# `le.classes_` printout to confirm.
!echo -e "train: /content/bc/images/train\nval: /content/bc/images/valid\n\nnc: 3\nnames: ['Platelets', 'RBC', 'WBC']" > ./data/bc.yaml

### Get the custom model files

In [None]:
# Fetch the two custom model definitions (regular and tiny) and store them under
# ./cfg/training/ with the names the training commands pass to --cfg.
# -O writes to the given path, so a re-run replaces the existing file.
!wget https://raw.githubusercontent.com/4rn3/yolov7_yaml_file/main/yolov7-bloodcells.yaml -O ./cfg/training/yolo_bloodcell.yaml
!wget https://raw.githubusercontent.com/4rn3/yolov7_yaml_file/main/yolov7-tiny-bloodcells.yaml -O ./cfg/training/yolo_bloodcell_tiny.yaml

### Check if GPU is recognised

In [None]:
# Run nvidia-smi to check that a GPU is visible before training with --device 0.
!nvidia-smi

In [None]:
# Load the TensorBoard notebook extension and open it on the runs/ directory
# (relative to the current working directory).
# NOTE(review): presumably train.py writes its logs under runs/ — confirm.
%load_ext tensorboard
%tensorboard --logdir runs/

### Training yolov7

In [None]:
#yolov7-tiny batch-size 8 epochs 100
#!python train.py --workers 8 --device 0 --batch-size 8 --data data/bc.yaml --img 640 480 --cfg cfg/training/yolo_bloodcell_tiny.yaml --weights 'yolov7.pt' --name yolov7-bc-tiny-100 --hyp data/hyp.scratch.custom.yaml --epochs 100

In [None]:
#yolov7-tiny batch-size 8 epochs 300
#!python train.py --workers 8 --device 0 --batch-size 8 --data data/bc.yaml --img 640 480 --cfg cfg/training/yolo_bloodcell_tiny.yaml --weights 'yolov7.pt' --name yolov7-bc-tiny-300 --hyp data/hyp.scratch.custom.yaml

In [None]:
#yolov7-tiny batch-size 16 epochs 300
#!python train.py --workers 8 --device 0 --batch-size 16 --data data/bc.yaml --img 640 480 --cfg cfg/training/yolo_bloodcell_tiny.yaml --weights 'yolov7.pt' --name yolov7-bc-tiny-300-16 --hyp data/hyp.scratch.custom.yaml

In [None]:
#yolov7-normal batch-size 8 epochs 100
#!python train.py --workers 8 --device 0 --batch-size 8 --data data/bc.yaml --img 640 480 --cfg cfg/training/yolo_bloodcell.yaml --weights 'yolov7.pt' --name yolov7-bc-normal-100 --hyp data/hyp.scratch.custom.yaml --epochs 100

In [None]:
#yolov7-normal batch-size 8 epochs 300
#!python train.py --workers 8 --device 0 --batch-size 8 --data data/bc.yaml --img 640 480 --cfg cfg/training/yolo_bloodcell.yaml --weights 'yolov7.pt' --name yolov7-bc-normal-300 --hyp data/hyp.scratch.custom.yaml

In [None]:
#yolov7-normal batch-size 32 epochs 300
# YOLOv7's --img-size expects [train, test] square image sizes, not width and
# height: "640 480" would train at 640 but evaluate at 480. Both are set to 640
# so training and evaluation use the same resolution.
# --epochs is not passed, so train.py's default applies (300 per the title
# above — confirm against train.py).
!python train.py --workers 8 --device 0 --batch-size 32 --data data/bc.yaml --img-size 640 640 --cfg cfg/training/yolo_bloodcell.yaml --weights 'yolov7.pt' --name yolov7-bc-normal-300-32 --hyp data/hyp.scratch.custom.yaml

In [None]:
#yolov7-extra batch-size 32 epochs 300
#!python train.py --workers 8 --device 0 --batch-size 32 --data data/bc.yaml --img 640 480 --cfg cfg/training/yolo_bloodcell.yaml --weights 'yolov7.pt' --name yolov7-bc-extra-300-32 --hyp data/hyp.scratch.custom.yaml