In [None]:
import pandas as pd 
import numpy as np 
import os
import random

In [None]:
# Mount the user's Google Drive inside the Colab VM (Colab-only API) and make
# the Drive root the current working directory for the following cells.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): this uses the "MyDrive" spelling while later cells assert on
# "/content/drive/My Drive/..." paths - confirm both spellings resolve to the
# same folder on the runtime in use.
os.chdir("/content/drive/MyDrive/")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Print the version of the CUDA compiler (nvcc) installed on the runtime.
# NOTE(review): this reports the CUDA toolkit, not whether a GPU is actually
# attached to the session; `!nvidia-smi` would list the device - confirm which
# check is intended.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


# Data Preparation Class

In [None]:
#class of important functions
#YOLO format rules (all values are normalized by the image size)
'''
x_center = ( (x_max-x_min)/2 + x_min ) / image_width
y_center = ( (y_max-y_min)/2 + y_min ) / image_height
width    = (x_max-x_min) / image_width
height   = (y_max-y_min) / image_height'''

class Preparator:
  """Turn a bounding-box annotation CSV into the files darknet/YOLO needs.

  The methods read these CSV columns: ``image_id``, ``class_name``,
  ``class_id``, ``x_min``, ``y_min``, ``x_max`` and ``y_max``. Rows whose
  ``class_name`` is ``'No finding'`` describe normal images and are never
  exported as a detection class.
  """

  # Name of the "normal image" class that is excluded from the class list.
  NORMAL_CLASS = 'No finding'

  def __init__(self,csv_path):
    """Load the annotation CSV and count the detectable classes.

    Args:
      csv_path: path of the annotation CSV file.
    """
    self.csv=pd.read_csv(csv_path)
    # Count every class except the normal one. Filtering by name (instead of
    # blindly subtracting 1) stays correct when the CSV has no normal rows.
    self.number_of_classes=sum(
        1 for name in self.csv.class_name.unique() if name != self.NORMAL_CLASS)

  def drop_normal_images(self):
    """Drop every row that contains a missing value and return the result.

    Presumably the 'No finding' rows carry no box coordinates, which is why
    ``dropna`` removes them - verify against the CSV. ``self.csv`` is replaced
    by the filtered frame.

    Returns:
      The filtered DataFrame (also stored in ``self.csv``).
    """
    print('befor dropping size = ',len(self.csv))
    self.csv=self.csv.dropna()
    print('after dropping dropping size = ',len(self.csv))
    return self.csv


  def write_into_file(self,file_path='',file_name='',lst=None):
    """Write every item of ``lst`` on its own line of ``file_path + file_name``.

    Args:
      file_path: directory prefix; concatenated as-is, so it must end with a
        path separator.
      file_name: name of the file to (over)write.
      lst: iterable of strings; ``None`` (the default) writes an empty file.
    """
    # `is None` rather than truthiness: callers may pass NumPy arrays, whose
    # truth value is ambiguous.
    lines=[] if lst is None else lst

    # The context manager closes the file even if a write fails.
    with open(file_path + file_name, "w") as out_file:
      for line in lines:
        out_file.write(line)
        out_file.write("\n")


  def create_class_names(self,output_path='/content/drive/MyDrive/',file_name=''):
    """Write the sorted class names, without the normal class, to a file.

    The names are also stored in ``self.class_names``.
    NOTE(review): the line order is alphabetical by name while the label files
    use ``class_id`` from the CSV - confirm that both orderings agree.

    Args:
      output_path: directory prefix of the output file (must end with '/').
      file_name: name of the class-names file.
    """
    # Filtering (instead of deleting by index) also works when the normal
    # class is absent, e.g. after drop_normal_images().
    self.class_names=sorted(
        name for name in self.csv.class_name.unique() if name != self.NORMAL_CLASS)

    #write the list into file line by line
    self.write_into_file(output_path,file_name,self.class_names)


  def create_train_test_txt(self,train_path='/content/drive/MyDrive/',output_path='/content/drive/MyDrive/',exten='.jpg',train_fraction=0.8,seed=None):
    """Shuffle the distinct image paths and write train.txt / test.txt.

    Each path is ``train_path + image_id + exten``. The shuffled list is kept
    in ``self.image_ids`` and the size of each split is printed.

    Args:
      train_path: directory prefix put in front of every image id.
      output_path: directory prefix where train.txt and test.txt are written.
      exten: image file extension appended to every image id.
      train_fraction: share of the images written to train.txt (default 80%).
      seed: optional seed for a reproducible shuffle; ``None`` uses the global
        ``random`` state.
    """
    # Build a real Python list (the original kept a NumPy array because the
    # converted list was assigned to a misspelled attribute).
    self.image_ids=[train_path + image_id + exten for image_id in self.csv.image_id.unique()]

    #shuffle our dataset
    if seed is None:
      random.shuffle(self.image_ids)
    else:
      random.Random(seed).shuffle(self.image_ids)

    split_at=int(len(self.image_ids)*train_fraction)
    train_ids=self.image_ids[:split_at]
    test_ids=self.image_ids[split_at:]

    #write into train file
    print(len(train_ids))
    self.write_into_file(output_path,'train.txt',train_ids)

    #write into test file
    print(len(test_ids))
    self.write_into_file(output_path,'test.txt',test_ids)


  def convert_to_yolo_format(self,class_id,x_min,y_min,x_max,y_max,height=1,width=1):
    """Convert a corner-style box to normalized YOLO center/size values.

    Args:
      class_id: class index, returned unchanged.
      x_min, y_min, x_max, y_max: box corners.
      height: image height used to normalize the y values.
      width: image width used to normalize the x values.

    Returns:
      Tuple ``(class_id, x_center, y_center, rec_width, rec_height)`` where
      the four box values are strings formatted with six decimals.
    """
    # note that all of these numbers are normalized
    x_center=( (x_max-x_min)/2 + x_min)/ width
    y_center=( (y_max-y_min)/2 + y_min)/ height
    rec_width=(x_max-x_min)/ width
    rec_height=(y_max-y_min)/ height
    return class_id,"%.6f"%x_center,"%.6f"%y_center,"%.6f"%rec_width,"%.6f"%rec_height


  def create_object_file(self,output_path='',train_path='',test_path='',names_path='',backup_path=''):
    """Write the darknet ``obj.data`` file into ``output_path``.

    Args:
      output_path: directory prefix of obj.data (must end with '/').
      train_path: value written for ``train=``.
      test_path: value written for ``valid=``.
      names_path: value written for ``names=``.
      backup_path: value written for ``backup=``.
    """
    entries=[
        ('classes',str(self.number_of_classes)),
        ('train',train_path),
        ('valid',test_path),
        ('names',names_path),
        ('backup',backup_path),
    ]
    # The original never closed this handle; `with` guarantees the flush/close.
    with open(output_path + 'obj.data', "w") as out_file:
      for key,value in entries:
        out_file.write(key+'= '+value+'\n')


  def create_txt_for_each_image(self, path_for_height_width_file='',path_for_dataset='',verbose=True):
    """Write one YOLO label file (``<image_id>.txt``) per image.

    For every image the boxes of each class are averaged into a single box
    (mean of the corners, rounded), converted with convert_to_yolo_format()
    and written as ``class_id x_center y_center width height``.

    Args:
      path_for_height_width_file: CSV with the columns ``image_id``,
        ``height`` and ``width``.
      path_for_dataset: directory prefix for the label files (must end
        with '/').
      verbose: print the averaged boxes of every image (default keeps the
        original printing).

    Raises:
      ValueError: if an image does not have exactly one height/width row.
    """
    hw_csv = pd.read_csv( path_for_height_width_file )
    box_columns=['x_min','y_min','x_max','y_max']

    # One pass over the annotations, images in order of first appearance.
    for image_id,image_rows in self.csv.groupby('image_id',sort=False):

      #get height and width for this image
      dims=hw_csv.loc[hw_csv.image_id==image_id,['height','width']].values
      if len(dims)!=1:
        raise ValueError('expected exactly one height/width row for image '+str(image_id)+', found '+str(len(dims)))
      h,w=dims[0].tolist()

      #group bound boxes for each class then get single bound box by taking the mean of them
      # (class_id is the group key, so it is read from the index below)
      bboxes_df=image_rows.groupby('class_id')[box_columns].mean().round()
      if verbose:
        print(bboxes_df)

      with open(path_for_dataset + image_id +'.txt', "w") as label_file:
        for class_id,box in bboxes_df.iterrows():
          # int(): darknet expects an integer class index; writing the float
          # produced by pandas ("3.0") corrupts the label line.
          fields=self.convert_to_yolo_format(int(class_id),
                                             box['x_min'],box['y_min'],
                                             box['x_max'],box['y_max'],h,w)
          label_file.write(" ".join(str(field) for field in fields)+"\n")
  
  

# Preparation

In [None]:

#prep=Preparator('/content/drive/MyDrive/Detection/train.csv')
#prep.create_class_names('/content/drive/MyDrive/Detection/','classes.names')
#prep.drop_normal_images()
#prep.create_train_test_txt('/content/drive/MyDrive/Detection/darknet_for_colab/data/chestxray256/','/content/drive/MyDrive/Detection/darknet_for_colab/data/')
#prep.create_txt_for_each_image("/content/drive/MyDrive/Detection/image_height_width.csv","/content/drive/MyDrive/dataset/chest xray256/")
#prep.create_object_file('/content/drive/MyDrive/Detection/darknet_for_colab/data/','/content/drive/MyDrive/Detection/darknet_for_colab/data/train.txt','/content/drive/MyDrive/Detection/darknet_for_colab/data/test.txt','/content/drive/MyDrive/Detection/darknet_for_colab/data/classes.names','/content/drive/MyDrive/Detection/darknet_for_colab/backup')

In [None]:
# Sanity check: report files in the dataset folder that are neither label
# files (.txt) nor images referenced by train.csv. Nothing is deleted - the
# os.remove call below stays commented out on purpose.
dataset=os.listdir('/content/drive/MyDrive/dataset/chest xray256/')
csv=pd.read_csv('/content/drive/MyDrive/Detection/train.csv').image_id.unique()+'.jpg'
csv=csv.tolist()
# A set gives constant-time membership tests instead of scanning the list
# once per file.
known_images=set(csv)
counter=0
for image_id in dataset:
  if image_id not in known_images and '.txt' not in image_id:
    counter+=1
    print('remove this image_id ',image_id)
    #os.remove('/content/drive/MyDrive/dataset/chest xray256/'+image_id)
print(counter)

0


# Setup Darknet Environment


In [None]:
# Guard: the repository must be cloned into the Detection folder.
# NOTE(review): the only earlier chdir visible in this notebook goes to
# "/content/drive/MyDrive/"; no visible cell changes into ".../Detection", so
# this assert appears to rely on a working directory set elsewhere - confirm.
assert os.getcwd()=='/content/drive/My Drive/Detection','Directory should be "/content/drive/My Drive/Detection" instead of {}'.format(os.getcwd())
# Clone the darknet_for_colab repository, enter it and build it with make.
!git clone https://github.com/quangnhat185/darknet_for_colab.git
%cd darknet_for_colab
!make
# Mark the darknet.py script as executable.
!chmod +x ./darknet.py

Cloning into 'darknet_for_colab'...
remote: Enumerating objects: 1083, done.[K
remote: Total 1083 (delta 0), reused 0 (delta 0), pack-reused 1083[K
Receiving objects: 100% (1083/1083), 5.16 MiB | 6.99 MiB/s, done.
Resolving deltas: 100% (233/233), done.
Checking out files: 100% (977/977), done.
/content/drive/My Drive/Detection/darknet_for_colab


# Download yolov4 pre-trained weights

In [None]:
# Guard: the weights must be downloaded into the darknet_for_colab folder.
assert os.getcwd()=='/content/drive/My Drive/Detection/darknet_for_colab', 'Directory should be "/content/drive/My Drive/Detection/darknet_for_colab" instead of "{}"'.format(os.getcwd())
# Download the pre-trained yolov4.conv.137 weights file into the current directory.
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137

--2021-07-08 11:41:31--  https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137
Resolving github.com (github.com)... 140.82.112.4
Connecting to github.com (github.com)|140.82.112.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github-releases.githubusercontent.com/75388965/48bfe500-889d-11ea-819e-c4d182fcf0db?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20210708%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20210708T114132Z&X-Amz-Expires=300&X-Amz-Signature=2afd72637eea0aeaa773655d284cf03f271ed22efa2b2c8eb2f9f2228e5c323e&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=75388965&response-content-disposition=attachment%3B%20filename%3Dyolov4.conv.137&response-content-type=application%2Foctet-stream [following]
--2021-07-08 11:41:32--  https://github-releases.githubusercontent.com/75388965/48bfe500-889d-11ea-819e-c4d182fcf0db?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIW

# Chest X-ray dataset (YOLO format)

In [None]:
# Guard that the current directory is the darknet "data" folder. The `%cd data`
# that would enter it is commented out below.
# NOTE(review): the expected path starts with "//"; os.getcwd() normally
# returns a path with a single leading "/", so this assert looks like it cannot
# pass as written - confirm the intended path.
#%cd data
assert os.getcwd()=='//content/drive/My Drive/Detection/darknet_for_colab/data', 'Directory should be "//content/drive/My Drive/Detection/darknet_for_colab/data" instead of "{}"'.format(os.getcwd())
# The commands below are wrapped in a string literal, so none of them (including
# the final `%cd ..`) is executed; they are kept only as a reference.
'''# download custom data of common traffic signs
!wget --no-check-certificate "https://onedrive.live.com/download?cid=A86CBC7F31A1C06B&resid=A86CBC7F31A1C06B%21121&authkey=AMUUk0Np4tqH3n4" -O ts.zip
!unzip ts.zip
!rm -f ts.zip
!ls
%cd ..'''

'/content/drive/My Drive/Detection/darknet_for_colab/data'

In [None]:
#make a copy of folder in google drive
#os.chdir('path/for/the/folder')
#%cp -av  folder_name copy_folder_name

'/content/drive/My Drive/Detection/darknet_for_colab'

# Modify yolov4 architecture
**Double-click the file `yolov4_config.py` to modify the hyperparameters directly from the Colab environment.**

E.g: I will train my dataset with these parameters:
 - classes= 14, 
 - max_batches=8000
 - batch=64
 - subdivisions=16
 - width=256
 - height=256
 - ...

In [None]:
# Optional: enter the darknet_for_colab folder first (kept commented out).
#os.chdir('/content/drive/MyDrive/Detection/darknet_for_colab/')
#assert os.getcwd()=='/content/drive/MyDrive/Detection/darknet_for_colab', 'Directory should be "/content/drive/MyDrive/Detection/darknet_for_colab" instead of "{}"'.format(os.getcwd())

# Run the Python script that creates our customized yolov4_custom_train.cfg
# and yolov4_custom_test.cfg in folder /cfg.
# The script is addressed by a relative path, so this relies on the current
# working directory being the folder that contains yolov4_setup.py.
!python yolov4_setup.py

[INFO] Generating yolov4_custom_train.cfg successfully...
[INFO] Generating yolov4_custom_test.cfg successfully...


# Create Symbolic Link In Our Drive

In [None]:
# Enter the darknet_for_colab folder and verify that the change took effect.
os.chdir('/content/drive/My Drive/Detection/darknet_for_colab')
assert os.getcwd()=='/content/drive/My Drive/Detection/darknet_for_colab', 'Directory should be "/content/drive/My Drive/Detection/darknet_for_colab" instead of "{}"'.format(os.getcwd())

# Delete the existing backup folder from our darknet_for_colab checkout
# (recursive delete) so the symlink created below can take its place.
!rm /content/drive/'My Drive'/Detection/darknet_for_colab/backup -r

# Create a symlink so the trained weights are saved in our Google Drive.
# Prerequisite: create the folder YOLOv4_weight/backup in your Drive to store
# the trained weights. The link is created inside darknet_for_colab under the
# name "backup" and points at that folder.
!ln -s /content/drive/'My Drive'/YOLOv4_weight/backup /content/drive/'My Drive'/Detection/darknet_for_colab

# Training