# Configuring cuDNN on Colab for YOLOv4


In [None]:
# CUDA: check that the NVIDIA CUDA toolkit is pre-installed and print which release it is.
!/usr/local/cuda/bin/nvcc --version
# The cuDNN build that gets installed must match the CUDA release reported above.

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0


In [None]:
# Show the attached GPU (model name, driver / CUDA version, memory and utilisation).
!nvidia-smi

Fri Jan  7 05:39:54 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8    26W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Ensure the right architecture is configured

In [None]:
# This cell selects the correct compute architecture for your GPU.
# If your GPU is not found, look it up in the list below and add its name
# and ARCH flags to the archTypes dictionary.

# Tesla V100
# ARCH= -gencode arch=compute_70,code=[sm_70,compute_70]

# Tesla K80 
# ARCH= -gencode arch=compute_37,code=sm_37

# GeForce RTX 2080 Ti, RTX 2080, RTX 2070, Quadro RTX 8000, Quadro RTX 6000, Quadro RTX 5000, Tesla T4, XNOR Tensor Cores
# ARCH= -gencode arch=compute_75,code=[sm_75,compute_75]

# Jetson XAVIER
# ARCH= -gencode arch=compute_72,code=[sm_72,compute_72]

# GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4
# ARCH= -gencode arch=compute_61,code=sm_61

# GP100/Tesla P100 - DGX-1
# ARCH= -gencode arch=compute_60,code=sm_60

# For Jetson TX1, Tegra X1, DRIVE CX, DRIVE PX - uncomment:
# ARCH= -gencode arch=compute_53,code=[sm_53,compute_53]

# For Jetson Tx2 or Drive-PX2 uncomment:
# ARCH= -gencode arch=compute_62,code=[sm_62,compute_62]
import os
# Ask the driver for the GPU model name. The command output ends with a newline,
# so strip it before exporting: the value is reused by later cells and printed below.
os.environ['GPU_TYPE'] = os.popen('nvidia-smi --query-gpu=name --format=csv,noheader').read().strip()

def getGPUArch(argument):
  """Map a GPU name reported by nvidia-smi to the matching Makefile ARCH flags.

  Args:
    argument: Text produced by
      `nvidia-smi --query-gpu=name --format=csv,noheader`. Surrounding
      whitespace is ignored. When the text holds several lines (nvidia-smi
      prints one name per GPU), the first non-empty line is used.

  Returns:
    The `-gencode ...` flag string for that GPU, or the message
    "GPU must be added to GPU Commands" when the name is not in the table
    (this includes empty input).
  """
  # All Colab GPUs
  archTypes = {
      "Tesla V100-SXM2-16GB": "-gencode arch=compute_70,code=[sm_70,compute_70]",
      "Tesla K80": "-gencode arch=compute_37,code=sm_37",
      "Tesla T4": "-gencode arch=compute_75,code=[sm_75,compute_75]",
      "Tesla P40": "-gencode arch=compute_61,code=sm_61",
      "Tesla P4": "-gencode arch=compute_61,code=sm_61",
      "Tesla P100-PCIE-16GB": "-gencode arch=compute_60,code=sm_60",
  }
  # Keep only the first non-empty line so that multi-GPU output still resolves.
  names = [line.strip() for line in argument.splitlines() if line.strip()]
  gpu_name = names[0] if names else ""
  return archTypes.get(gpu_name, "GPU must be added to GPU Commands")
# Export the ARCH flags so that the shell commands in later cells can read ${ARCH_VALUE}.
os.environ['ARCH_VALUE'] = getGPUArch(os.environ['GPU_TYPE'])

print("GPU Type: " + os.environ['GPU_TYPE'])
print("ARCH Value: " + os.environ['ARCH_VALUE'])

# getGPUArch returns a plain-English message instead of flags for an unknown GPU.
# Stop here rather than let that sentence be written into the Makefile later on.
if not os.environ['ARCH_VALUE'].startswith('-gencode'):
  raise RuntimeError(
      "No ARCH flags known for GPU '" + os.environ['GPU_TYPE'].strip() +
      "'; add it to the archTypes dictionary in getGPUArch.")

GPU Type: Tesla K80

ARCH Value: -gencode arch=compute_37,code=sm_37


# Download the darknet package and modify the Makefile

In [None]:
# Start from a clean checkout: remove any previous clone under /content,
# clone the AlexeyAB darknet repository again and make it the working directory.
%cd /content/
%rm -rf darknet
!git clone https://github.com/AlexeyAB/darknet.git
%cd /content/darknet/


/content
Cloning into 'darknet'...
remote: Enumerating objects: 15386, done.[K
remote: Total 15386 (delta 0), reused 0 (delta 0), pack-reused 15386[K
Receiving objects: 100% (15386/15386), 13.99 MiB | 18.18 MiB/s, done.
Resolving deltas: 100% (10347/10347), done.
/content/darknet


In [None]:
## Change your Makefile (lines 20-28) to the following lines:
# USE_CPP=0
# DEBUG=0

# ARCH= -gencode arch=compute_35,code=sm_35 \
#       -gencode arch=compute_50,code=[sm_50,compute_50] \
#       -gencode arch=compute_52,code=[sm_52,compute_52] \
# 	    -gencode arch=compute_61,code=[sm_61,compute_61] \
#       -gencode arch=compute_37,code=sm_37

# ARCH= -gencode arch=compute_60,code=sm_60

# OS := $(shell uname)


In [None]:
%cd /content/darknet/
!sed -i "1s/GPU=0/GPU=1/"               Makefile
!sed -i "2s/CUDNN=0/CUDNN=1/"           Makefile
!sed -i "4s/OPENCV=0/OPENCV=1/"         Makefile
!sed -i "5s/AVX=0/AVX=1/"               Makefile
!sed -i "7s/LIBSO=0/LIBSO=1/"           Makefile
!sed -i "s/ARCH= -gencode arch=compute_60,code=sm_60/ARCH= ${ARCH_VALUE}/g" Makefile
# !sed -i "21s/^/#/"                      Makefile
# !sed -i "22s/^/#/"                      Makefile
# !sed -i "23s/^/#/"                      Makefile
# !sed -i "24s/^/#/"                      Makefile
!sed -i "300s/calc_map_for_each = 4/calc_map_for_each = 1/" src/detector.c


/content/darknet


In [None]:
# Build darknet using the Makefile in the current directory.
!make

mkdir -p ./obj/
mkdir -p backup
chmod +x *.sh
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -DOPENCV `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv` -DGPU -I/usr/local/cuda/include/ -DCUDNN -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -ffp-contract=fast -mavx -mavx2 -msse3 -msse4.1 -msse4.2 -msse4a -Ofast -DOPENCV -DGPU -DCUDNN -I/usr/local/cudnn/include -fPIC -c ./src/image_opencv.cpp -o obj/image_opencv.o
[01m[K./src/image_opencv.cpp:[m[K In function ‘[01m[Kvoid draw_detections_cv_v3(void**, detection*, int, float, char**, image**, int, int)[m[K’:
                 float [01;35m[Krgb[m[K[3];
                       [01;35m[K^~~[m[K
[01m[K./src/image_opencv.cpp:[m[K In function ‘[01m[Kvoid draw_train_loss(char*, void**, int, float, float, int, int, float, int, char*, float, int, int, double)[m[K’:
             [01;35m[Kif[m[K (iteration_old == 0)
             [01;35m[K^~[m[K
[01m[K./src/image_

# Preparation step
### This step has to be done manually (if you know a trick to automate it, please let me know).
#### 1. Prepare the weights file (save it in build/darknet/x64).
#### 2. Prepare the dataset, i.e. images and annotations (save them in build/darknet/x64/data/obj/ - create the obj folder first).
#### 3. Prepare the files 'modelname'.data, 'modelname'.names, train.txt and test.txt (save them in build/darknet/x64/data/).
Your train.txt should contain the paths to the images, starting from build/...
#### 4. Prepare the configuration file 'modelname'.cfg (save it in build/darknet/x64/cfg).

In [None]:
#put your dataset in a compressed file.rar and push it on google drive
#link to your google drive and copy it to colab
%cd /content/
from google.colab import drive
drive.mount('/content/gdrive')
!cp /content/gdrive/MyDrive/ImageAugmentation/obj.zip /content/darknet/build/darknet/x64/data/
#Then decomspress it
%cd /content/darknet/build/darknet/x64/data/
!unzip obj.zip 
# # !mv "/content/darknet/build/darknet/x64/data/obj/Data/frame0.jpg" "/content/darknet/build/darknet/x64/data/obj/"
#%cd ../

[1;30;43mKết quả truyền trực tuyến bị cắt bớt đến 5000 dòng cuối.[0m
  inflating: obj/image189.jpg        
  inflating: obj/image189.txt        
  inflating: obj/image1890.jpg       
  inflating: obj/image1890.txt       
  inflating: obj/image1891.jpg       
  inflating: obj/image1891.txt       
  inflating: obj/image1892.jpg       
  inflating: obj/image1892.txt       
  inflating: obj/image1893.jpg       
  inflating: obj/image1893.txt       
  inflating: obj/image1894.jpg       
  inflating: obj/image1894.txt       
  inflating: obj/image1895.jpg       
  inflating: obj/image1895.txt       
  inflating: obj/image1896.jpg       
  inflating: obj/image1896.txt       
  inflating: obj/image1897.jpg       
  inflating: obj/image1897.txt       
  inflating: obj/image1898.jpg       
  inflating: obj/image1898.txt       
  inflating: obj/image1899.jpg       
  inflating: obj/image1899.txt       
  inflating: obj/image19.jpg         
  inflating: obj/image19.txt         
  inflating: obj/

In [None]:
%cd obj/
%ls

/content/darknet/build/darknet/x64/data/obj
frame0.jpg     frame6720.jpg  image1410.jpg  image2321.jpg  image3232.jpg
frame0.txt     frame6720.txt  image1410.txt  image2321.txt  image3232.txt
frame1044.jpg  frame6721.jpg  image1411.jpg  image2322.jpg  image3233.jpg
frame1044.txt  frame6721.txt  image1411.txt  image2322.txt  image3233.txt
frame1045.jpg  frame6722.jpg  image1412.jpg  image2323.jpg  image3234.jpg
frame1045.txt  frame6722.txt  image1412.txt  image2323.txt  image3234.txt
frame1046.jpg  frame672.jpg   image1413.jpg  image2324.jpg  image3235.jpg
frame1046.txt  frame672.txt   image1413.txt  image2324.txt  image3235.txt
frame1047.jpg  frame6735.jpg  image1414.jpg  image2325.jpg  image3236.jpg
frame1047.txt  frame6735.txt  image1414.txt  image2325.txt  image3236.txt
frame1048.jpg  frame6736.jpg  image1415.jpg  image2326.jpg  image3237.jpg
frame1048.txt  frame6736.txt  image1415.txt  image2326.txt  image3237.txt
frame1049.jpg  frame6737.jpg  image1416.jpg  image2327.jpg  image323

In [None]:
#To prepare train.txt, test.txt, there is a very useful way to do it:
#Preparing training and testing files
import glob2
import numpy as np
import os
all_files = []
# Move to your dataset directory
# Here is one of mine:
# %cd ImageAugmentation/
%cd /content/darknet/build/darknet/x64/data/obj/

for ext in ["*.png", "*.jpeg", "*.jpg"]:
#Remember to config your directory right here:
  images = glob2.glob(os.path.join("/content/darknet/build/darknet/x64/data/obj/", ext))
  all_files += images
print(all_files)
rand_idx = np.random.randint(0, len(all_files), 200)

# Create train.txt
with open("train.txt", "w") as f:
  for idx in np.arange(len(all_files)):
    if idx not in rand_idx:
      f.write(all_files[idx]+'\n')

# Create valid.txt
with open("test.txt", "w") as f:
  for idx in np.arange(len(all_files)):
    if idx in rand_idx:
      f.write(all_files[idx]+'\n')

/content/darknet/build/darknet/x64/data/obj
['/content/darknet/build/darknet/x64/data/obj/frame122.jpg', '/content/darknet/build/darknet/x64/data/obj/image210.jpg', '/content/darknet/build/darknet/x64/data/obj/image302.jpg', '/content/darknet/build/darknet/x64/data/obj/frame13.jpg', '/content/darknet/build/darknet/x64/data/obj/image284.jpg', '/content/darknet/build/darknet/x64/data/obj/image589.jpg', '/content/darknet/build/darknet/x64/data/obj/frame310.jpg', '/content/darknet/build/darknet/x64/data/obj/frame145.jpg', '/content/darknet/build/darknet/x64/data/obj/image259.jpg', '/content/darknet/build/darknet/x64/data/obj/image389.jpg', '/content/darknet/build/darknet/x64/data/obj/image115.jpg', '/content/darknet/build/darknet/x64/data/obj/image17.jpg', '/content/darknet/build/darknet/x64/data/obj/frame213.jpg', '/content/darknet/build/darknet/x64/data/obj/frame242.jpg', '/content/darknet/build/darknet/x64/data/obj/image453.jpg', '/content/darknet/build/darknet/x64/data/obj/image134.jpg

In [None]:
# Move the generated image lists out of obj/ and up into the data/ directory.
# NOTE(review): presumably the .data file refers to train.txt / test.txt at this location -- confirm.
!mv "/content/darknet/build/darknet/x64/data/obj/train.txt" "/content/darknet/build/darknet/x64/data/"
!mv "/content/darknet/build/darknet/x64/data/obj/test.txt" "/content/darknet/build/darknet/x64/data/"


# Start training

In [None]:
# Run training from the darknet root, starting from the obj_5000.weights file.
# stderr is merged into stdout and grep keeps only the progress lines
# ("hours left") and the lines containing "mean_average".
# NOTE(review): -map / -dont_show are darknet options (mAP reporting during training /
# no display window, per the darknet README) -- confirm against the checked-out version.
%cd /content/darknet
%ls
!./darknet detector train build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/obj_5000.weights -gpus 0 -map -dont_show 2>&1 | grep -E "hours left|mean_average"




 7799: 0.109445, 0.102397 avg loss, 0.000026 rate, 2.139041 seconds, 499136 images, 0.182038 hours left
 7800: 0.115207, 0.103678 avg loss, 0.000026 rate, 2.194992 seconds, 499200 images, 0.181412 hours left
 7801: 0.135322, 0.106842 avg loss, 0.000026 rate, 2.135370 seconds, 499264 images, 0.180817 hours left
 7802: 0.137206, 0.109879 avg loss, 0.000026 rate, 2.179414 seconds, 499328 images, 0.180190 hours left
 7803: 0.090347, 0.107926 avg loss, 0.000026 rate, 2.170598 seconds, 499392 images, 0.179586 hours left
 7804: 0.093871, 0.106520 avg loss, 0.000026 rate, 2.173604 seconds, 499456 images, 0.178978 hours left
 7805: 0.092900, 0.105158 avg loss, 0.000026 rate, 2.173174 seconds, 499520 images, 0.178372 hours left
 7806: 0.100846, 0.104727 avg loss, 0.000026 rate, 2.151176 seconds, 499584 images, 0.177765 hours left
 7807: 0.106683, 0.104923 avg loss, 0.000026 rate, 2.150972 seconds, 499648 images, 0.177147 hours left
 7808: 0.157438, 0.110174 avg loss, 0.000026 rate, 2.153803 seco

# Evaluation


In [None]:
%cd /content/darknet
print('-------------------obj_last.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_last.weights
print('-------------------obj_best.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_best.weights
print('-------------------obj_4000.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_4000.weights
print('-------------------obj_5000.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_5000.weights
print('-------------------obj_6000.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_6000.weights
print('-------------------obj_7000.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_7000.weights
print('-------------------obj_8000.weight-------------------------------')
!./darknet detector map build/darknet/x64/data/obj.data build/darknet/x64/cfg/obj.cfg build/darknet/x64/backup/obj_8000.weights


/content/darknet
-------------------obj_last.weight-------------------------------
 CUDA-version: 11010 (11020), cuDNN: 7.6.5, GPU count: 1  
 OpenCV version: 3.2.0
 0 : compute_capability = 370, cudnn_half = 0, GPU: Tesla K80 
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
   layer   filters  size/strd(dil)      input                output
   0 Create CUDA-stream - 0 
 Create cudnn-handle 0 
conv     32       3 x 3/ 2    416 x 416 x   3 ->  208 x 208 x  32 0.075 BF
   1 conv     64       3 x 3/ 2    208 x 208 x  32 ->  104 x 104 x  64 0.399 BF
   2 conv     64       3 x 3/ 1    104 x 104 x  64 ->  104 x 104 x  64 0.797 BF
   3 route  2 		                       1/2 ->  104 x 104 x  32 
   4 conv     32       3 x 3/ 1    104 x 104 x  32 ->  104 x 104 x  32 0.199 BF
   5 conv     32       3 x 3/ 1    104 x 104 x  32 ->  104 x 104 x  32 0.199 BF
   6 route  5 4 	                           ->  104 x 104 x  64 
   7 conv     64       1 x 1/ 1    104 x 104 x

In [None]:
## Wait loop: print an increasing counter every 10 seconds until the cell is interrupted
## (presumably used to keep the session busy -- confirm intent).
import time
tick = 0
while True:
  tick += 1
  time.sleep(10)
  print(tick)

KeyboardInterrupt: ignored