# Reference note: the commands in steps 2 to 9 are adapted from the notebook at https://colab.research.google.com/drive/1zqRb08ljHvIIMR4fgAXeNy1kUtjDU85B?usp=sharing. The program in step 10 performs real-time object detection and recognition using a webcam.

# 1) In your drive, create two folders named yolov4 and training

# 2) Mount the drive, link your folder and navigate to the /mydrive/yolov4 folder

In [None]:
#mount drive
%cd ..
from google.colab import drive
drive.mount('/content/gdrive')

# this creates a symbolic link so that now the path /content/gdrive/My\ Drive/ is equal to /mydrive
!ln -s /content/gdrive/My\ Drive/ /mydrive

# list the contents of /mydrive
!ls /mydrive

#Navigate to /mydrive/yolov4
%cd /mydrive/yolov4

# 3) Clone the git repository of darknet


In [None]:
# Clone the AlexeyAB/darknet repository into a darknet/ sub-folder of the current directory.
!git clone https://github.com/AlexeyAB/darknet

# 4) For training a custom object detector, create and upload the following files

# 4(a) Upload the obj.zip file containing the labeled custom dataset to the yolov4 folder on your drive
# 4(b) Create a config file and upload it to the drive
# 4(c) Create the obj.names and obj.data files and upload them to the drive



# 5) Run below commands to enable OPENCV and GPU

In [None]:

# Enter the cloned repository; the commands below edit its Makefile.
%cd darknet/
# sed -i edits the Makefile in place; each command rewrites one "FLAG=0" setting to "FLAG=1".
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile
# NOTE(review): LIBSO=1 presumably builds the shared library that darknet.py loads in step 10 - confirm against the Makefile.
!sed -i 's/LIBSO=0/LIBSO=1/' Makefile

# 6) Run make command to build darknet

In [None]:
# Build darknet from the Makefile edited in the previous step (run from inside the darknet/ directory).
!make

# 7) Copy all the files from the yolov4 folder to the darknet directory

In [None]:

# Delete the regular files that sit directly inside data/ (-maxdepth 1 -type f);
# sub-directories of data/ are not matched by this find expression.
%cd data/
!find -maxdepth 1 -type f -exec rm -rf {} \;
%cd ..

# Remove the cfg/ directory and recreate it empty, so it only holds what is copied in below.
%rm -rf cfg/
%mkdir cfg

In [None]:

# Extract the dataset archive obj.zip from the drive into the data/ folder.
!unzip /mydrive/yolov4/obj.zip -d data/

In [None]:
# Copy the yolov4-custom.cfg file from the drive into the cfg/ folder of the darknet directory.

!cp /mydrive/yolov4/yolov4-custom.cfg cfg

# Verify that the custom config file is now in the cfg folder.
!ls cfg/

In [None]:
# Copy the obj.names and obj.data files from the drive into the data/ folder of the darknet directory.

!cp /mydrive/yolov4/obj.names data
!cp /mydrive/yolov4/obj.data  data

# Verify that both files are now in the data folder.
!ls data/

labels	obj  obj.data  obj.names


In [None]:
# Copy the process.py file from the drive into the current (darknet) directory.

!cp /mydrive/yolov4/process.py .

# 8) Run the below command in order to download the pretrained weights of the YOLO v4 model

In [None]:
# Download the yolov4 pre-trained weights file (yolov4.conv.137) into the current directory.
# It is passed to the training command in step 9 as the initial weights.
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137

# 9) Run below command to Train your YOLO v4 model

In [None]:

# Train the detector with the data file, the custom config and the pre-trained weights downloaded in step 8.
# NOTE(review): -dont_show presumably suppresses the chart window and -map presumably reports mAP during training - confirm against the darknet documentation.
!./darknet detector train data/obj.data cfg/yolov4-custom.cfg yolov4.conv.137 -dont_show -map

# 10) Program for real-time detection of traffic lights and stop signs using live webcam streaming

In [None]:
#all the required libraries are imported
from IPython.display import display, HTML,Image
from google.colab.output import eval_js
from google.colab.patches import cv2_imshow
from base64 import b64decode, b64encode
import cv2
import numpy as np
import PIL
import io
import json
import html
import time
import matplotlib.pyplot as plt
from darknet import *
%matplotlib inline

#The whole page (HTML, CSS and JavaScript) is passed as one string to HTML(), and display() renders it in the cell output.
#The JavaScript globals and functions defined in it (currFrame, stopRequested, getFrameAndClear(),
#createdetectoinbboxes(), ...) are the ones the Python detection loop reaches through eval_js().
display(HTML('''
<!DOCTYPE html>
<html>
  <script>
//all required variables are declared here
var videoStream;
var clearHandle;
var frameCount = 0;
var currFrame;
var width = 0;
var height = 0;
var videoElement;
var canvasElement;
var containerElement;
var streaming = false;
var prevbboxes = [];
var detectionsDiv;
var stopRequested = false;

//streamVideo() gets invoked when Start Video button is clicked. This starts the webcam of the machine.
function streamVideo() {
    videoElement = document.getElementById("videoElement");
    canvasElement = document.getElementById("canvas");
    containerElement = document.getElementById("container");

    //navigator.mediaDevices can be undefined (for example on insecure origins), so it is tested before use.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({ video: {facingMode: "environment"}, audio: false })
        .then(function (stream) {
            videoElement.srcObject = stream;
            videoStream = stream;
            videoElement.play();
        })
        .catch(function (error) {
            console.log("video failed with error: " + error);
        });
    } else {
        console.log("video failed with error: getUserMedia is not available");
    }

//The canplay event fires once the video can start playing. The first time it fires, the real video size is
//recorded and the container, video and canvas elements are resized to match it.
    videoElement.addEventListener('canplay', function(ev) {
      if (!streaming) {
        console.log(videoElement.videoHeight, videoElement.videoWidth);
        height = videoElement.videoHeight;
        width = videoElement.videoWidth;
        containerElement.style.width = width + "px";
        containerElement.style.height = height + "px";
        videoElement.style.width = width + "px";
        videoElement.style.height = height + "px";
        canvasElement.style.width = width + "px";
        canvasElement.style.height = height + "px";
        streaming = true;
      }
    }, false)
}

//stopVideo() function gets invoked when stop video button is pressed.
function stopVideo() {
    //Nothing to stop when the webcam was never started.
    if (!videoStream) {
        return;
    }
    videoStream.getTracks().forEach(function(track) {
        track.stop();
    });
}

//saveVideoFrames() function saves the video frames captured by the webcam. When Start Capture button is pressed after starting webcam,
//video frames are saved after every 5 ms interval.
function saveVideoFrames() {
  if (stopRequested) {
    currFrame = null;
    return;
  }
  //Until the video has reported its size there is nothing to draw: a 0x0 canvas would only
  //produce an empty data URL that cannot be decoded as an image.
  if (!streaming || width === 0 || height === 0) {
    return;
  }
  var context = canvasElement.getContext('2d');
  canvasElement.width = width;
  canvasElement.height = height;
  context.drawImage(videoElement, 0, 0, width, height);
  currFrame = canvasElement.toDataURL('image/jpeg', 0.8);
}

//startSavingVideoFrames() function gets invoked when Start Capture button is pressed.
function startSavingVideoFrames() {
  //Drop a timer left over from an earlier click so that only one capture timer is running,
  //and clear the stop flag so that capturing can be started again after Stop Capture.
  clearInterval(clearHandle);
  stopRequested = false;
  clearHandle = setInterval(saveVideoFrames, 5);
}

//stopSavingVideoFrames( ) function gets invoked when Stop Capture button is pressed.
function stopSavingVideoFrames( ) {
  clearInterval(clearHandle);
  stopRequested = true;
  //The timer is already cancelled, so saveVideoFrames() will not run again to reset the frame;
  //it is cleared here so that the last captured frame is not handed out over and over.
  currFrame = null;
}

//removeboxfromvideo() is called from createdetectoinbboxes(). It removes the previous bounding boxes.
function removeboxfromvideo() {
  for (let i = 0; i < prevbboxes.length; i++) {
      prevbboxes[i].remove();
  }
  prevbboxes = [];
}

//createdetectoinbboxes() function accepts 3 parameters: the detections as a JSON string, the width ratio and the height ratio.
//Each detection is [label, accuracy, [x, width, y, height]]; the box values are already scaled to the video size,
//so the two ratios are not used here.
function createdetectoinbboxes(data, wr, hr) {
    removeboxfromvideo();
    var detections = JSON.parse(data);
    for (let i = 0; i < detections.length; i++) {
      var label = detections[i][0];
      var accuracy = detections[i][1];
      var box = detections[i][2];
      createbbox(label, accuracy, box[0], box[2], box[1], box[3]);
    }
}

//createbbox() accepts 6 parameters. type of object i.e., traffic light or stop sign, accuracy, x coordinate, y coordinate, width and height.
//This function creates bounding boxes along with class label and accuracies.
function createbbox(type, accuracy, startx, starty, w, h) {
    detectionsDiv = document.getElementById("detections");
    var idiv = document.createElement('div');
    var labdiv = document.createElement('label');
    labdiv.style.color = "red";
    labdiv.innerText = type + " " + accuracy;

    idiv.className = 'box'
    idiv.style.width = w + "px";
    idiv.style.height = h + "px";
    idiv.style.left = startx + "px";
    idiv.style.top = starty + "px";
    idiv.appendChild(labdiv);
    detectionsDiv.appendChild(idiv);
    prevbboxes.push(idiv);
}

//setdebugimg() appends the given image (a URL or data URL) to the debug div, sized like the video.
function setdebugimg(data) {
  var image = new Image();
  image.src = data;
  image.width = width;
  image.height = height;
  var debugDiv = document.getElementById("debug");
  debugDiv.appendChild(image);
}

//getFrameAndClear() calls removeboxfromvideo() which removes the previous bounding boxes and return current frame.
function getFrameAndClear() {
  removeboxfromvideo()
  return currFrame;
}

  </script>
  <head>
    <meta charset="utf-8">
    <title>Display Webcam Stream</title>

    <style>
      #container {
        margin: 0px;
        width: 400px;
        height: 400px;
        border: 10px #333 solid;
      }
      #videoElement {
        position: absolute;
        width: 400px;
        height: 400px;
        background-color: #666;
      }
      /* Positioned so that the absolutely positioned boxes are placed relative to the video area. */
      #detections {
        position: absolute;
      }
      #canvas {
        display: none;
      }
      .box {
        position: absolute;
        z-index: 1000;
        border: solid 3px green;
      }
    </style>
  </head>

  <body>
    <canvas id="canvas"></canvas>
    <div id="container"> 
      <div id="detections">
      <video autoplay="true" id="videoElement"></video>
      <div class="box"></div>
      </div>
    </div>
    <div id="debug">
    </div>
    <div id="control">
      <button onClick=streamVideo() id="startVideo"> Start Video </button>
      <button onClick=stopVideo() id="stopVideo"> Stop Video </button>
      <br/>
      <button onClick=startSavingVideoFrames() id="startCapture"> Start Capture </button>
      <button onClick=stopSavingVideoFrames() id="stopCapture"> Stop Capture </button>
    </div>
    <script>

    </script>
  </body>
</html>

'''))

#load_network() accepts the config file, the data file and the weights of the trained model.
network, class_names, class_colors = load_network("cfg/yolov4-custom.cfg", "data/obj.data", "/mydrive/yolov4/training/yolov4-custom_best.weights")
nwidth = network_width(network)
nheight = network_height(network)
img_height = 0
img_width = 0

#By using eval_js() python function, we can call JavaScript functions through Python program.
#The loop below pulls the newest webcam frame from the page, runs the detector on it and sends the
#resulting boxes back to the page. It ends when Stop Capture has been pressed.
while True:
    # One round trip returns both the newest frame and the stop flag of the page.
    # "|| null" and "!!" make sure both keys are always present in the reply.
    reply = eval_js('({frame: getFrameAndClear() || null, stopped: !!stopRequested})')
    if reply['stopped']:
        break

    cf = reply['frame']
    if not cf:
        # No frame has been captured yet: wait a little instead of spinning on eval_js().
        time.sleep(0.05)
        continue

    # A data URL looks like "data:image/jpeg;base64,<payload>"; an empty payload cannot be decoded.
    payload = cf.partition(',')[2]
    if not payload:
        time.sleep(0.05)
        continue

    imgnparr = np.frombuffer(b64decode(payload), dtype=np.uint8)
    cvimg = cv2.imdecode(imgnparr, cv2.IMREAD_COLOR)
    if cvimg is None:
        # The payload was not a decodable image; skip this frame.
        continue

    img_rgb = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img_rgb, (nwidth, nheight),
                             interpolation=cv2.INTER_LINEAR)

    # get image ratios to convert bounding boxes to proper size
    # (the frame is captured at the size of the video, so its shape gives that size directly)
    img_height, img_width = cvimg.shape[:2]
    width_ratio = img_width / nwidth
    height_ratio = img_height / nheight

    # run model on darknet style image to get detections
    darknet_image = make_image(nwidth, nheight, 3)
    try:
        copy_image_from_bytes(darknet_image, img_resized.tobytes())
        detections = detect_image(network, class_names, darknet_image)
    finally:
        # Release the darknet image even when detection raises.
        free_image(darknet_image)

    if not detections:
        continue

    # Scale every box from network size to video size and send it as
    # (label, confidence, (xmin, width, ymin, height)), the layout createdetectoinbboxes() reads.
    newdets = []
    for det in detections:
        xmin, ymin, xmax, ymax = bbox2points(det[2])
        xmin, xmax = int(xmin * width_ratio), int(xmax * width_ratio)
        ymin, ymax = int(ymin * height_ratio), int(ymax * height_ratio)
        newdets.append((det[0], det[1], (xmin, xmax - xmin, ymin, ymax - ymin)))

    # Encoding the JSON text a second time turns it into a correctly quoted and escaped
    # JavaScript string literal, so labels containing quotes or backslashes cannot break the call.
    js_payload = json.dumps(json.dumps(newdets))
    eval_js('createdetectoinbboxes({}, {}, {})'.format(js_payload, width_ratio, height_ratio))