<a href="https://colab.research.google.com/github/CedricFont/Deep-learning-for-autonomous-vehicle/blob/develop/Final_project/Object_detection/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import cv2
import numpy as np

In [3]:
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from google.colab.patches import cv2_imshow
from base64 import b64decode, b64encode
import cv2

In [4]:
!git clone https://github.com/spmallick/learnopencv.git

Cloning into 'learnopencv'...
remote: Enumerating objects: 10158, done.[K
remote: Total 10158 (delta 0), reused 0 (delta 0), pack-reused 10158[K
Receiving objects: 100% (10158/10158), 1.22 GiB | 11.86 MiB/s, done.
Resolving deltas: 100% (3023/3023), done.
Checking out files: 100% (4489/4489), done.


In [7]:
# Network input size and detection thresholds.
INPUT_WIDTH, INPUT_HEIGHT = 640, 640
SCORE_THRESHOLD = 0.5          # minimum best-class score (see post_process)
NMS_THRESHOLD = 0.45           # overlap threshold handed to cv2.dnn.NMSBoxes
CONFIDENCE_THRESHOLD = 0.45    # minimum detection confidence (see post_process)

# Text rendering parameters for labels drawn on images.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

# Colour tuples passed to OpenCV drawing calls.
BLACK = (0, 0, 0)
BLUE = (255, 178, 50)
YELLOW = (0, 255, 255)
RED = (0, 0, 255)

In [8]:
# function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """Turn a base64 data-URL string coming from the browser into an OpenCV image.

  Params:
          js_reply: string of the form '<prefix>,<base64 data>'; only the part
                    after the first comma is decoded
  Returns:
          img: array produced by cv2.imdecode with flags=1
  """
  # Element 1 of the comma split is the base64 payload of the data URL.
  payload = js_reply.split(',')[1]
  raw_bytes = b64decode(payload)
  # View the raw bytes as a flat uint8 buffer and let OpenCV decode the image.
  return cv2.imdecode(np.frombuffer(raw_bytes, dtype=np.uint8), flags=1)

# function to convert OpenCV Rectangle bounding box image into base64 byte string to be overlayed on video stream
def bbox_to_bytes(bbox_array):
  """Encode an RGBA overlay array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
                      It is handed to PIL.Image.fromarray with mode 'RGBA'.
  Returns:
        bytes: 'data:image/png;base64,...' string holding the PNG-encoded overlay
  """
  # Imported here because no import cell of the notebook brings `io` or `PIL`
  # into scope; without these the function raises NameError on a fresh kernel.
  import io
  import PIL.Image

  # convert array into PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # format bbox into png for return
  bbox_PIL.save(iobuf, format='png')
  # base64-encode the PNG bytes and wrap them in a data-URL prefix
  encoded = b64encode(iobuf.getvalue()).decode('utf-8')
  return 'data:image/png;base64,{}'.format(encoded)

In [9]:
def draw_label(input_image, label, left, top):
    """Write `label` on `input_image` over a filled black box anchored at (left, top).

    The image is modified in place; nothing is returned.
    """
    # getTextSize returns ((width, height), baseline) for the rendered string.
    (text_width, text_height), baseline = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)

    # Background box sized to the text plus its baseline.
    box_bottom_right = (left + text_width, top + text_height + baseline)
    cv2.rectangle(input_image, (left, top), box_bottom_right, BLACK, cv2.FILLED)

    # The text origin is its bottom-left corner, hence the height offset.
    text_origin = (left, top + text_height)
    cv2.putText(input_image, label, text_origin, FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)


def pre_process(input_image, net):
    """Feed `input_image` through `net` and return its raw outputs.

    The image is converted to a blob of size (INPUT_WIDTH, INPUT_HEIGHT) with
    pixel values scaled by 1/255, set as the network input, and forwarded
    through the network's unconnected output layers.
    """
    blob = cv2.dnn.blobFromImage(
        input_image,
        1/255,                          # scale factor
        (INPUT_WIDTH, INPUT_HEIGHT),    # target size
        [0,0,0],                        # mean to subtract
        1,                              # fifth positional argument (swapRB)
        crop=False,
    )
    net.setInput(blob)

    # Forward pass restricted to the unconnected output layers.
    return net.forward(net.getUnconnectedOutLayersNames())

In [10]:
def post_process(input_image, outputs):
    """Filter raw detections, apply non-maximum suppression and draw the survivors.

    Params:
            input_image: image the boxes and labels are drawn on (modified in place).
                         Its height/width are used to rescale box coordinates from
                         the INPUT_WIDTH x INPUT_HEIGHT network input.
            outputs: network outputs as returned by pre_process. outputs[0][0] is
                     iterated row by row; each row is read as
                     [cx, cy, w, h, confidence, class scores...].
    Returns:
            input_image, with a rectangle and a "<class>:<confidence>" label for
            every detection kept by NMS.

    Reads the module-level `classes` list to translate class ids into names.
    """
    image_height, image_width = input_image.shape[:2]

    # Factors that map network-input coordinates back onto the image.
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    class_ids = []
    confidences = []
    boxes = []

    for row in outputs[0][0]:
        confidence = row[4]

        # Discard weak detections.
        if confidence >= CONFIDENCE_THRESHOLD:
            classes_scores = row[5:]
            class_id = int(np.argmax(classes_scores))

            # Keep the detection only if its best class is convincing enough.
            if classes_scores[class_id] > SCORE_THRESHOLD:
                cx, cy, w, h = row[0], row[1], row[2], row[3]

                left = int((cx - w/2) * x_factor)
                top = int((cy - h/2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)

                # Plain Python ints/floats: NMSBoxes and the drawing calls are
                # strict about argument types in some OpenCV builds.
                boxes.append([left, top, width, height])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression removes overlapping boxes with lower confidence.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

    # Depending on the OpenCV version the result is an empty tuple, an Nx1 array
    # or a flat array; flatten so every element is a single index.
    for i in np.array(indices).flatten():
        i = int(i)
        left, top, width, height = boxes[i]
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
        label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])
        draw_label(input_image, label, left, top)

    return input_image

In [11]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """Display the JavaScript that drives the webcam stream in the notebook output.

  The script declares the browser-side function ``stream_frame(label, imgData)``
  that ``video_frame`` invokes through ``eval_js``. As written in the script:

  * the first call builds the page elements (status label, video element,
    overlay image, stop instruction) and requests the camera with
    ``facingMode: "environment"``;
  * each call writes a non-empty ``label`` into the status element and, if
    ``imgData`` is non-empty, positions it over the video as an overlay image;
  * it then waits for the next animation frame, draws the video into a
    640x480 canvas and returns an object holding timing values (``create``,
    ``show``, ``capture``) and the frame as a JPEG data URL (``img``);
  * clicking the video, the overlay or the instruction text sets a shutdown
    flag; a later ``stream_frame`` call that finds the flag set removes the
    elements and returns an empty string.

  Returns:
          None. The only effect is displaying the Javascript object.
  """
  # The whole script is a runtime string: Python comments cannot be placed inside it.
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    
    var pendingResolve = null;
    var shutdown = false;
    
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }
    
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }
    
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);
      
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);
           
      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);
      
      const instruction = document.createElement('div');
      instruction.innerHTML = 
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };
      
      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);
      
      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();
      
      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }
            
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }
      
      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;
      
      return {'create': preShow - preCreate, 
              'show': preCapture - preShow, 
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Displaying the Javascript object is what hands the script to the notebook front end.
  display(js)
  
def video_frame(label, bbox):
  """Call the browser-side ``stream_frame(label, bbox)`` and return its result.

  Params:
          label: text shown in the status element of the stream
          bbox: overlay image as a data-URL string ('' for no overlay)
  Returns:
          whatever eval_js returns for the JavaScript call
  """
  import json

  # json.dumps produces a double-quoted, fully escaped string literal that is
  # also valid JavaScript, so quotes, backslashes or newlines inside `label`
  # can no longer break (or inject into) the evaluated expression. str() keeps
  # the previous behaviour of formatting non-string arguments as text.
  call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(call)
  return data

In [24]:
%%bash
# List the working directory, including hidden entries.
ls -a
# NOTE(review): %%bash runs the cell in its own shell process, so this cd
# presumably does not change the notebook's working directory — confirm.
cd YOLOv5/models

.
..
.config
learnopencv
sample_data
YOLOv5
YOLOv5s.pt
YOLOv5s.pt.1
YOLOv5s.pt.2
YOLOv5s.pt.3
YOLOv5s.pt.4


In [36]:
# List the current directory (hidden entries included), move up one level,
# then list again to see where the kernel ended up.
%ls -a
%cd ..
%ls -a

[0m[01;34m.[0m/         experimental.py  tf.py         yolov5m.yaml  yolov5x.yaml
[01;34m..[0m/        [01;34mhub[0m/             yolo.py       yolov5n.yaml
common.py  __init__.py      yolov5l.yaml  yolov5s.yaml
/content/YOLOv5
[0m[01;34m.[0m/               .dockerignore   hubconf.py               setup.cfg
[01;34m..[0m/              export.py       LICENSE                  train.py
CONTRIBUTING.md  [01;34m.git[0m/           [01;34mmodels[0m/                  tutorial.ipynb
[01;34mdata[0m/            .gitattributes  .pre-commit-config.yaml  [01;34mutils[0m/
detect.py        [01;34m.github[0m/        README.md                val.py
Dockerfile       [01;32m.gitignore[0m*     [01;32mrequirements.txt[0m*


In [32]:
# List the current working directory, including hidden entries.
%ls -a

[0m[01;34m.[0m/   [01;34m.config[0m/      [01;34msample_data[0m/  YOLOv5s.pt    YOLOv5s.pt.2  YOLOv5s.pt.4
[01;34m..[0m/  [01;34mlearnopencv[0m/  [01;34mYOLOv5[0m/       YOLOv5s.pt.1  YOLOv5s.pt.3


In [37]:
# Clone the repository. 
!git clone https://github.com/ultralytics/YOLOv5

%cd YOLOv5 # Install dependencies.
!pip install -r requirements.txt  # install

# Download .pt model.
!wget https://github.com/ultralytics/YOLOv5/releases/download/v6.1/YOLOv5s.pt

# %cd .. # Export to ONNX.
!python export.py --weights models/YOLOv5s.pt --include onnx
!python export.py --weights models/yolov5n.pt --include onnx

Cloning into 'YOLOv5'...
remote: Enumerating objects: 12824, done.[K
remote: Total 12824 (delta 0), reused 0 (delta 0), pack-reused 12824[K
Receiving objects: 100% (12824/12824), 11.75 MiB | 11.34 MiB/s, done.
Resolving deltas: 100% (8914/8914), done.
[Errno 2] No such file or directory: 'YOLOv5 # Install dependencies.'
/content/YOLOv5
Collecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 4.1 MB/s 
Collecting thop
  Downloading thop-0.0.31.post2005241907-py3-none-any.whl (8.7 kB)
Installing collected packages: thop, PyYAML
  Attempting uninstall: PyYAML
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-6.0 thop-0.0.31.post2005241907
--2022-04-14 13:11:36--  https://github.com/ultralytics/YOLOv5/releases/download/v6.1/YOLOv5s.pt
Resolving 

In [45]:
# Move the kernel's working directory up one level.
%cd ..

/content


In [46]:
# List the current working directory, including hidden entries.
%ls -a

[0m[01;34m.[0m/   [01;34m.config[0m/      [01;34msample_data[0m/  YOLOv5s.pt    YOLOv5s.pt.2  YOLOv5s.pt.4
[01;34m..[0m/  [01;34mlearnopencv[0m/  [01;34mYOLOv5[0m/       YOLOv5s.pt.1  YOLOv5s.pt.3  YOLOv5s.pt.5


In [50]:
# Download the file.
# The path is relative to the kernel's working directory, so this cell has to
# run from the directory that contains the YOLOv5 checkout.
from google.colab import files
files.download('YOLOv5/models/yolov5n.onnx')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
# List the current working directory, including hidden entries.
%ls -a

[0m[01;34m.[0m/   [01;34m.config[0m/      [01;34msample_data[0m/  YOLOv5s.pt    YOLOv5s.pt.2  YOLOv5s.pt.4
[01;34m..[0m/  [01;34mlearnopencv[0m/  [01;34mYOLOv5[0m/       YOLOv5s.pt.1  YOLOv5s.pt.3  YOLOv5s.pt.5


In [17]:
# Load class names (one name per line; the line index is the class id used by post_process).
classesFile = "learnopencv/Object-Detection-using-YOLOv5-and-OpenCV-DNN-in-CPP-and-Python/coco.names"
classes = None
with open(classesFile, 'rt') as f:
  classes = f.read().rstrip('\n').split('\n')

# Load image.
frame = cv2.imread('sample.jpg')
if frame is None:
  # cv2.imread reports a missing or unreadable file by returning None instead
  # of raising, which otherwise only surfaces later as an obscure error inside
  # the detector. Say so right here.
  print("Warning: could not read 'sample.jpg'; `frame` is None until another cell assigns it.")

# Give the weight files to the model and load the network using them.
modelWeights = "learnopencv/Object-Detection-using-YOLOv5-and-OpenCV-DNN-in-CPP-and-Python/models/yolov5s.onnx"
net = cv2.dnn.readNet(modelWeights)

error: ignored

In [None]:
# Face detector used by the loop below. No other cell defines `face_cascade`,
# so it is created here from the Haar cascade file bundled with OpenCV.
face_cascade = cv2.CascadeClassifier(
    cv2.samples.findFile(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'))

# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box to empty
bbox = ''
count = 0
while True:
    js_reply = video_frame(label_html, bbox)
    # stream_frame returns '' once the stream was shut down in the browser. A
    # click that lands while a capture is pending yields an empty 'img'
    # instead, which js_to_image cannot decode, so stop on that as well.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV image (BGR, see js_to_image)
    frame = js_to_image(js_reply["img"])

    # create transparent overlay for bounding box
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)

    # grayscale image for face detection; the frame is BGR, so convert from BGR
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # get face region coordinates
    faces = face_cascade.detectMultiScale(gray)
    # get face bounding box for overlay
    for (x,y,w,h) in faces:
      bbox_array = cv2.rectangle(bbox_array,(x,y),(x+w,y+h),(255,0,0),2)

    # make every drawn pixel opaque and leave the rest fully transparent
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes

In [48]:
# Load class names (one name per line; the line index is the class id used by post_process).
classesFile = "learnopencv/Object-Detection-using-YOLOv5-and-OpenCV-DNN-in-CPP-and-Python/coco.names"
classes = None
with open(classesFile, 'rt') as f:
  classes = f.read().rstrip('\n').split('\n')

# Load image.
# NOTE: `frame` is not assigned in this cell; it must already exist from an
# earlier cell (for example a captured webcam frame).
# frame = cv2.imread('sample.jpg')

# Give the weight files to the model and load the network using them.
modelWeights = "YOLOv5/models/yolov5n.onnx"
net = cv2.dnn.readNet(modelWeights)

# Process image. post_process draws on a copy so `frame` itself stays untouched.
detections = pre_process(frame, net)
img = post_process(frame.copy(), detections)

# Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
print(label)
cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE, RED, THICKNESS, cv2.LINE_AA)

# cv2.imshow / cv2.waitKey need a GUI window, which a Colab runtime does not
# have; cv2_imshow (imported from google.colab.patches) renders the image
# inline in the cell output instead.
cv2_imshow(img)

error: ignored

In [None]:
# Work-in-progress cell: stream webcam frames, run a detector on each frame and
# send the drawn boxes back to the browser as a transparent overlay.
#
# NOTE(review): the following names are used below but are not defined in any
# visible cell of this notebook: run_detector, detector, bbox2points,
# width_ratio, height_ratio, class_colors, get_mask_for_person. They look like
# leftovers from a darknet-based helper — confirm where they should come from.
# NOTE(review): `detections` and `img` are only assigned by the earlier
# single-image cell; the result of run_detector(...) is discarded here, so the
# loop would iterate over stale data — confirm the intended wiring.

# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box to empty
bbox = ''
count = 0 
# `a` is cleared on the first pass, so the loop body runs exactly once.
a = True
while a == True:
    a = False
    js_reply = video_frame(label_html, bbox)
    if not js_reply:
        break

    # convert JS response to OpenCV Image
    frame = js_to_image(js_reply["img"])  

    # create transparent overlay for bounding box
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)

    # call our darknet helper on video frame
    # detections, width_ratio, height_ratio = darknet_helper(frame, width, height)
    run_detector(detector, frame)

    # loop through detections and draw them on transparent overlay image
    # (the loop variable `bbox` rebinds the overlay string declared above; it is
    # assigned again at the end of the pass)
    for label, confidence, bbox in detections:
      left, top, right, bottom = bbox2points(bbox)
      left, top, right, bottom = int(left * width_ratio), int(top * height_ratio), int(right * width_ratio), int(bottom * height_ratio)
      bbox_array = cv2.rectangle(bbox_array, (left, top), (right, bottom), class_colors[label], 2)
      bbox_array = cv2.putText(bbox_array, "{} [{:.2f}]".format(label, float(confidence)),
                        (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        class_colors[label], 2)
      
      # get mask
      person_detected, mask = get_mask_for_person(img, left, top, right, bottom, label)


      # ------------------------  Put Cedric's code here --------------------------
      #if person_detected:
      # Apply his algo on this image 
      #  person_cropped = (img*np.expand_dims(mask, axis=2)).astype(int)

      # ------------------------  Put Cedric's code here --------------------------

    # make every drawn pixel opaque and leave the rest fully transparent
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
    
    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes