# Detectando objetos em vídeos com YOLOv4 e OpenCV

## Etapa 1 - Importando as bibliotecas

In [1]:
# Import OpenCV and print the version provided by the current runtime.
import cv2
print(cv2.__version__)

4.8.0


In [None]:
# NOTE(review): this cell has no execution count, and the cells around it print
# OpenCV 4.8.0, so this 4.4.0.40 pin does not appear to have been applied in the
# recorded run — confirm whether the cell is still needed before re-running it.
!pip install opencv-python==4.4.0.40

Collecting opencv-python==4.4.0.40
[?25l  Downloading https://files.pythonhosted.org/packages/bd/8a/7a01233c28f4f0b49536498f2ae39aa9f70c6de85fe74dc17f53ec7d0b0e/opencv_python-4.4.0.40-cp36-cp36m-manylinux2014_x86_64.whl (49.4MB)
[K     |████████████████████████████████| 49.4MB 81kB/s 
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: opencv-python
  Found existing installation: opencv-python 4.1.2.30
    Uninstalling opencv-python-4.1.2.30:
      Successfully uninstalled opencv-python-4.1.2.30
Successfully installed opencv-python-4.4.0.40


In [2]:
# All libraries used by the notebook are imported in this single cell.
import cv2
import numpy as np
import time
import os
import matplotlib.pyplot as plt
# cv2_imshow is used further down to display processed frames inside the notebook.
from google.colab.patches import cv2_imshow
import zipfile
# Print the OpenCV version in use for this run.
print(cv2.__version__)

4.8.0


## Etapa 2 - Conectando com o Google Drive

In [3]:
# Mount Google Drive at /content/gdrive; later cells read the model archive from
# it and copy the resulting video back to it.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## Etapa 3 - Carregando os arquivos do modelo treinado

In [4]:
# Extract the model archive stored on Google Drive into the current working
# directory. The `with` block closes the archive even if extraction fails.
path = '/content/gdrive/MyDrive/Yolo/YoloV4Files.zip'
with zipfile.ZipFile(file=path, mode="r") as zip_object:
  zip_object.extractall("./")

In [9]:
# Path of the text file that holds the class names, one per line.
labels_path = os.path.join('/content/cfg', "coco.names")
# Read all class names. The `with` block closes the file handle, and
# splitlines() also copes with Windows (\r\n) line endings.
with open(labels_path) as labels_file:
  LABELS = labels_file.read().strip().splitlines()

In [10]:
# Path of the trained weights file. os.path.join avoids the doubled separator
# ("/content//yolov4.weights") that joining '/content/' with os.path.sep produced.
weights_path = os.path.join('/content', "yolov4.weights")
# Path of the network configuration file.
config_path = os.path.join('/content/cfg', "yolov4.cfg")

In [11]:
# Build the network from the Darknet configuration file and its trained weights.
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

## Etapa 4 - Definindo mais configurações para a detecção

In [12]:
# Fix NumPy's global seed so the colors generated below are the same on every run.
np.random.seed(42)
# One row of three uint8 values per label; each row is used as the drawing color
# of the corresponding class.
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

In [16]:
# Names of every layer in the network, kept under their own name so that `ln`
# only ever means "output layer names".
nomes_camadas = net.getLayerNames()
print("Todas as camadas (layers):")
print(nomes_camadas)
print("Total: "+ str(len(nomes_camadas)))
print("Camadas de saída: ")
# Indices of the output layers; they are 1-based, hence the `- 1` below.
indices_saida = net.getUnconnectedOutLayers()
print(indices_saida)
# Depending on the OpenCV version the indices come back as a flat array or as an
# (N, 1) array; flattening makes the lookup work with both.
ln = [nomes_camadas[i - 1] for i in np.array(indices_saida).flatten()]
print(ln)

Todas as camadas (layers):
('conv_0', 'bn_0', 'mish_1', 'conv_1', 'bn_1', 'mish_2', 'conv_2', 'bn_2', 'mish_3', 'identity_3', 'conv_4', 'bn_4', 'mish_5', 'conv_5', 'bn_5', 'mish_6', 'conv_6', 'bn_6', 'mish_7', 'shortcut_7', 'conv_8', 'bn_8', 'mish_9', 'concat_9', 'conv_10', 'bn_10', 'mish_11', 'conv_11', 'bn_11', 'mish_12', 'conv_12', 'bn_12', 'mish_13', 'identity_13', 'conv_14', 'bn_14', 'mish_15', 'conv_15', 'bn_15', 'mish_16', 'conv_16', 'bn_16', 'mish_17', 'shortcut_17', 'conv_18', 'bn_18', 'mish_19', 'conv_19', 'bn_19', 'mish_20', 'shortcut_20', 'conv_21', 'bn_21', 'mish_22', 'concat_22', 'conv_23', 'bn_23', 'mish_24', 'conv_24', 'bn_24', 'mish_25', 'conv_25', 'bn_25', 'mish_26', 'identity_26', 'conv_27', 'bn_27', 'mish_28', 'conv_28', 'bn_28', 'mish_29', 'conv_29', 'bn_29', 'mish_30', 'shortcut_30', 'conv_31', 'bn_31', 'mish_32', 'conv_32', 'bn_32', 'mish_33', 'shortcut_33', 'conv_34', 'bn_34', 'mish_35', 'conv_35', 'bn_35', 'mish_36', 'shortcut_36', 'conv_37', 'bn_37', 'mish_38'

## Etapa 5 - Criando as funções para detecção e processamento do vídeo


### Função para exibir imagens no Colab

In [15]:
def mostrar(img):
  """Display a BGR image with matplotlib on a 16x10 inch figure, axes hidden.

  Args:
    img: image array in BGR channel order (it is converted to RGB before display).
  """
  plt.gcf().set_size_inches(16, 10)
  plt.axis("off")
  imagem_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  plt.imshow(imagem_rgb)
  plt.show()

### Construindo o blob da imagem


In [38]:
def detectionFunction(net, imagem, mostrar_texto=True, camadas_saida=None):
  """Run one forward pass of the network on an image.

  Args:
    net: network object on which setInput/forward are called.
    imagem: image passed to cv2.dnn.blobFromImage.
    mostrar_texto: when True, print how long the forward pass took.
    camadas_saida: names of the layers whose outputs are requested. When None,
      the network's own unconnected output layer names are used, so the function
      does not depend on a module-level variable.

  Returns:
    Tuple (net, imagem, layerOutputs), where layerOutputs is what
    net.forward returns for the requested layers.
  """
  if camadas_saida is None:
    camadas_saida = net.getUnconnectedOutLayersNames()

  inicio = time.time()
  # Scale pixels by 1/255, resize to 416x416 without cropping, and swap the
  # R and B channels.
  blob = cv2.dnn.blobFromImage(imagem, 1 / 255.0, (416, 416), swapRB=True, crop=False)
  net.setInput(blob)
  layerOutputs = net.forward(camadas_saida)
  termino = time.time()
  if mostrar_texto:
    print("YOLO levou {:.2f} segundos".format(termino - inicio))
  return net, imagem, layerOutputs

### Realizando a detecção

In [39]:
def resultProcessing(detection, _threshold, caixas, confiancas, IDclasses, largura=None, altura=None):
  """Append one detection to the result lists if it is confident enough.

  Args:
    detection: 1-D array; elements 0..3 are the box (center x, center y, width,
      height) as fractions of the frame size, and elements from index 5 on are
      the per-class scores.
    _threshold: minimum score the best class must exceed for the box to be kept.
    caixas: list receiving [x, y, width, height] in pixels (x, y = top-left corner).
    confiancas: list receiving the score of the best class as a float.
    IDclasses: list receiving the index of the best class.
    largura: frame width in pixels; when None the module-level `W` is used.
    altura: frame height in pixels; when None the module-level `H` is used.

  Returns:
    The same three lists (caixas, confiancas, IDclasses), mutated in place.
  """
  # Fall back to the module-level frame size so existing callers keep working.
  if largura is None:
    largura = W
  if altura is None:
    altura = H

  scores = detection[5:]
  classeID = np.argmax(scores)
  confianca = scores[classeID]

  if confianca > _threshold:
    # Convert the relative box to pixel units.
    caixa = detection[0:4] * np.array([largura, altura, largura, altura])
    (centerX, centerY, width, height) = caixa.astype("int")

    # The network reports the box center; derive the top-left corner.
    x = int(centerX - (width / 2))
    y = int(centerY - (height / 2))

    caixas.append([x, y, int(width), int(height)])
    confiancas.append(float(confianca))
    IDclasses.append(classeID)

  return caixas, confiancas, IDclasses

### Mostrando o resultado da detecção no vídeo

In [19]:
def funcoes_imagem(imagem, i, confiancas, caixas, COLORS, LABELS, mostrar_texto=True, ids_classes=None):
  """Draw the bounding box and label of detection `i` on the image.

  Args:
    imagem: image that is drawn on in place.
    i: index of the detection inside caixas / confiancas / ids_classes.
    confiancas: confidence of each detection.
    caixas: [x, y, width, height] of each detection (x, y = top-left corner).
    COLORS: per-class color table, indexed by class id.
    LABELS: per-class names, indexed by class id.
    mostrar_texto: when True, print the label text and the box coordinates.
    ids_classes: class id of each detection. When None the module-level
      `IDclasses` list is used, which is what callers that do not pass this
      argument rely on.

  Returns:
    Tuple (imagem, x, y, w, h) with the annotated image and the box drawn.
  """
  if ids_classes is None:
    ids_classes = IDclasses

  (x, y) = (caixas[i][0], caixas[i][1])
  (w, h) = (caixas[i][2], caixas[i][3])
  classe = ids_classes[i]
  cor = [int(c) for c in COLORS[classe]]
  texto = "{}: {:.4f}".format(LABELS[classe], confiancas[i])

  # Measure where the label will land by rendering it on a blank single-channel
  # mask and taking the bounding rectangle of the drawn pixels.
  mascara = np.zeros(imagem.shape[:2], dtype=np.uint8)
  cv2.putText(mascara, texto, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 255, 2)
  fx, fy, fw, fh = cv2.boundingRect(mascara)

  # Object bounding box.
  cv2.rectangle(imagem, (x, y), (x + w, y + h), cor, 2)

  # Filled background behind the label, slightly enlarged by a 3 px border,
  # then the label itself in black.
  cv2.rectangle(imagem, (fx, fy), (fx + fw, fy + fh), cor, -1)
  cv2.rectangle(imagem, (fx, fy), (fx + fw, fy + fh), cor, 3)
  cv2.putText(imagem, texto, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)

  if mostrar_texto:
    print("> " + texto)
    print(x,y,w,h)

  return imagem,x,y,w,h

## Etapa 6 - Carregando o vídeo onde será feita a detecção

### 6.1 - De uma url

In [20]:
# Download the sample video into the current working directory.
!wget https://github.com/gabevr/yolo/raw/master/videos/video_pessoas01.mp4

--2023-12-19 13:52:03--  https://github.com/gabevr/yolo/raw/master/videos/video_pessoas01.mp4
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/gabevr/yolo/master/videos/video_pessoas01.mp4 [following]
--2023-12-19 13:52:03--  https://raw.githubusercontent.com/gabevr/yolo/master/videos/video_pessoas01.mp4
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6129513 (5.8M) [application/octet-stream]
Saving to: ‘video_pessoas01.mp4’


2023-12-19 13:52:03 (187 MB/s) - ‘video_pessoas01.mp4’ saved [6129513/6129513]



### Lendo o arquivo de vídeo com o OpenCV

In [26]:
# Video file to process.
arquivo_video = 'video_pessoas01.mp4'
cap = cv2.VideoCapture(arquivo_video)
# Read one frame only to learn the frame dimensions.
conectado, video = cap.read()
if not conectado:
  # Fail here with a clear message instead of on `video.shape` further down.
  raise IOError("Não foi possível ler o vídeo: " + arquivo_video)
# Rewind so the processing loop starts at the first frame instead of silently
# dropping the frame that was consumed above.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

In [27]:
# Flag returned by cap.read(): True when the first frame was read.
conectado

True

In [28]:
# Shape of the first frame; index 0 is the height and index 1 the width.
video.shape

(720, 1280, 3)

In [29]:
# The first two entries of the frame shape are (height, width).
video_altura, video_largura = video.shape[:2]
(video_largura, video_altura)

(1280, 720)

## Etapa 7 - Redimensionamento do tamanho do vídeo (opcional)

In [30]:
def redimensionar(largura, altura, largura_maxima = 600):
  """Shrink a (width, height) pair to a maximum width, keeping the aspect ratio.

  Args:
    largura: original width.
    altura: original height.
    largura_maxima: widest allowed result; sizes not wider than this are
      returned unchanged.

  Returns:
    Tuple (width, height). When shrinking, the height is truncated to an int.
  """
  # Already narrow enough: nothing to do.
  if largura <= largura_maxima:
    return largura, altura

  razao = largura / altura
  return largura_maxima, int(largura_maxima / razao)

In [31]:
# Compute the output frame size from the original frame's width and height,
# using the default maximum width of redimensionar.
video_largura, video_altura = redimensionar(video.shape[1], video.shape[0])
print(video_largura,video_altura)

600 337


## Etapa 8 - Definindo as configurações do vídeo

- Mais exemplos de outras configurações com o fourcc que é possível usar: https://www.programcreek.com/python/example/89348/cv2.VideoWriter_fourcc

In [32]:
# Name of the file that will receive the processed frames.
nome_arquivo = 'resultado.avi'
# FourCC code for the XVID codec, passed to the VideoWriter created below.
fourcc = cv2.VideoWriter_fourcc(*'XVID')

In [33]:
# Frame rate passed to the VideoWriter.
# NOTE(review): fixed at 24 regardless of the source video's own frame rate —
# confirm this is intended.
fps = 24

In [34]:
# Writer for the output file; the frame size given here is the same size each
# frame is resized to in the processing loop before being written.
saida_video = cv2.VideoWriter(nome_arquivo, fourcc, fps, (video_largura, video_altura))

## Etapa 9 - Definindo as variáveis

In [35]:
# Minimum class score for a detection to be kept; it is also passed to
# cv2.dnn.NMSBoxes as its score threshold.
threshold = 0.5
# Second threshold passed to cv2.dnn.NMSBoxes.
threshold_NMS = 0.3
# Font scales for overlay text; only fonte_pequena is used in the cells shown here.
fonte_pequena, fonte_media = 0.4, 0.6
# Font used for the per-frame timing overlay.
fonte = cv2.FONT_HERSHEY_SIMPLEX

In [41]:
# Limit used by the processing loop to decide how many frames are displayed inline.
amostras_exibir = 10
# Number of frames displayed so far.
amostra_atual = 0

## Etapa 10 - Processamento do vídeo e exibição do resultado

In [42]:
# Process the video frame by frame: resize, detect, filter the detections with
# non-max suppression, draw them, and append the annotated frame to the output
# file. The try/finally guarantees that the writer and the capture are released
# even if the loop is interrupted or raises, so the output file is finalized.
try:
  while cv2.waitKey(1) < 0:  # stop early if a key press is reported
    conectado, frame = cap.read()
    if not conectado:  # no more frames
      break

    t = time.time()  # start of this frame's processing time
    frame = cv2.resize(frame, (video_largura, video_altura))
    # resultProcessing reads the module-level H and W to scale the boxes.
    (H, W) = frame.shape[:2]

    net, frame, layerOutputs = detectionFunction(net, frame)
    caixas = []
    confiancas = []
    # funcoes_imagem reads the module-level IDclasses list.
    IDclasses = []

    # Collect every detection above the confidence threshold.
    for output in layerOutputs:
      for detection in output:
        caixas, confiancas, IDclasses = resultProcessing(detection, threshold, caixas, confiancas, IDclasses)

    # Non-max suppression returns the indices of the boxes to keep.
    objs = cv2.dnn.NMSBoxes(caixas, confiancas, threshold, threshold_NMS)

    if len(objs) > 0:
      for i in np.array(objs).flatten():
        frame, x, y, w, h = funcoes_imagem(frame, i, confiancas, caixas, COLORS, LABELS, mostrar_texto=False)

    # Overlay how long this frame took to process.
    cv2.putText(frame, " frame processado em {:.2f} segundos".format(time.time() - t),
                (20, video_altura-20), fonte, fonte_pequena, (250, 250, 250), 0, lineType=cv2.LINE_AA)

    # Display exactly `amostras_exibir` frames inline (the previous `<=` showed one extra).
    if amostra_atual < amostras_exibir:
      cv2_imshow(frame)
      amostra_atual += 1

    saida_video.write(frame)

  print('Terminou')
finally:
  saida_video.release()
  cap.release()
  cv2.destroyAllWindows()

Output hidden; open in https://colab.research.google.com to view.

In [43]:
# Show the size of the generated video file.
!du -h resultado.avi

14M	resultado.avi


In [44]:
# Copy the generated video to Google Drive.
# NOTE(review): assumes the Yolo/resultados folder already exists on the mounted
# Drive, and an existing resultado3.avi there would be replaced — confirm.
!cp ./resultado.avi /content/gdrive/MyDrive/Yolo/resultados/resultado3.avi