#**Procesamiento de Imágenes**
##Trabajo Final: Identificación de Imágenes con YOLO

###Alumnos:
*   Alcides Charapaqui
*   Diego Humbser
*   Franco Simonini

###Sección: SC62

###Docente:
*   Peter Montalvo



In [None]:
# Install the OpenCV Python bindings (imported as cv2 in the next cell).
!pip install opencv-python



In [None]:
import cv2
import numpy as np
from google.colab import files

In [None]:
# Clonar el repositorio de YOLO
!git clone https://github.com/AlexeyAB/darknet

# Cambiar al directorio de darknet
%cd darknet

# Compilar darknet
!make

# Descargar los pesos preentrenados de YOLOv3
!wget https://pjreddie.com/media/files/yolov3.weights

# Descargar el archivo de configuración y los nombres de clases
!wget https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg
!wget https://github.com/pjreddie/darknet/blob/master/data/coco.names

Cloning into 'darknet'...
remote: Enumerating objects: 15833, done.[K
remote: Counting objects: 100% (294/294), done.[K
remote: Compressing objects: 100% (152/152), done.[K
remote: Total 15833 (delta 157), reused 219 (delta 140), pack-reused 15539[K
Receiving objects: 100% (15833/15833), 14.76 MiB | 9.07 MiB/s, done.
Resolving deltas: 100% (10587/10587), done.
/content/darknet
mkdir -p ./obj/
mkdir -p backup
mkdir -p results
chmod +x *.sh
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -c ./src/image_opencv.cpp -o obj/image_opencv.o
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -c ./src/http_stream.cpp -o obj/http_stream.o
[01m[K./src/http_stream.cpp:[m[K In member function ‘[01m[Kbool JSON_sender::write(const char*)[m[K’:
  253 |                 int [01;35m[Kn[m[K = _write(clien

##Parte Uno
####1. Se configuran los pesos de YOLO
####2. Se carga la red de YOLO

In [None]:
# Configuración de YOLO
config_path = '/content/darknet/cfg/yolov3.cfg'
weights_path = '/content/darknet/yolov3.weights'
names_path = '/content/darknet/data/coco.names'

# Cargar la red YOLO
net = cv2.dnn.readNet(config_path, weights_path)
layer_names = net.getUnconnectedOutLayersNames()
with open(names_path, 'r') as f:
    classes = f.read().strip().split('\n')

# Cargar el video
video_path = '/content/tf_video.mp4'
cap = cv2.VideoCapture(video_path)

# Configurar VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Puedes usar 'x264' dependiendo de las codecs disponibles
output_path_with_rectangles = '/content/video_con_detecciones.mp4'
out_with_rectangles = cv2.VideoWriter(output_path_with_rectangles, fourcc, 20.0, (int(cap.get(3)), int(cap.get(4))))

In [None]:
# Obtener la cantidad total de frames en el video
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print("Total number of frames in the video:", total_frames)

Total number of frames in the video: 0


In [None]:
#previamente, se han detectado la altura y el ancho de todos los frames de este video.
count_sub = 0
height = 480
width = 848
print(height)
print(width)

480
848


####Ahora, se hará el proceso para la detección de personas en cada Frame.

In [None]:
# Per-frame person detection: run YOLO on every frame, keep only "person"
# boxes, drop overlapping duplicates, draw the survivors and write the frame.
CONF_THRESHOLD = 0.5   # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4    # overlap threshold passed to non-maximum suppression
PERSON_CLASS_ID = 0    # class id 0 is "person"

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Detections are scaled by the size of the frame actually read, so the
        # boxes stay correct even if the video is not 848x480.
        frame_h, frame_w = frame.shape[:2]

        # Pre-process the image for YOLO and run the forward pass
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(layer_names)

        # Collect candidate person boxes together with their scores
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if class_id != PERSON_CLASS_ID or confidence <= CONF_THRESHOLD:
                    continue
                center_x = int(detection[0] * frame_w)
                center_y = int(detection[1] * frame_h)
                w = int(detection[2] * frame_w)
                h = int(detection[3] * frame_h)
                # Convert centre/size to the top-left corner used for drawing
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])

        # Non-maximum suppression: the network usually emits several
        # overlapping boxes per person; without this step each duplicate would
        # be counted and drawn. flatten() copes with both the flat and the
        # Nx1 index layouts returned by different OpenCV versions.
        if boxes:
            keep = np.array(cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)).flatten()
        else:
            keep = []

        # Count people
        num_people = len(keep)
        count_sub += 1
        print("Frame: {} / {}".format(count_sub, total_frames))

        # Draw the bounding boxes that survived suppression
        for i in keep:
            x, y, w, h = boxes[int(i)]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Write the annotated frame to the VideoWriter
        out_with_rectangles.write(frame)
finally:
    # Release resources even if the loop is interrupted, so the output file
    # is finalised.
    cap.release()
    out_with_rectangles.release()
    cv2.destroyAllWindows()

# Download the new video with the rectangles
files.download(output_path_with_rectangles)

Frame: 1 / 0
Frame: 2 / 0
Frame: 3 / 0
Frame: 4 / 0
Frame: 5 / 0
Frame: 6 / 0
Frame: 7 / 0
Frame: 8 / 0
Frame: 9 / 0
Frame: 10 / 0
Frame: 11 / 0
Frame: 12 / 0
Frame: 13 / 0
Frame: 14 / 0
Frame: 15 / 0
Frame: 16 / 0
Frame: 17 / 0
Frame: 18 / 0
Frame: 19 / 0
Frame: 20 / 0
Frame: 21 / 0
Frame: 22 / 0
Frame: 23 / 0
Frame: 24 / 0
Frame: 25 / 0
Frame: 26 / 0
Frame: 27 / 0
Frame: 28 / 0
Frame: 29 / 0
Frame: 30 / 0
Frame: 31 / 0
Frame: 32 / 0
Frame: 33 / 0
Frame: 34 / 0
Frame: 35 / 0
Frame: 36 / 0
Frame: 37 / 0
Frame: 38 / 0
Frame: 39 / 0
Frame: 40 / 0
Frame: 41 / 0
Frame: 42 / 0
Frame: 43 / 0
Frame: 44 / 0
Frame: 45 / 0
Frame: 46 / 0
Frame: 47 / 0
Frame: 48 / 0
Frame: 49 / 0
Frame: 50 / 0
Frame: 51 / 0
Frame: 52 / 0
Frame: 53 / 0
Frame: 54 / 0
Frame: 55 / 0
Frame: 56 / 0
Frame: 57 / 0
Frame: 58 / 0
Frame: 59 / 0
Frame: 60 / 0
Frame: 61 / 0
Frame: 62 / 0
Frame: 63 / 0
Frame: 64 / 0
Frame: 65 / 0
Frame: 66 / 0
Frame: 67 / 0
Frame: 68 / 0
Frame: 69 / 0
Frame: 70 / 0
Frame: 71 / 0
Frame: 72 / 0
F

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

##Parte 2

####La tercera función consiste en detectar los frames en donde existen personas, parecido a lo que se realizó en el trabajo Parcial.

####Como ejemplo, dado que en todos los frames del video sí existen personas, no se registran frames sin personas.

In [None]:
# Cargar el video grabado
video_path = '/content/tf_video.mp4'
cap = cv2.VideoCapture(video_path)

# Cargar la red YOLO preentrenada para la detección de personas
config_path = '/content/darknet/cfg/yolov3.cfg'
weights_path = '/content/darknet/yolov3.weights'
names_path = '/content/darknet/data/coco.names'

net = cv2.dnn.readNet(config_path, weights_path)
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
with open(names_path, 'r') as f:
    classes = f.read().strip().split('\n')


In [None]:
# Obtener la cantidad total de frames en el video
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print("Total number of frames in the video:", total_frames)

Total number of frames in the video: 3718


In [None]:
# Classify every frame as "occupied" (at least one person detected) or "free"
# (no person detected), overlay both running totals and write the result.
frame_width = 848
frame_height = 480
CONF_THRESHOLD = 0.5   # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4    # overlap threshold passed to non-maximum suppression
PERSON_CLASS_ID = 0    # class 0 is "person"
RED = (0, 0, 255)
GREEN = (0, 255, 0)
font = cv2.FONT_HERSHEY_SIMPLEX

# Write at the source frame rate so playback speed matches the input;
# fall back to the former 30 fps when the source reports no rate.
output_fps = cap.get(cv2.CAP_PROP_FPS) or 30
out_sub = cv2.VideoWriter('output_video_area.mp4', cv2.VideoWriter_fourcc(*'mp4v'), output_fps, (frame_width, frame_height))
count_sub = 0
frames_free = 0
frames_occupied = 0
# Only person detections are kept below, so every drawn box shares this label.
person_label = str(classes[PERSON_CLASS_ID])

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame to the fixed output resolution
        frame = cv2.resize(frame, (frame_width, frame_height))

        # Detect objects in the frame with YOLO
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Collect candidate person boxes together with their scores
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if class_id != PERSON_CLASS_ID or confidence <= CONF_THRESHOLD:
                    continue
                center_x = int(detection[0] * frame_width)
                center_y = int(detection[1] * frame_height)
                w = int(detection[2] * frame_width)
                h = int(detection[3] * frame_height)
                # Convert centre/size to the top-left corner used for drawing
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))

        # Non-maximum suppression removes duplicate boxes for the same person.
        # flatten() copes with both the flat and the Nx1 index layouts
        # returned by different OpenCV versions.
        if boxes:
            keep = np.array(cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)).flatten()
        else:
            keep = []

        # Draw the surviving person boxes with their confidence
        for i in keep:
            x, y, w, h = boxes[int(i)]
            cv2.rectangle(frame, (x, y), (x + w, y + h), RED, 2)
            cv2.putText(frame, f'{person_label} {confidences[int(i)]:.2f}', (x, y - 10), font, 0.5, RED, 2)

        # Count the frame exactly once: occupied when at least one person was
        # found, free otherwise. Counting per box would inflate the occupied
        # total and would never register a free frame.
        if len(keep) > 0:
            frames_occupied += 1
        else:
            frames_free += 1

        # Overlay both running totals on the frame
        overlays = (
            (f'Frames Libres: {frames_free}', (50, 306), GREEN),
            (f'Frames Ocupados: {frames_occupied}', (50, 332), RED),
        )
        for text, origin, color in overlays:
            cv2.putText(frame, text, origin, font, 0.68, color, 1)

        # Write the frame to the output video and report progress
        out_sub.write(frame)
        count_sub += 1
        print("Frame: {} / {}".format(count_sub, total_frames))
finally:
    # Release resources even if the loop is interrupted, so the output file
    # is finalised.
    cap.release()
    out_sub.release()
    cv2.destroyAllWindows()

Frame: 1 / 3718
Frame: 2 / 3718
Frame: 3 / 3718
Frame: 4 / 3718
Frame: 5 / 3718
Frame: 6 / 3718
Frame: 7 / 3718
Frame: 8 / 3718
Frame: 9 / 3718
Frame: 10 / 3718
Frame: 11 / 3718
Frame: 12 / 3718
Frame: 13 / 3718
Frame: 14 / 3718
Frame: 15 / 3718
Frame: 16 / 3718
Frame: 17 / 3718
Frame: 18 / 3718
Frame: 19 / 3718
Frame: 20 / 3718
Frame: 21 / 3718
Frame: 22 / 3718
Frame: 23 / 3718
Frame: 24 / 3718
Frame: 25 / 3718
Frame: 26 / 3718
Frame: 27 / 3718
Frame: 28 / 3718
Frame: 29 / 3718
Frame: 30 / 3718
Frame: 31 / 3718
Frame: 32 / 3718
Frame: 33 / 3718
Frame: 34 / 3718
Frame: 35 / 3718
Frame: 36 / 3718
Frame: 37 / 3718
Frame: 38 / 3718
Frame: 39 / 3718
Frame: 40 / 3718
Frame: 41 / 3718
Frame: 42 / 3718
Frame: 43 / 3718
Frame: 44 / 3718
Frame: 45 / 3718
Frame: 46 / 3718
Frame: 47 / 3718
Frame: 48 / 3718
Frame: 49 / 3718
Frame: 50 / 3718
Frame: 51 / 3718
Frame: 52 / 3718
Frame: 53 / 3718
Frame: 54 / 3718
Frame: 55 / 3718
Frame: 56 / 3718
Frame: 57 / 3718
Frame: 58 / 3718
Frame: 59 / 3718
Frame:

In [None]:
# Download 'output_video_area.mp4', the file the previous cell wrote with the
# annotated frames, using the Colab files helper.
files.download('output_video_area.mp4')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>