In [1]:
#Instalamos librerías en caso de ser necesario

In [1]:
#Importamos librerías importantes
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator 
from PIL import Image, ImageEnhance, ImageFilter, ImageDraw
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import socket
import os

In [2]:
#Funciones
def viewCam(index):
    cap = cv.VideoCapture(index)
    while True:
        ret, frame = cap.read()
        cv.imshow("Feed", frame)
        key = cv.waitKey(1) & 0xFF
        if key == ord('q'):
            break
    cap.release()
    cv.destroyAllWindows()

def visualize_yolo(index, model):
    cap = cv.VideoCapture(index)
    while True:
        ret, frame = cap.read()
        results = model.predict(frame)

        for result in results:
            
            annotator = Annotator(frame)
            
            boxes = result.boxes
            for box in boxes:
                b = box.xyxy[0].numpy()
                c = box.cls
                annotator.box_label(b, model.names[int(c)])
                center_x = int((b[0] + b[2]) / 2)
                center_y = int((b[1] + b[3]) / 2)
                cv.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1)
        img = annotator.result()  
        cv.imshow('YOLO V8 Detection', img)     
        key = cv.waitKey(1)
        if key == ord('q'):
            break

    cap.release()
    cv.destroyAllWindows()

def YOLO_On_Image(imagePath, title, model):
    image = Image.open(imagePath)
    results = model.predict(image)
    centers = []
    for result in results:
        annotator = Annotator(image)
        boxes = result.boxes
        for box in boxes:
            b = box.xyxy[0].numpy()
            c = box.cls
            annotator.box_label(b, model.names[int(c)])
            center_x = int((b[0] + b[2]) / 2)
            center_y = int((b[1] + b[3]) / 2)
            centers.append([center_x, center_y])
    img = annotator.result() 
    plt.figure()
    plt.imshow(img)
    for center in centers:
        plt.scatter(center[0], center[1], s=200, c="r", marker="+")
    plt.axis("off")
    plt.title(title)
    print(centers)
    return centers

#Vamos a hacer una función para cambiar un modelo de yolo pytorch a yolo tensorflow lite
def pytorch_to_tflite_model(pytorch_model_path):
    model = YOLO(pytorch_model_path)
    model.export(format="tflite")

El procesamiento es muy lento y las cámaras los frames del servidor y transmisor no corresponden
Vamos a mandar una imagen sin necesariamente mostrarla primero en transmisor y luego en servidor. Tomamos la imagen, la mandamos a servidor a procesar, mandamos la imagen procesada de servidor a transmisor y ahí la mostramos.

In [3]:
#Cambiamos el modelo a tflite
pytorch_to_tflite_model("models/best_yolo_2/best_model_2.pt")

Ultralytics 8.3.24 🚀 Python-3.10.12 torch-2.5.1+cu124 CPU (AMD Ryzen 7 3700U with Radeon Vega Mobile Gfx)
Model summary (fused): 168 layers, 3,010,133 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from 'models/best_yolo_2/best_model_2.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 27, 8400) (5.9 MB)


E0000 00:00:1732052720.463173   85049 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732052720.480575   85049 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



[34m[1mTensorFlow SavedModel:[0m starting export with tensorflow 2.18.0...

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.36...
[34m[1mONNX:[0m export success ✅ 1.9s, saved as 'models/best_yolo_2/best_model_2.onnx' (11.8 MB)
[34m[1mTensorFlow SavedModel:[0m starting TFLite export with onnx2tf 1.22.3...


I0000 00:00:1732052736.484046   85049 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
I0000 00:00:1732052736.484191   85049 single_machine.cc:361] Starting new session
W0000 00:00:1732052737.484862   85049 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1732052737.484906   85049 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
I0000 00:00:1732052738.606319   85049 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
I0000 00:00:1732052738.606456   85049 single_machine.cc:361] Starting new session
W0000 00:00:1732052739.425287   85049 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1732052739.425320   85049 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.


[34m[1mTensorFlow SavedModel:[0m export success ✅ 21.8s, saved as 'models/best_yolo_2/best_model_2_saved_model' (29.5 MB)

[34m[1mTensorFlow Lite:[0m starting export with tensorflow 2.18.0...
[34m[1mTensorFlow Lite:[0m export success ✅ 0.0s, saved as 'models/best_yolo_2/best_model_2_saved_model/best_model_2_float32.tflite' (11.7 MB)

Export complete (22.6s)
Results saved to [1m/home/jesus/Documents/Servicio_Social/models/best_yolo_2[0m
Predict:         yolo predict task=detect model=models/best_yolo_2/best_model_2_saved_model/best_model_2_float32.tflite imgsz=640  
Validate:        yolo val task=detect model=models/best_yolo_2/best_model_2_saved_model/best_model_2_float32.tflite imgsz=640 data=/content/drive/MyDrive/train_yolo_models/conditioned_db/training.yaml  
Visualize:       https://netron.app


In [None]:
#Vamos a usar un modelo en tflite de 16 bits para hacer el proceso más ligero
model = YOLO("models/best_yolo_2/best_model_2_saved_model/best_model_2_float16.tflite")
# model = YOLO("models/new_best_yolov8n.pt")
# Server configuration (Receiver)
HOST = '0.0.0.0'  # Listen on all network interfaces
PORT = 9999       # Port to listen on

# Set up the server socket
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind((HOST, PORT))
server_socket.listen(1)
print("Waiting for connection...")

# Accept a client connection
conn, addr = server_socket.accept()
print(f"Connected to {addr}")

data = b''  # Buffer for received data
payload_size = 4  # Size of the header (int indicating the frame size)

while True:
    # Receive the frame size from the transmitter
    while len(data) < payload_size:
        packet = conn.recv(4096)
        if not packet:
            break
        data += packet
    if not data:
        break

    # Extract the frame size
    frame_size = int.from_bytes(data[:payload_size], byteorder="big")
    data = data[payload_size:]

    # Receive the frame data based on the frame size
    while len(data) < frame_size:
        data += conn.recv(4096)

    # Extract and decode the frame data
    frame_data = data[:frame_size]
    data = data[frame_size:]
    frame = cv.imdecode(np.frombuffer(frame_data, dtype=np.uint8), cv.IMREAD_COLOR)

    # Process the frame (for example, converting it to grayscale)
    results = model.predict(frame)

    for result in results:
            
        annotator = Annotator(frame)            
        boxes = result.boxes
        for box in boxes:
            b = box.xyxy[0].numpy()
            c = box.cls
            annotator.box_label(b, model.names[int(c)])
            #center_x = int((b[0] + b[2]) / 2)
            #center_y = int((b[1] + b[3]) / 2)
            #cv.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1)
        img = annotator.result()  
        #cv.imshow('YOLO V8 Detection', img)     
        #key = cv.waitKey(1)
        #if key == ord('q'):
            #break
    #centers = [center_x, center_y]
    #processed_frame = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    processed_frame = img

    #Here we get the accuracy of the model on the image:
    


    # Encode the processed frame
    _, encoded_processed_frame = cv.imencode('.jpg', processed_frame)
    encoded_processed_frame_bytes = encoded_processed_frame.tobytes()

    # Send the processed frame size and data back to the transmitter
    processed_frame_size_bytes = len(encoded_processed_frame_bytes).to_bytes(4, byteorder="big")
    conn.sendall(processed_frame_size_bytes + encoded_processed_frame_bytes)

# Close the connection
conn.close()
server_socket.close()

Waiting for connection...
Connected to ('192.168.225.102', 35838)


E0000 00:00:1731710630.561749  116130 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731710630.568943  116130 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading models/new_best_yolov8n_float16.tflite for TensorFlow Lite inference...



INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


0: 640x640 1 earbuds, 263.6ms
Speed: 8.1ms preprocess, 263.6ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 256.0ms
Speed: 3.9ms preprocess, 256.0ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 286.9ms
Speed: 3.6ms preprocess, 286.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 260.4ms
Speed: 2.4ms preprocess, 260.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 260.7ms
Speed: 3.4ms preprocess, 260.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 279.7ms
Speed: 2.4ms preprocess, 279.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 261.5ms
Speed: 4.0ms preprocess, 261.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 264.7ms
Speed: 3.3ms preprocess, 264.7ms inference, 1.0ms postprocess per i

In [3]:
#Segunda versión
#Vamos a hacer un append de las accuracies
accuracies = []
# Load the YOLO model
model = YOLO("models/best_yolo_2/best_model_2_saved_model/best_model_2_float16.tflite")

# Server configuration (Receiver)
HOST = '0.0.0.0'  # Listen on all network interfaces
PORT = 9999       # Port to listen on

# Set up the server socket
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind((HOST, PORT))
server_socket.listen(1)
print("Waiting for connection...")

# Accept a client connection
conn, addr = server_socket.accept()
print(f"Connected to {addr}")

data = b''  # Buffer for received data
payload_size = 4  # Size of the header (int indicating the frame size)

while True:
    # Receive the frame size from the transmitter
    while len(data) < payload_size:
        packet = conn.recv(4096)
        if not packet:
            break
        data += packet
    if not data:
        break

    # Extract the frame size
    frame_size = int.from_bytes(data[:payload_size], byteorder="big")
    data = data[payload_size:]

    # Receive the frame data based on the frame size
    while len(data) < frame_size:
        data += conn.recv(4096)

    # Extract and decode the frame data
    frame_data = data[:frame_size]
    data = data[frame_size:]
    frame = cv.imdecode(np.frombuffer(frame_data, dtype=np.uint8), cv.IMREAD_COLOR)

    # Process the frame using the YOLO model
    results = model.predict(frame)
    annotator = Annotator(frame)

    total_confidence = 0.0
    object_count = 0

    for result in results:
        boxes = result.boxes
        for box in boxes:
            b = box.xyxy[0].numpy()
            c = box.cls
            confidence = box.conf[0].numpy()  # Confidence score of the detected object
            total_confidence += confidence
            object_count += 1

            # Annotate the frame with box and label
            annotator.box_label(b, f"{model.names[int(c)]} {confidence:.2f}")

    # Calculate the accuracy (average confidence score)
    accuracy = total_confidence / object_count if object_count > 0 else 0.0
    processed_frame = annotator.result()
    accuracies.append(accuracy)

    # Overlay the accuracy on the frame
    accuracy_text = f"Accuracy: {accuracy:.2%}"
    cv.putText(
        processed_frame,
        accuracy_text,
        (10, 30),  # Position of the text
        cv.FONT_HERSHEY_SIMPLEX,  # Font type
        0.7,  # Font scale
        (0, 255, 0),  # Font color (green)
        2,  # Thickness
        cv.LINE_AA  # Line type
    )

    # Encode the processed frame
    _, encoded_processed_frame = cv.imencode('.jpg', processed_frame)
    encoded_processed_frame_bytes = encoded_processed_frame.tobytes()

    # Send the processed frame size and data back to the transmitter
    processed_frame_size_bytes = len(encoded_processed_frame_bytes).to_bytes(4, byteorder="big")
    conn.sendall(processed_frame_size_bytes + encoded_processed_frame_bytes)

# Close the connection
conn.close()
server_socket.close()

Waiting for connection...
Connected to ('192.168.225.102', 54276)


E0000 00:00:1732055970.743234    7258 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732055970.772596    7258 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading models/best_yolo_2/best_model_2_saved_model/best_model_2_float16.tflite for TensorFlow Lite inference...



INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


0: 640x640 1 earbuds, 509.3ms
Speed: 36.0ms preprocess, 509.3ms inference, 43.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 496.9ms
Speed: 16.6ms preprocess, 496.9ms inference, 13.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 602.8ms
Speed: 22.2ms preprocess, 602.8ms inference, 20.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 502.5ms
Speed: 11.8ms preprocess, 502.5ms inference, 12.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 786.5ms
Speed: 16.1ms preprocess, 786.5ms inference, 24.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 733.0ms
Speed: 24.9ms preprocess, 733.0ms inference, 20.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 earbuds, 760.0ms
Speed: 20.4ms preprocess, 760.0ms inference, 44.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 protoboard, 1 earbuds, 417.7ms
Speed: 23.3ms preprocess, 417.7ms infer

KeyboardInterrupt: 