In [None]:
# Mount Google Drive at /content/drive; the model weights and the input video
# referenced later in this notebook are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install ultralytics
!pip install deep_sort_realtime

Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep_sort_realtime
Successfully installed deep_sort_realtime-1.3.2


In [None]:
import tensorflow as tf

# Load the trained temperature model (a Keras .h5 file stored on Drive)
TEMPERATURE_MODEL_PATH = '/content/drive/MyDrive/temperature_digits/temperature_model.h5'
temperature_model = tf.keras.models.load_model(TEMPERATURE_MODEL_PATH)




In [None]:
def preprocess_digit(digit):
    """Prepare one digit crop as input for the digit classifier.

    The crop is resized with ``cv2.resize(digit, (16, 21))``, converted to
    float32, divided by 255 and wrapped in one extra leading and one extra
    trailing axis of length 1.

    Args:
        digit: Image array accepted by ``cv2.resize`` (presumably a 2-D
            grayscale crop, which is what extract_temperature passes in).

    Returns:
        The float32 array with the two added axes.
    """
    scaled = cv2.resize(digit, (16, 21)).astype('float32') / 255.0
    # Leading axis: batch of one for model.predict; trailing axis: presumably
    # the single channel the network expects - TODO confirm against the model.
    return scaled[np.newaxis, ..., np.newaxis]

def recognize_digit(digit):
    """Classify a digit crop with ``temperature_model`` and return its label.

    Args:
        digit: Image crop of a single character; it is passed through
            preprocess_digit before prediction.

    Returns:
        One of '0'..'9', 'minus' or 'nothing'; 'unknown' when the index of the
        highest score has no entry in the label table.
    """
    scores = temperature_model.predict(preprocess_digit(digit), verbose=0)
    best_index = np.argmax(scores)

    # Indices 0-9 map to the digit characters; 10 and 11 are the non-digit classes.
    labels = {index: str(index) for index in range(10)}
    labels[10] = 'minus'
    labels[11] = 'nothing'

    return labels.get(best_index, 'unknown')

def huber_loss(residual, delta=1.0):
    """Return the Huber loss of a scalar residual.

    The loss is quadratic, ``0.5 * residual**2``, while ``|residual| <= delta``
    and linear, ``delta * (|residual| - 0.5 * delta)``, beyond that threshold.

    Args:
        residual: Scalar deviation.
        delta: Threshold between the quadratic and the linear regime.

    Returns:
        The loss value for ``residual``.
    """
    magnitude = abs(residual)
    in_quadratic_zone = magnitude <= delta
    return 0.5 * residual ** 2 if in_quadratic_zone else delta * (magnitude - 0.5 * delta)

def robust_temperature_mean(temperatures, delta=1.0):
    """Weighted mean that down-weights values far from the plain mean.

    Each value gets the weight ``1 / (1 + huber_loss(value - mean, delta))``,
    where ``mean`` is ``np.mean(temperatures)``; the result is the weighted
    average. This is a single re-weighting pass, not an iterative fit.

    Args:
        temperatures: Iterable of numeric values (also passed to ``np.mean``).
        delta: Threshold forwarded to huber_loss.

    Returns:
        The weighted mean, or the plain mean when the weights sum to zero
        (which is what happens when no value is iterated).
    """
    center = np.mean(temperatures)
    numerator = 0
    denominator = 0
    # Explicit sequential accumulation keeps the floating-point summation order fixed.
    for value in temperatures:
        weight = 1 / (1 + huber_loss(value - center, delta))
        numerator += value * weight
        denominator += weight

    return center if denominator == 0 else numerator / denominator

def extract_temperature(image, x1, y1, x2, y2):
    """Read the number printed inside ``image[y1:y2, x1:x2]``.

    The region is converted from BGR to grayscale and three 16-column slices
    are classified with recognize_digit. When the first label is 'minus' the
    text is ``-<second>.<third>``, otherwise ``<first><second>.<third>``.

    Args:
        image: BGR image array.
        x1, y1, x2, y2: Slice bounds of the region holding the printed value.

    Returns:
        The value as a float, or None when the assembled text is not a valid
        number (for example when a label is 'nothing' or 'unknown').
    """
    gray = cv2.cvtColor(image[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)

    # Columns 0-15, 16-31 and 48-63 are read; columns 32-47 are skipped
    # (presumably the decimal point - TODO confirm against the camera overlay).
    first, second, third = (
        recognize_digit(gray[:, start:start + 16]) for start in (0, 16, 48)
    )

    if first == 'minus':
        text = f"-{second}.{third}"
    else:
        text = f"{first}{second}.{third}"

    try:
        return float(text)
    except ValueError:
        return None

def process_thermal_image(image_path, x1, y1, x2, y2,
                          high_ref_box=(510, 67, 575, 88),
                          low_ref_box=(510, 403, 575, 424)):
    """Estimate the temperature of a rectangular region of a thermal frame.

    The two reference readings printed on the frame are read with
    extract_temperature. Channel 0 of every pixel in the region is then mapped
    linearly from [0, 255] to [low, high] and the per-pixel values are combined
    with robust_temperature_mean.

    Args:
        image_path: Despite the name, this is the already-decoded frame (an
            array indexed as [row, column, channel]), not a file path. The
            name is kept so existing callers keep working.
        x1, y1, x2, y2: Region bounds in pixels, used as slice bounds (so x2
            and y2 are exclusive). They are cast to int and clamped to the frame.
        high_ref_box: (x1, y1, x2, y2) of the printed high reference value.
        low_ref_box: (x1, y1, x2, y2) of the printed low reference value.

    Returns:
        The robust mean temperature, or None when the frame is None, the
        clamped region contains no pixels, or a reference value cannot be read.
    """
    img = image_path
    if img is None:
        return None

    x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
    height, width = img.shape[:2]

    # Clamp to the frame. The bounds are slice bounds, so the upper ones may be
    # equal to width/height; clamping them to width - 1 / height - 1 would drop
    # the last column/row of any box that touches the frame edge.
    x1 = max(0, min(x1, width))
    y1 = max(0, min(y1, height))
    x2 = max(0, min(x2, width))
    y2 = max(0, min(y2, height))

    # An empty or inverted box has no pixels to average. Bail out before the
    # (comparatively expensive) digit recognition instead of producing NaN.
    if x2 <= x1 or y2 <= y1:
        return None

    temp_high = extract_temperature(img, *high_ref_box)
    temp_low = extract_temperature(img, *low_ref_box)
    if temp_high is None or temp_low is None:
        return None

    temp_range = temp_high - temp_low

    # NOTE(review): only channel 0 is read - this assumes channel 0 carries the
    # thermal intensity (e.g. a grayscale palette); confirm for colored palettes.
    region = img[y1:y2, x1:x2, 0]
    pixel_temps = temp_low + (region / 255.0) * temp_range

    # Row-major flattening keeps the same pixel order as a nested y/x loop.
    return robust_temperature_mean(pixel_temps.ravel(), delta=1.0)

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

# Paths: the YOLO weights and the input video are read from Google Drive;
# the annotated output video is written under /content.
model_path = '/content/drive/MyDrive/yolo/best (2).pt'
input_video_path = '/content/drive/MyDrive/yolo/Cópia de video_teste3.mp4'
output_video_path = '/content/output_video.mp4'

In [None]:

# Set to True to print the raw detection list of every frame (floods the cell output).
LOG_DETECTIONS = False
# Frame rate used only when the input video does not report a valid one.
FALLBACK_FPS = 25.0

model = YOLO(model_path)

# Initialize the DeepSORT tracker
tracker = DeepSort(max_age=5,
                   n_init=3,
                   max_cosine_distance=0.3,
                   gating_only_position=True)

# Class IDs as defined by the YOLOv8 model
CLASS_HEAD = 0  # Replace with the real ID of the "head" class in your model
CLASS_EYE = 1   # Replace with the real ID of the "eye" class in your model

# Unique track IDs seen for heads and eyes
head_ids = set()
eye_ids = set()

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    raise IOError(f'Could not open input video: {input_video_path}')

# Write the output with the same geometry and frame rate as the input so the
# annotated video plays at the original speed.
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:  # also catches NaN
    fps = FALLBACK_FPS

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLOv8 on the frame; verbose=False suppresses the per-frame log lines
        results = model(frame, verbose=False)

        # Collect detections as [bbox, conf, cls] with bbox = [left, top, width, height]
        detections = []
        for box in results[0].boxes:
            bbox_array = box.xyxy.cpu().numpy().flatten()
            x1, y1, x2, y2 = bbox_array.astype(int)
            bbox = [x1, y1, x2 - x1, y2 - y1]

            conf = box.conf.cpu().numpy()[0]
            cls = int(box.cls.cpu().numpy()[0])

            if conf > 0.5:  # confidence filter
                detections.append([bbox, conf, cls])

        if LOG_DETECTIONS:
            print(f'Detections: {detections}')

        # Update the tracker with this frame's detections
        tracks = tracker.update_tracks(detections, frame=frame)

        for track in tracks:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            cls = track.det_class  # class associated with the track
            x1, y1, x2, y2 = map(int, track.to_ltrb())  # [left, top, right, bottom]

            # Labels are ASCII only: cv2.putText's Hershey fonts cannot draw
            # characters such as 'ç' and render them as question marks.
            if cls == CLASS_HEAD:
                head_ids.add(track_id)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
                cv2.putText(frame, f'Cabeca ID {track_id}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
            elif cls == CLASS_EYE:
                eye_ids.add(track_id)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 2)
                cv2.putText(frame, f'Olho ID {track_id}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

                # Optional: estimate the eye temperature for this box
                # eye_temp = process_thermal_image(frame, x1, y1, x2, y2)

        # Append the annotated frame to the output video
        out.write(frame)
finally:
    # Always release the capture and finalize the output file, even if the
    # loop is interrupted or raises.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Counts of unique track IDs per class
total_cabecas = len(head_ids)
total_olhos = len(eye_ids)

print(f'Total de cabeças detectadas: {total_cabecas}')
print(f'Total de olhos detectados: {total_olhos}')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 1.5ms preprocess, 9.3ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)
Detections: [[[299, 40, 174, 259], 0.8950054, 0], [[179, 325, 119, 133], 0.6588023, 0], [[332, 127, 36, 44], 0.6333064, 1]]

0: 512x640 2 cabecas, 2 olhos, 9.9ms
Speed: 2.3ms preprocess, 9.9ms inference, 1.5ms postprocess per image at shape (1, 3, 512, 640)
Detections: [[[313, 33, 180, 282], 0.911425, 0], [[344, 119, 37, 47], 0.5916074, 1], [[173, 321, 135, 136], 0.591028, 0]]

0: 512x640 2 cabecas, 2 olhos, 8.7ms
Speed: 1.9ms preprocess, 8.7ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)
Detections: [[[320, 27, 196, 263], 0.8894532, 0], [[172, 320, 143, 134], 0.7461854, 0], [[353, 111, 41, 46], 0.5964692, 1]]

0: 512x640 2 cabecas, 3 olhos, 8.2ms
Speed: 1.2ms preprocess, 8.2ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)
Detections: [[[331, 22, 200, 268], 0.9170004, 0], [[173, 315, 147, 

In [None]:
# Show the unique track IDs collected per class: heads first, then eyes.
for tracked_ids in (head_ids, eye_ids):
    print(tracked_ids)

{'136', '145', '60', '166', '195', '234', '256', '93', '99', '88', '139', '96', '53', '177', '52', '43', '239', '252', '124', '263', '302', '118', '84', '294', '23', '33', '34', '295', '12', '80', '212', '187', '298', '58', '227', '17', '24', '160', '180', '255', '266', '307', '159', '61', '205', '247', '27', '151', '217', '282', '297', '83', '35', '1', '221'}
{'170', '207', '144', '25', '158', '117', '3', '128', '137', '193', '240', '147', '204', '66', '85', '67', '110', '167', '210', '222', '56', '92', '123', '131', '50', '29', '143', '178', '291', '79', '97', '173', '186', '270', '107', '303', '306', '165', '40', '135', '197', '157'}


In [None]:
# NOTE: this rebinds the output path that the tracking cell also reads;
# re-running the tracking cell afterwards would write to this file.
output_video_path = '/content/output_video_no_tracking.mp4'
model = YOLO(model_path)

# Class IDs as defined by the YOLOv8 model
CLASS_HEAD = 0  # Replace with the real ID of the "head" class in your model
CLASS_EYE = 1   # Replace with the real ID of the "eye" class in your model

# head_ids / eye_ids are intentionally not reset here: this cell does no
# tracking and never used them, and resetting them discarded the IDs collected
# by the tracking cell.

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    raise IOError(f'Could not open input video: {input_video_path}')

# Write the output with the same geometry and frame rate as the input so the
# annotated video plays at the original speed.
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:  # also catches NaN; fall back to the previous fixed rate
    fps = 25.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLOv8 on the frame; verbose=False suppresses the per-frame log lines
        results = model(frame, verbose=False)

        for box in results[0].boxes:
            # Bounding-box corners
            bbox_array = box.xyxy.cpu().numpy().flatten()
            x1, y1, x2, y2 = bbox_array.astype(int)

            # Confidence and class
            conf = box.conf.cpu().numpy()[0]
            cls = int(box.cls.cpu().numpy()[0])

            if conf > 0.2:  # confidence filter
                # Labels are ASCII only: cv2.putText's Hershey fonts cannot
                # draw characters such as 'ç' and render them as question marks.
                if cls == CLASS_HEAD:
                    color = (0, 255, 255)  # yellow for "head"
                    label = f'Cabeca: {conf:.2f}'
                elif cls == CLASS_EYE:
                    color = (255, 0, 255)  # magenta for "eye"
                    label = f'Olho: {conf:.2f}'
                else:
                    color = (0, 255, 0)  # green for any other class
                    label = f'Classe {cls}: {conf:.2f}'

                # Draw the box and its label directly on the frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # Append the annotated frame to the output video. No window is shown,
        # so there is no key press to poll for.
        out.write(frame)
finally:
    # Always release the capture and finalize the output file, even if the
    # loop is interrupted or raises.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 512x640 1 cabeca, 2 olhos, 8.9ms
Speed: 1.4ms preprocess, 8.9ms inference, 1.4ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 8.2ms
Speed: 1.9ms preprocess, 8.2ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 8.1ms
Speed: 1.8ms preprocess, 8.1ms inference, 1.2ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 8.1ms
Speed: 1.6ms preprocess, 8.1ms inference, 1.2ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 8.8ms
Speed: 1.8ms preprocess, 8.8ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 7.9ms
Speed: 1.6ms preprocess, 7.9ms inference, 1.2ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 8.3ms
Speed: 1.2ms preprocess, 8.3ms inference, 1.3ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 cabeca, 1 olho, 9.9ms
Speed: 2.0ms preprocess, 9.9ms inferenc