In [11]:
import cv2
import torch
from ultralytics import YOLO
import os
from ultralytics.utils.plotting import Annotator

In [2]:
def detect_fire_smoke(model_path1, input_path, output_path, model_path2=None):
    # Load the models
    model1 = YOLO(model_path1)
    if model_path2:
        model2 = YOLO(model_path2)

    # Define colors for bounding boxes for each model
    color_model1 = (0, 255, 0)  # Green for model1
    color_model2 = (0, 0, 255)  # Red for model2

    def draw_boxes(annotator, results, model, color):
        for r in results:
            boxes = r.boxes
            for box in boxes:
                b = box.xyxy[0]  # get box coordinates in (top, left, bottom, right) format
                c = box.cls
                label = f"{model.names[int(c)]} {box.conf.item():.2f}"  # Add confidence to the label
                annotator.box_label(b, label, color=color)

    # Check if the input is an image or a video
    if input_path.endswith('.jpg') or input_path.endswith('.png') or input_path.endswith('.jpeg'):
        # Handle image
        img = cv2.imread(input_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Predict with both models
        results1 = model1.predict(img, conf=0.2)
        annotator = Annotator(img)

        draw_boxes(annotator, results1, model1, color_model1)

        if model_path2:
            results2 = model2.predict(img, conf=0.2)
            draw_boxes(annotator, results2, model2, color_model2)

        img = annotator.result()

        # Save the result
        cv2.imwrite(output_path, img)

    else:
        # Handle video
        cap = cv2.VideoCapture(input_path)

        # Get video frame size
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Define the codec and create a VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, 20.0, (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()

            if ret:  # if frame read successfully
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Predict with both models
                results1 = model1.predict(img, conf=0.2)
                annotator = Annotator(frame)

                draw_boxes(annotator, results1, model1, color_model1)

                if model_path2:
                    results2 = model2.predict(img, conf=0.2)
                    draw_boxes(annotator, results2, model2, color_model2)

                frame = annotator.result()

                # write the flipped frame
                out.write(frame)
            else:  # if frame not read successfully, then we reached the end of the video
                break

        # Release everything when job is finished
        cap.release()
        out.release()
        cv2.destroyAllWindows()


In [12]:
model_path1 = "/Users/lefterisfthenos/Desktop/MSBA - AUEB/Semester 3/Artificial Intelligence/CourseProject/models/model_chris/weights/best.pt"
model_path2 = 'models/model_chris_0908/weights/best.pt'
input_ = "test_files/rodos_maris.mp4"
output_ = "runs_pret/out_comparison_4_Rodos_Maris.mp4"   

In [18]:
detect_fire_smoke(model_path1, input_, output_, model_path2)



0: 384x640 1 Smoke, 131.2ms
Speed: 5.8ms preprocess, 131.2ms inference, 12.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 smoke, 207.2ms
Speed: 2.2ms preprocess, 207.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Smoke, 119.3ms
Speed: 1.7ms preprocess, 119.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 smoke, 264.3ms
Speed: 1.4ms preprocess, 264.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Smokes, 127.0ms
Speed: 1.7ms preprocess, 127.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 smoke, 258.6ms
Speed: 1.3ms preprocess, 258.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Smoke, 84.9ms
Speed: 1.5ms preprocess, 84.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 smoke, 179.2ms
Speed: 1.3ms preprocess, 179.2ms inference, 0.8ms postprocess per image at shape (

In [10]:
import os

for idx, file in enumerate(os.listdir("test_files")):
    if idx > 60 and idx < 120: 
        if file.endswith(".png") or file.endswith(".jpg") or file.endswith(".jpeg"):
            input_path = os.path.join("test_files", file)
            output_path = os.path.join("runs_comparison", file)
            try:
                detect_fire_smoke(model_path1, input_path, output_path, model_path2)
            except:
                print("Error with file: ", file)
                continue
        else:
            continue
    elif idx <= 60:
        continue
    else:
        break


0: 416x640 1 Fire, 1 Smoke, 89.7ms
Speed: 2.5ms preprocess, 89.7ms inference, 0.5ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 2 fires, 177.3ms
Speed: 1.5ms preprocess, 177.3ms inference, 0.4ms postprocess per image at shape (1, 3, 416, 640)

0: 320x640 3 Fires, 68.5ms
Speed: 1.2ms preprocess, 68.5ms inference, 0.4ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 3 fires, 180.3ms
Speed: 1.2ms preprocess, 180.3ms inference, 0.4ms postprocess per image at shape (1, 3, 320, 640)

0: 352x640 1 Fire, 74.0ms
Speed: 1.4ms preprocess, 74.0ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 1 fire, 150.6ms
Speed: 1.3ms preprocess, 150.6ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 448x640 1 Fire, 1 Smoke, 95.7ms
Speed: 1.4ms preprocess, 95.7ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 fire, 2 smokes, 193.3ms
Speed: 1.3ms preprocess, 193.3ms inference, 0.4ms postprocess per i

# Show video during predicting

In [13]:
model = YOLO(model_path1)
res = model.predict(input_, show=True, save=True, conf=0.2)



    causing potential out-of-memory errors for large sources or long-running streams/videos.

    Usage:
        results = model(source=..., stream=True)  # generator of Results objects
        for r in results:
            boxes = r.boxes  # Boxes object for bbox outputs
            masks = r.masks  # Masks object for segment masks outputs
            probs = r.probs  # Class probabilities for classification outputs

video 1/1 (1/2920) /Users/lefterisfthenos/Desktop/MSBA - AUEB/Semester 3/Artificial Intelligence/CourseProject/test_files/rodos_maris.mp4: 384x640 (no detections), 160.4ms
video 1/1 (2/2920) /Users/lefterisfthenos/Desktop/MSBA - AUEB/Semester 3/Artificial Intelligence/CourseProject/test_files/rodos_maris.mp4: 384x640 (no detections), 88.9ms
video 1/1 (3/2920) /Users/lefterisfthenos/Desktop/MSBA - AUEB/Semester 3/Artificial Intelligence/CourseProject/test_files/rodos_maris.mp4: 384x640 (no detections), 81.4ms
video 1/1 (4/2920) /Users/lefterisfthenos/Desktop/MSBA - AUEB/

KeyboardInterrupt: 

# Live Webcam Video Predicting and make sound if conf > 0.2

In [14]:
import cv2
import beepy as beep


def draw_boxes(annotator, results, model, color):
        for r in results:
            boxes = r.boxes
            for box in boxes:
                b = box.xyxy[0]  # get box coordinates in (top, left, bottom, right) format
                c = box.cls
                conf = box.conf.item()
                if conf > 0.2:
                    beep.beep(3)
                label = f"{model.names[int(c)]} {box.conf.item():.2f}"  # Add confidence to the label
                annotator.box_label(b, label, color=color)
                

def capture_webcam_video(model):
    color_model1 = (0, 255, 0)
    # Open the default camera (usually the built-in webcam)
    cap = cv2.VideoCapture(0)

    # Check if the camera opened successfully
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # If frame is read correctly, ret is True
        if not ret:
            print("Error: Can't receive frame. Exiting ...")
            break

        # Display the resulting frame
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Predict with both models
        results1 = model.predict(img, conf=0.2)
        annotator = Annotator(frame)

        draw_boxes(annotator, results1, model, color_model1)

        frame = annotator.result()

        # write the flipped frame
        cv2.imshow('Webcam Video', frame)

        # Press 'q' to exit the video window
        if cv2.waitKey(1) == ord('q'):
            break

    # Release the capture when everything is done
    cap.release()
    cv2.destroyAllWindows()


In [15]:
capture_webcam_video(model=YOLO(model_path2))


0: 384x640 (no detections), 176.6ms
Speed: 1.7ms preprocess, 176.6ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 171.4ms
Speed: 1.5ms preprocess, 171.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 166.3ms
Speed: 1.4ms preprocess, 166.3ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 158.0ms
Speed: 1.3ms preprocess, 158.0ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 156.4ms
Speed: 1.4ms preprocess, 156.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 154.5ms
Speed: 1.4ms preprocess, 154.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 158.5ms
Speed: 1.5ms preprocess, 158.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 smoke, 156.3ms
Speed: 1.5ms preprocess, 1

KeyboardInterrupt: 

: 