In [6]:
import cv2
from ultralytics import YOLO
import pickle 
import numpy as np
import os 
import time 
from tensorflow import keras

In [7]:
# Location of the pickled analyzer network used later to classify targets.
file_path = "/Users/maxkucher/pytorch/howitzer_detector/analyzer_nn.pickle"

# SECURITY NOTE: unpickling executes arbitrary code embedded in the file,
# so only load pickles that come from a trusted source.
with open(file_path, mode="rb") as pickle_handle:
    analyzer_model = pickle.load(pickle_handle)

In [8]:
# Sanity check of the unpickled object: call its summary() method.
# NOTE(review): presumably a Keras model (keras is imported above), in which
# case this prints the layer-by-layer architecture — confirm.
analyzer_model.summary()

In [9]:
# --- Focal-length calibration -------------------------------------------
# Detect the reference target in the first frame of the calibration video
# and derive the focal length from the similar-triangles relation:
#     focal_length = pixel_width * known_distance / real_width
model = YOLO("/Users/maxkucher/pytorch/howitzer_detector/best.pt")
pre_cap = cv2.VideoCapture("/Users/maxkucher/pytorch/howitzer-detector/howitzer_detector/train_video.mp4")

# Real-world width of the reference target and its distance from the camera
# in the calibration footage.
# NOTE(review): units are presumably centimetres (the processing loop divides
# the estimated distance by 100 and labels it "m") — confirm.
targer_width = 570
known_distance = 400

focal_length = None

# Minimum detection confidence for a box to be used.
threshold = 0.5

# Only the first frame is used for calibration, so the capture can be
# released immediately after the read.
pre_ret, pre_frame = pre_cap.read()
pre_cap.release()

if not pre_ret or pre_frame is None:
    raise RuntimeError("Could not read a frame from the calibration video.")

pre_results = model(pre_frame)[0]
for pre_result in pre_results.boxes.data.tolist():
    x1, _, x2, _, score, class_id = pre_result
    pixel_width = x2 - x1
    # Compare the confidence against the threshold (not against the class id)
    # and ignore degenerate boxes that would yield a zero focal length.
    if score > threshold and pixel_width > 0:
        focal_length = (pixel_width * known_distance) / targer_width
        break

if focal_length is None:
    # Fail loudly instead of calling exit(): the following cells cannot work
    # without a focal length, and raising stops a "Run All" at this cell.
    print("Focus destination is not detected.")
    raise RuntimeError("Focus destination is not detected.")




0: 384x640 1 target, 702.5ms
Speed: 2.8ms preprocess, 702.5ms inference, 564.4ms postprocess per image at shape (1, 3, 384, 640)


In [10]:
# --- Setup for the main processing loop ----------------------------------
cap = cv2.VideoCapture("/Users/maxkucher/pytorch/howitzer-detector/video_1.mp4")
if not cap.isOpened():
    # Fail fast: otherwise every read() would return (False, None).
    raise RuntimeError("Could not open the input video.")

# General-purpose detector run on the area around each detected target.
yolo_model = YOLO("yolov8l")

# Counter embedded in screenshot file names; the processing loop increments it.
frame_counter = 0
# time.time() timestamp of the last saved screenshot (0 = nothing saved yet).
last_saved_time = 0
# Minimum number of seconds between two saved screenshots.
save_interval = 10

# folder where we will save screenshots
output_folder = "/Users/maxkucher/pytorch/outputs"

# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(output_folder, exist_ok=True)

fps = cap.get(cv2.CAP_PROP_FPS)
# Number of video frames spanned by one save interval.
frame_interval = fps * save_interval

# Class-id -> class-name lookups of the two detectors.
names = model.names
yolo_names = yolo_model.names
# Minimum confidence for a detection (of either detector) to be used.
threshold = 0.5

# --- Main processing loop -------------------------------------------------
# For every frame: detect targets with the custom model, look for context
# objects (person / car / bus / truck) around each target with the general
# detector, let the analyzer network label the target, estimate its distance
# and draw the overlays. Press "q" in the window to stop early.

# Labels in the order of the analyzer network's output indices.
pred_list = ["Decoy", "Howitzer", "Undecided"]
# Overlay colour (BGR) for each label.
status_colors = {
    "Decoy": (0, 255, 0),
    "Howitzer": (0, 0, 255),
    "Undecided": (0, 255, 255),
}
# Classes of the general detector that are fed to the analyzer.
context_classes = ("person", "car", "bus", "truck")
# Colour (BGR) of the boxes drawn around context objects.
obj_color = (255, 0, 0)

try:
    while True:
        ret, frame = cap.read()
        # Stop before touching the frame: at end of stream `frame` is None.
        if not ret or frame is None:
            break

        # Per-frame counters shown in the summary overlay.
        total_objects = 0
        howitzers = 0
        decoys = 0
        undecided = 0

        results = model(frame)[0]
        current_time = time.time()

        # Crops for the second detector are taken from an unannotated copy so
        # that overlays drawn for earlier targets do not leak into later crops.
        clean_frame = frame.copy()

        for result in results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result

            new_real_width = x2 - x1

            if score <= threshold:
                continue
            # Degenerate boxes would give an empty crop and a division by zero
            # in the distance estimate below.
            if new_real_width <= 0 or y2 <= y1:
                continue

            total_objects += 1

            x_center = int((x1 + x2) / 2)
            y_center = int((y1 + y2) / 2)

            # Context flags belong to ONE target, so they are reset per target.
            person_value = 0
            car_value = 0
            bus_value = 0
            truck_value = 0

            # Padded region around the target, clamped to the frame.
            # NOTE(review): `// 0.5` is floor-division by 0.5, i.e. the padding
            # is floor(2 * size) on each side — confirm `* 0.5` was not meant.
            new_x1 = max(0, x1 - (x2 - x1) // 0.5)
            new_y1 = max(0, y1 - (y2 - y1) // 0.5)
            new_x2 = min(frame.shape[1], x2 + (x2 - x1) // 0.5)
            new_y2 = min(frame.shape[0], y2 + (y2 - y1) // 0.5)
            crop_left, crop_top = int(new_x1), int(new_y1)
            cropped_frames = clean_frame[crop_top:int(new_y2), crop_left:int(new_x2)]

            new_results = yolo_model(cropped_frames)[0]

            for new_result in new_results.boxes.data.tolist():
                x_11, y_11, x_22, y_22, new_score, new_class_id = new_result
                class_name = yolo_names[int(new_class_id)]

                if new_score <= threshold or class_name not in context_classes:
                    continue

                # Detections are relative to the crop; shift them back into
                # full-frame coordinates before drawing on `frame`.
                box_left = int(x_11) + crop_left
                box_top = int(y_11) + crop_top
                box_right = int(x_22) + crop_left
                box_bottom = int(y_22) + crop_top

                cv2.rectangle(frame, (box_left, box_top), (box_right, box_bottom), obj_color, 2)
                cv2.putText(frame, class_name.upper(), (box_left, box_top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, obj_color, 3, cv2.LINE_AA)

                # Presence flags are only ever switched on, so one detection
                # can no longer erase the flag set by a previous one.
                if class_name == "person":
                    person_value = 1
                elif class_name == "car":
                    car_value = 1
                elif class_name == "bus":
                    bus_value = 1
                elif class_name == "truck":
                    truck_value = 1

            features = np.array([[person_value, car_value, bus_value, truck_value]])
            pred = np.argmax(analyzer_model.predict(features))
            text = pred_list[int(pred)]
            color = status_colors[text]

            if text == "Decoy":
                decoys += 1
            elif text == "Howitzer":
                howitzers += 1
            elif text == "Undecided":
                undecided += 1

            # Inverse of the calibration relation, then floor-divided by 100
            # for display with the "m" suffix.
            dist_to_target = ((targer_width * focal_length) / new_real_width) // 100

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            cv2.circle(frame, (x_center, y_center), 5, color, thickness=cv2.FILLED)
            cv2.putText(frame, f"Target status: {text.upper()} / D: {dist_to_target} m", (int(x1), int(y1 - 30)), cv2.FONT_HERSHEY_SIMPLEX, 1.3, color, 3, cv2.LINE_AA)

            if current_time - last_saved_time >= save_interval:
                frame_filename = os.path.join(output_folder, f"frame_{frame_counter}_{text}.jpg")
                cv2.imwrite(frame_filename, frame)
                print(f"Saved frame {frame_counter} with detected object {text} as {frame_filename}")
                # Remember when we saved so the interval is actually enforced.
                last_saved_time = current_time

            # NOTE(review): despite its name this counter advances once per
            # accepted detection, not once per frame — kept as in the original.
            frame_counter += 1

        # Summary panel: four rows, 40 px apart, black text on a white bar.
        summary_lines = (
            f"Total object count: {total_objects}",
            f"Real howitzers: {howitzers}",
            f"Undecided targets: {undecided}",
            f"Decoys: {decoys}",
        )
        for row, line in enumerate(summary_lines):
            y_shift = 40 * row
            cv2.rectangle(frame, (8, 15 + y_shift), (350, 25 + y_shift), (255, 255, 255), 20)
            cv2.putText(frame, line, (10, 30 + y_shift), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

        cv2.imshow("Object Detection", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even after an exception.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 (no detections), 353.7ms
Speed: 11.6ms preprocess, 353.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 303.3ms
Speed: 32.4ms preprocess, 303.3ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 506.6ms
Speed: 1.2ms preprocess, 506.6ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 351.9ms
Speed: 1.5ms preprocess, 351.9ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 363.4ms
Speed: 19.9ms preprocess, 363.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 400.1ms
Speed: 17.8ms preprocess, 400.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 306.2ms
Speed: 1.7ms preprocess, 306.2ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 310.8ms
Speed: 2.4ms p