In [2]:
import cv2
from ultralytics import YOLO
import pickle 
import numpy as np



In [3]:
# Location of the pickled analyzer network that is loaded below.
file_path = "/Users/maxkucher/pytorch/howitzer-detector/analyzer_nn.pickle"

# NOTE(review): unpickling can execute arbitrary code embedded in the file,
# so only load pickles that come from a trusted source.
with open(file_path, mode="rb") as pickle_file:
    analyzer_model = pickle.load(pickle_file)

In [4]:
# Print the layer-by-layer architecture of the loaded analyzer network.
# NOTE(review): the first Dense layer reports 30 parameters for 6 units, which
# is consistent with a 4-feature input (4 * 6 weights + 6 biases) — confirm
# against the training code.
analyzer_model.summary()

Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_169 (Dense)           (None, 6)                 30        
                                                                 
 dense_170 (Dense)           (None, 8)                 56        
                                                                 
 dense_171 (Dense)           (None, 5)                 45        
                                                                 
 dense_172 (Dense)           (None, 7)                 42        
                                                                 
 dense_173 (Dense)           (None, 4)                 32        
                                                                 
 dense_174 (Dense)           (None, 3)                 15        
                                                                 
Total params: 220 (880.00 Byte)
Trainable params: 220

In [5]:
# Two-stage target analysis on a video stream:
#   1. `model` (custom weights) finds candidate targets in each frame.
#   2. `yolo_model` (stock YOLOv8-L) looks for people / vehicles in an enlarged
#      crop around every candidate.
#   3. `analyzer_model` turns the presence flags [person, car, bus, truck] into
#      one of "Decoy" / "Howitzer" / "Undecided".
# The annotated frame is shown in an OpenCV window; press "q" to stop.
cap = cv2.VideoCapture("/Users/maxkucher/pytorch/howitzer-detector/video_1.mp4")
model = YOLO("/Users/maxkucher/pytorch/howitzer-detector/best.pt")
yolo_model = YOLO("yolov8l")


names = model.names
yolo_names = yolo_model.names
threshold = 0.5

# Order matters: it is the feature order fed to `analyzer_model`.
context_classes = ("person", "car", "bus", "truck")
# Index returned by argmax over the analyzer output -> status label.
pred_list = ["Decoy", "Howitzer", "Undecided"]
# Colours are BGR tuples as passed to the OpenCV drawing calls.
status_colors = {
    "Decoy": (0, 255, 0),
    "Howitzer": (0, 0, 255),
    "Undecided": (0, 255, 255),
}
obj_color = (255, 0, 0)

try:
    while True:

        # Per-frame counters shown in the overlay.
        total_objects = 0
        howitzers = 0
        decoys = 0
        unkowns = 0

        ret, frame = cap.read()
        # Stop BEFORE touching the frame: when the read fails (end of video,
        # unreadable file) `frame` is not a usable image.
        if not ret:
            break

        # Inference always runs on the untouched `frame`; drawing goes to a
        # copy so annotations never leak into crops taken for later targets.
        annotated = frame.copy()
        frame_height, frame_width = frame.shape[:2]

        results = model(frame)[0]

        for result in results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result

            if score <= threshold:
                continue

            total_objects += 1

            x_center = int((x1 + x2) / 2)
            y_center = int((y1 + y2) / 2)

            # Enlarge the box by floor(size / 0.5) (i.e. about twice its
            # width/height) on every side, clamped to the frame bounds.
            pad_x = (x2 - x1) // 0.5
            pad_y = (y2 - y1) // 0.5
            crop_x1 = int(max(0, x1 - pad_x))
            crop_y1 = int(max(0, y1 - pad_y))
            crop_x2 = int(min(frame_width, x2 + pad_x))
            crop_y2 = int(min(frame_height, y2 + pad_y))
            cropped_frames = frame[crop_y1:crop_y2, crop_x1:crop_x2]

            # Presence flags are reset for EVERY target so that one target's
            # surroundings cannot influence the classification of another.
            # NOTE(review): this assumes the analyzer was trained on
            # independent presence flags per class — confirm against the
            # training data.
            context_flags = dict.fromkeys(context_classes, 0)

            # Skip the secondary detector for a degenerate (empty) crop.
            if cropped_frames.size > 0:
                new_results = yolo_model(cropped_frames)[0]

                for new_result in new_results.boxes.data.tolist():
                    x_11, y_11, x_22, y_22, new_score, new_class_id = new_result
                    class_name = yolo_names[int(new_class_id)]

                    if new_score <= threshold or class_name not in context_flags:
                        continue

                    # A class stays flagged once seen; later detections of
                    # other classes must not clear it.
                    context_flags[class_name] = 1

                    # Detections are relative to the crop; shift them back
                    # into full-frame coordinates before drawing.
                    left = int(x_11 + crop_x1)
                    top = int(y_11 + crop_y1)
                    right = int(x_22 + crop_x1)
                    bottom = int(y_22 + crop_y1)

                    cv2.rectangle(annotated, (left, top), (right, bottom), obj_color, 2)
                    cv2.putText(annotated, class_name.upper(), (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, obj_color, 3, cv2.LINE_AA)

            features = np.array([[context_flags[name] for name in context_classes]])
            pred = np.argmax(analyzer_model.predict(features))
            text = pred_list[int(pred)]
            color = status_colors[text]

            if text == "Decoy":
                decoys += 1
            elif text == "Howitzer":
                howitzers += 1
            else:
                unkowns += 1

            cv2.rectangle(annotated, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            cv2.circle(annotated, (x_center, y_center), 5, color, thickness=cv2.FILLED)
            cv2.putText(annotated, f"Target status: {text.upper()}", (int(x1), int(y1 - 30)), cv2.FONT_HERSHEY_SIMPLEX, 1.3, color, 3, cv2.LINE_AA)

        # Counter overlay: a thick white bar with black text, one row every
        # 40 px starting at y=15.
        overlay_lines = (
            f"Total object count: {total_objects}",
            f"Real howitzers: {howitzers}",
            f"Undecided targets: {unkowns}",
            f"Decoys: {decoys}",
        )
        for row, line in enumerate(overlay_lines):
            bar_top = 15 + 40 * row
            cv2.rectangle(annotated, (8, bar_top), (350, bar_top + 10), (255, 255, 255), 20)
            cv2.putText(annotated, line, (10, bar_top + 15), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

        # Name the window and display the annotated frame.
        cv2.imshow("Object Detection", annotated)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, including when the cell
    # is interrupted or a model call raises.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 (no detections), 429.6ms
Speed: 7.1ms preprocess, 429.6ms inference, 3.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 379.3ms
Speed: 1.7ms preprocess, 379.3ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 526.9ms
Speed: 1.4ms preprocess, 526.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 320.9ms
Speed: 2.1ms preprocess, 320.9ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 392.5ms
Speed: 1.5ms preprocess, 392.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 342.7ms
Speed: 8.3ms preprocess, 342.7ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 380.2ms
Speed: 1.5ms preprocess, 380.2ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 401.5ms
Speed: 1.7ms prepr

KeyboardInterrupt: 