In [24]:
import numpy as np
import torch
from torchvision import transforms
import cv2
from ultralytics import YOLO
from torchvision.models.detection import ssd300_vgg16, SSD300_VGG16_Weights

import time
import json
from collections import defaultdict

from tqdm import tqdm
import os

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [51]:
model = YOLO("yolov8s.pt") 
model.to(device)

path_in = 'src/orig/'
video_name = "video_1.mp4"
cap = cv2.VideoCapture(path_in + video_name)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

output_path = 'src/processed/yolo/detected_' + video_name

fourcc = cv2.VideoWriter_fourcc(*'mp4v')

out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

all_frames_data = []
frame_number = 0
class_names = model.names

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
start_time = time.time()
frame_processing_times = []

for _ in tqdm(range(total_frames), desc="Обработка видео"):
    frame_start_time = time.time()

    ret, frame = cap.read()

    if not ret:
        break
    
    frame_number += 1

    results = model.track(frame, persist=False, verbose=False) 

    frame_data = {
        'frame_id': frame_number,
        'objects': [],
        'class_counts': {}
    }

    frame_class_counts = {}

    if results[0].boxes is not None:
        class_ids = results[0].boxes.cls.cpu().numpy()
        confidences = results[0].boxes.conf.cpu().numpy()
        
        for i in range(len(class_ids)):
            class_id = int(class_ids[i])
            confidence = float(confidences[i])
            class_name = class_names[class_id]

            object_data = {
                'class_name': class_name,
                'confidence': confidence,
            }
            frame_data['objects'].append(object_data)

            frame_class_counts[class_name] = frame_class_counts.get(class_name, 0)

    frame_data['class_counts'] = frame_class_counts

    all_frames_data.append(frame_data)

    annotated_frame = results[0].plot()

    if len(confidences) > 0:
        avg_confidence = confidences.mean()

        conf_text = f"Avg Conf: {avg_confidence:.2f}"
        cv2.putText(
            annotated_frame, 
            conf_text, 
            (10, 60), 
            cv2.FONT_HERSHEY_SIMPLEX, 
            1, 
            (255, 255, 0), # Цвет (желтый)
            2
        )

    out.write(annotated_frame)

    frame_end_time = time.time()

    time_taken = frame_end_time - frame_start_time
    frame_processing_times.append(time_taken)

cap.release()
out.release()
cv2.destroyAllWindows()

end_time = time.time()
total_time = end_time - start_time

avg_frame_time = sum(frame_processing_times) / total_frames
processing_fps = 1.0 / avg_frame_time

print(f"Общее количество обработанных кадров: {total_frames}")
print(f"Общее время обработки: {total_time:.2f} сек.")
print(f"Среднее время на кадр: {avg_frame_time * 1000:.2f} мс")
print(f"Фактический FPS обработки: {processing_fps:.2f} FPS")

output_json_path = f'src/processed/yolo/detection_data{1}.json'
with open(output_json_path, 'w', encoding='utf-8') as f:
    json.dump(all_frames_data, f, indent=2, ensure_ascii=False)

Обработка видео: 100%|██████████| 1486/1486 [01:23<00:00, 17.90it/s]

Общее количество обработанных кадров: 1486
Общее время обработки: 83.03 сек.
Среднее время на кадр: 55.69 мс
Фактический FPS обработки: 17.96 FPS





In [42]:
weights = SSD300_VGG16_Weights.DEFAULT
model_ssd = ssd300_vgg16(weights=weights).to(device)
model_ssd.eval()

# список классов
COCO_CLASSES = weights.meta["categories"]

transform = transforms.Compose([
    transforms.ToTensor(),
])

path_in = 'src/orig/'
video_name = "video_1.mp4"
cap = cv2.VideoCapture(path_in + video_name)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

output_dir = 'src/processed/ssd/'
output_path = os.path.join(output_dir, 'detected_' + video_name)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

all_frames_data = []
frame_number = 0
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
start_time = time.time()
frame_processing_times = []

for _ in tqdm(range(total_frames), desc="Обработка SSD300", leave=True):
    frame_start_time = time.time()

    ret, frame = cap.read()
    if not ret:
        break
    
    frame_number += 1

    # подготовка кадра
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_tensor = transform(rgb).to(device)

    # детекция
    with torch.no_grad():
        output = model_ssd([img_tensor])[0]

    # результаты
    boxes = output["boxes"].cpu().numpy()
    scores = output["scores"].cpu().numpy()
    labels = output["labels"].cpu().numpy()

    # data для json
    frame_data = {
        'frame_id': frame_number,
        'objects': [],
        'class_counts': {}
    }
    frame_class_counts = {}
    
    # уверенность для объектов, что проошли порог
    scores = []

    for box, score, label in zip(boxes, scores, labels):
        
        scores.append(score)

        # имена классов
        try:
            class_name = COCO_CLASSES[label - 1]
        except IndexError:
            class_name = f"Unknown_ID_{label}"

        # data для json
        object_data = {
            'class_name': class_name,
            'confidence': float(score),
        }
        frame_data['objects'].append(object_data)
        frame_class_counts[class_name] = frame_class_counts.get(class_name, 0) + 1

        # отрисовка
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
        text = f"{class_name}: {score:.2f}"
        cv2.putText(frame, text, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    frame_data['class_counts'] = frame_class_counts
    all_frames_data.append(frame_data)

    if len(scores) > 0:
        avg_confidence = sum(scores) / len(scores)
        conf_text = f"Avg Conf: {avg_confidence:.2f}"
        cv2.putText(
            frame, 
            conf_text, 
            (10, 60), 
            cv2.FONT_HERSHEY_SIMPLEX, 
            1, 
            (255, 255, 0), # Желтый
            2
        )
    
    out.write(frame)

    frame_end_time = time.time()
    time_taken = frame_end_time - frame_start_time
    frame_processing_times.append(time_taken)

cap.release()
out.release()
cv2.destroyAllWindows()

end_time = time.time()
total_time = end_time - start_time
total_processed_frames = len(frame_processing_times)

if total_processed_frames > 0:
    avg_frame_time = sum(frame_processing_times) / total_processed_frames
    processing_fps = 1.0 / avg_frame_time
else:
    avg_frame_time = 0
    processing_fps = 0

print(f"Общее количество обработанных кадров: {total_processed_frames}")
print(f"Общее время обработки: **{total_time:.2f} секунд**")
print(f"Среднее время на кадр: {avg_frame_time * 1000:.2f} мс")
print(f"Фактический FPS обработки: **{processing_fps:.2f} FPS**")

output_json_path = os.path.join(output_dir, 'detection_data_ssd.json')
with open(output_json_path, 'w', encoding='utf-8') as f:
    json.dump(all_frames_data, f, indent=2, ensure_ascii=False)

Обработка SSD300: 100%|██████████| 1486/1486 [02:37<00:00,  9.45it/s]

Общее количество обработанных кадров: 1486
Общее время обработки: **157.34 секунд**
Среднее время на кадр: 105.58 мс
Фактический FPS обработки: **9.47 FPS**





In [47]:
def analyze_detection_data(file_path):
    """
    Вспомогательная функция для анализа одного JSON-файла.
    Собирает агрегированную статистику.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not data:
        return {
            'total_frames': 0,
            'total_detections': 0,
            'avg_objects_per_frame': 0,
            'avg_confidence_total': 0,
            'total_detections_by_class': defaultdict(int),
            'avg_confidence_by_class': defaultdict(float)
        }

    total_frames = len(data)
    total_detections = 0
    total_confidence_sum = 0
    
    total_detections_by_class = defaultdict(int)
    total_confidence_by_class = defaultdict(float)

    for frame in data:
        num_objects_in_frame = len(frame['objects'])
        total_detections += num_objects_in_frame
        
        for obj in frame['objects']:
            class_name = obj['class_name']
            confidence = obj['confidence']
            
            total_confidence_sum += confidence
            total_detections_by_class[class_name] += 1
            total_confidence_by_class[class_name] += confidence

    avg_confidence_total = (total_confidence_sum / total_detections) if total_detections > 0 else 0
    avg_objects_per_frame = (total_detections / total_frames) if total_frames > 0 else 0
    
    avg_confidence_by_class = defaultdict(float)
    for class_name, count in total_detections_by_class.items():
        if count > 0:
            avg_confidence_by_class[class_name] = total_confidence_by_class[class_name] / count

    return {
        'total_frames': total_frames,
        'total_detections': total_detections,
        'avg_objects_per_frame': avg_objects_per_frame,
        'avg_confidence_total': avg_confidence_total,
        'total_detections_by_class': total_detections_by_class,
        'avg_confidence_by_class': avg_confidence_by_class
    }

def compare_detection_jsons(file1_path, file2_path, model1_name="Модель 1", model2_name="Модель 2"):
    """
    Главная функция.
    Анализирует два JSON-файла и выводит сводный отчет о разнице.
    """
    
    print(f"Сравнение моделей: [{model1_name}] & [{model2_name}]")
    print(f"  {model1_name}: {file1_path}")
    print(f"  {model2_name}: {file2_path}\n")

    stats1 = analyze_detection_data(file1_path)
    stats2 = analyze_detection_data(file2_path)

    print("Общая сводка")
    
    print("\n" + "-"*40)
    print(f"| {'Метрика':<25} | {model1_name:^15} | {model2_name:^15} |")
    print(f"|{'-'*27}|{'-'*17}|{'-'*17}|")
    print(f"| {'Всего детекций':<25} | {stats1['total_detections']:^15} | {stats2['total_detections']:^15} |")
    print(f"| {'Сред. объектов на кадр':<25} | {stats1['avg_objects_per_frame']:^15.2f} | {stats2['avg_objects_per_frame']:^15.2f} |")
    print(f"| {'Сред. уверенность (общая)':<25} | {stats1['avg_confidence_total']:^15.2%} | {stats2['avg_confidence_total']:^15.2%} |")
    print(f"{'-'*61}\n")


    print("Сводка по классам")
    
    # уникальные классы 
    all_classes = set(stats1['total_detections_by_class'].keys()) | set(stats2['total_detections_by_class'].keys())
    sorted_classes = sorted(list(all_classes))

    if not sorted_classes:
        print("Ни в одном файле не найдено детекций.")
        return

    m1_col_count = f"[{model1_name}] Кол-во"
    m2_col_count = f"[{model2_name}] Кол-во"
    m1_col_conf = f"[{model1_name}] Ср.Увер."
    m2_col_conf = f"[{model2_name}] Ср.Увер."

    print(f"| {'Класс':<20} | {m1_col_count:^15} | {m2_col_count:^15} | {m1_col_conf:^15} | {m2_col_conf:^15} |")
    print(f"|{'-'*22}|{'-'*17}|{'-'*17}|{'-'*17}|{'-'*17}|")

    for class_name in sorted_classes:
        # модель 1
        count1 = stats1['total_detections_by_class'].get(class_name, 0)
        conf1 = stats1['avg_confidence_by_class'].get(class_name, 0)
        
        # модель 2
        count2 = stats2['total_detections_by_class'].get(class_name, 0)
        conf2 = stats2['avg_confidence_by_class'].get(class_name, 0)

        print(f"| {class_name:<20} | {count1:^15} | {count2:^15} | {conf1:^15.2%} | {conf2:^15.2%} |")
    
    print(f"|{'-'*22}|{'-'*17}|{'-'*17}|{'-'*17}|{'-'*17}|")

In [49]:
path = 'src/processed/'
path_model1 = os.path.join(path , 'ssd/detection_data_ssd.json')
path_model2 = os.path.join(path, 'yolo/detection_data1.json')
compare_detection_jsons(path_model1, path_model2, 'yolov8s', 'SSD300')

Сравнение моделей: [yolov8s] & [SSD300]
  yolov8s: src/processed/ssd/detection_data_ssd.json
  SSD300: src/processed/yolo/detection_data1.json

Общая сводка

----------------------------------------
| Метрика                   |     yolov8s     |     SSD300      |
|---------------------------|-----------------|-----------------|
| Всего детекций            |        0        |      6486       |
| Сред. объектов на кадр    |      0.00       |      4.36       |
| Сред. уверенность (общая) |      0.00%      |     73.44%      |
-------------------------------------------------------------

Сводка по классам
| Класс                | [yolov8s] Кол-во | [SSD300] Кол-во | [yolov8s] Ср.Увер. | [SSD300] Ср.Увер. |
|----------------------|-----------------|-----------------|-----------------|-----------------|
| backpack             |        0        |       41        |      0.00%      |     32.29%      |
| bicycle              |        0        |       282       |      0.00%      |     54.50%    