**Research notebook**

In [1]:
import cv2
import torch
import torchvision
from torchvision import transforms
import numpy as np
from tqdm import tqdm
from PIL import Image

In [2]:
# Path of the source clip that every detector in this notebook processes.
input_video_path = 'data/crowd.mp4'

# Open the clip once; the capture object is shared by the detection cells below.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Raise a regular exception instead of a bare SystemExit: inside a notebook
    # kernel SystemExit is reported as a generic "To exit: use 'exit'..." warning,
    # whereas OSError keeps the reason and the path in the traceback.
    raise OSError(f"Ошибка при открытии видео: {input_video_path}")

# Stream properties reused when creating the output writers.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# FourCC code passed to every cv2.VideoWriter created later in the notebook.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Используемое устройство: {device}')

Используемое устройство: cuda


Detection with YOLOv5s

In [4]:
# Load the pretrained YOLOv5s checkpoint from torch.hub and move it to the
# selected device. The hub model accepts raw images (see "Adding AutoShape" in
# the load log) and returns per-image detections in `results.xyxy`.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
model.eval()
model.to(device)

# Rewind the shared capture so this cell always starts from the first frame.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
output_video_path = 'data/crowd_yolov5.mp4'
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

print("Детекция с использованием YOLOv5")
try:
    # The frame count reported by the container can be missing or inaccurate, so
    # it only sizes the progress bar; the loop itself runs until read() fails.
    with tqdm(total=total_frames if total_frames > 0 else None,
              desc="Обработка видео") as progress:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR, while the hub model expects RGB for
            # numpy input; convert for inference but keep drawing on `frame`.
            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Single device-to-host copy per frame.
            # Each row is (x1, y1, x2, y2, confidence, class).
            for x1, y1, x2, y2, confidence, cls in results.xyxy[0].tolist():
                if int(cls) != 0:  # keep only class 0, the 'person' label
                    continue
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'{confidence:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            out.write(frame)
            progress.update(1)
finally:
    # Always finalise the output file, even if inference fails midway.
    out.release()
print(f"Результат сохранен в {output_video_path}")

Using cache found in C:\Users\MSI/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-14 Python-3.11.1 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 3070, 8191MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:02<00:00, 7.28MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Детекция с использованием YOLOv5


Обработка видео: 100%|██████████| 705/705 [00:29<00:00, 23.63it/s]

Результат сохранен в data/crowd_yolov5.mp4





Detection with SSD300_vgg16

In [5]:
# Seek the shared capture back to frame 0 so subsequent reads start from the
# beginning of the video again.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

True

In [6]:
# Rewind the shared capture so this cell is re-runnable on its own and always
# starts from the first frame.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

model = torchvision.models.detection.ssd300_vgg16(weights=torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT)
model.eval()
model.to(device)

# torchvision detection models take RGB tensors in the [0, 1] range and perform
# the resize to 300x300 and the mean/std normalisation internally. Normalising
# here as well would normalise the input twice, so only convert HWC uint8 -> CHW
# float.
transform = transforms.ToTensor()

output_video_path = 'data/crowd_ssd.mp4'

out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

print("Детекция с использованием SSD")
try:
    # The frame count reported by the container can be missing or inaccurate, so
    # it only sizes the progress bar; the loop itself runs until read() fails.
    with tqdm(total=total_frames if total_frames > 0 else None,
              desc="Обработка видео") as progress:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR; the pretrained weights expect RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_transformed = transform(frame_rgb).unsqueeze(0).to(device)

            with torch.no_grad():
                detections = model(frame_transformed)

            prediction = detections[0]
            # Filter on the device, then copy only the surviving rows to the host.
            keep = (prediction['scores'] > 0.5) & (prediction['labels'] == 1)  # label 1 = 'person'
            boxes = prediction['boxes'][keep].tolist()
            scores = prediction['scores'][keep].tolist()

            # The model maps boxes back to the size of the tensor it was given
            # (the full frame), so the coordinates are already in frame pixels
            # and are only truncated to int at the very end.
            for (x1, y1, x2, y2), score in zip(boxes, scores):
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            out.write(frame)
            progress.update(1)
finally:
    # Always finalise the output file, even if inference fails midway.
    out.release()
print(f"Результат сохранен в {output_video_path}")


Детекция с использованием SSD


Обработка видео: 100%|██████████| 705/705 [01:23<00:00,  8.44it/s]

Результат сохранен в data/crowd_ssd.mp4





In [7]:
# Close the input video capture; no later cell in view reads frames from it.
cap.release()

**Preparing the models for ONNX Runtime**

bash:

git clone https://github.com/ultralytics/yolov5

python yolov5/export.py --include onnx --weights weights/yolov5s.pt --img 640 --batch 1

In [8]:
# Build a fresh pretrained SSD300-VGG16 (left on the CPU) in inference mode
# for the ONNX export.
ssd_weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
model = torchvision.models.detection.ssd300_vgg16(weights=ssd_weights)
model.eval()

# Random 1x3x300x300 tensor used only as the example input for the export.
dummy_input = torch.randn(1, 3, 300, 300)

# Write the model together with its trained parameters to ssd300_vgg16.onnx,
# naming the graph input and the three outputs.
torch.onnx.export(
    model,
    dummy_input,
    "ssd300_vgg16.onnx",
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=['images'],
    output_names=['boxes', 'scores', 'labels'],
)

verbose: False, log level: Level.ERROR

