# Проектная работа: анализ посещаемости магазинов с помощью нейронных сетей в компьютерном зрении

## 1. Импорт зависимостей

In [1]:
import json
import random
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

import cv2 as cv
import torch
from ultralytics import YOLO

## 2. Общие конфигурации и настройки

In [2]:
# Silence every warning for the rest of the session to keep cell output clean.
warnings.filterwarnings("ignore")

# Allow TF32 for CUDA matrix multiplications and for cuDNN operations.
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True

# One shared seed for the Python, NumPy and PyTorch random number generators.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [3]:
# CPU device kept as a named constant; it is the default device of the
# PeopleDetectionYOLO wrapper.
FORCE_CPU = torch.device("cpu")

# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Используемое устройство: {device}")

Используемое устройство: cpu


## 3. Подготовка данных


### 3.1. Конфигурация и схема данных

In [8]:
# Project layout. The project root is taken to be the parent of the current
# working directory (assumes the notebook is started from a sub-folder of the
# project — TODO confirm).
CUR_DIR = Path.cwd()
PROJECT_DIR = CUR_DIR.parent

DATA_DIR = PROJECT_DIR / "data"
INPUT_DATA_DIR = DATA_DIR / "input"
OUTPUT_DATA_DIR = DATA_DIR / "output"
MODEL_DIR = PROJECT_DIR / "models"

# Create the folders up front so later reads and writes do not fail because a
# directory is missing. The loop variable is intentionally not named `dir`:
# at module level that would replace the builtin `dir()` for the whole session.
for directory in (DATA_DIR, INPUT_DATA_DIR, OUTPUT_DATA_DIR, MODEL_DIR):
    directory.mkdir(parents=True, exist_ok=True)

In [9]:
# Frame size handed to cv.VideoWriter when the output video is created.
# NOTE(review): presumably matches the resolution of the input videos — confirm.
VIDEOS_SHAPE = (720, 720)
# Frame rate handed to cv.VideoWriter; also used to derive the frame-sampling step.
VIDEOS_FPS = 29.0

# Default target size used by preprocess_frame when resizing a frame.
MODEL_INPUT_SHAPE = (640, 640)

In [10]:
class Shop:
    """A named polygonal area on the frame plus the visitors recorded for it."""

    def __init__(self, name, pts):
        """Create a shop.

        Args:
            name: Label of the shop; it is drawn on the frame and written to
                the statistics file.
            pts: Polygon vertices as a sequence of ``[x, y]`` pairs
                (presumably pixel coordinates of the frame — confirm against
                the config file). Values are stored as 32-bit integers, so
                fractional coordinates are truncated.
        """
        self.name = name
        # OpenCV's polyline drawing requires 32-bit integer points. A plain
        # np.array() of Python ints is int64 on most platforms, which OpenCV
        # rejects, so the dtype is fixed explicitly.
        self.pts = np.array(pts, dtype=np.int32)
        # Track ids recorded for this shop; the same id may appear many times.
        self.visitors = []

    def __repr__(self):
        return f"Shop {self.name} (pts={self.pts})"

    def clear_visitors(self):
        """Forget every recorded visitor (the list object itself is kept)."""
        self.visitors.clear()


def draw_shop(img, shop):
    """Draw the closed outline of a shop's polygon on ``img``.

    Args:
        img: Image to draw on; OpenCV draws directly into this array.
        shop: Object whose ``pts`` attribute holds the polygon vertices.
    """
    outline_color = (0, 255, 0)
    polygons = [shop.pts]
    cv.polylines(img, polygons, isClosed=True, color=outline_color, thickness=3)

In [11]:
# Source videos to analyse.
input_videos_filenames = [INPUT_DATA_DIR / f"input_{n}.mp4" for n in range(1, 5)]

# JSON file with the shop definitions.
SHOPS_CONFIG_FILE = INPUT_DATA_DIR / "shops_config.json"

# Annotated videos; entry i belongs to input video i.
output_videos_filenames = [OUTPUT_DATA_DIR / f"output_{n}.mp4" for n in range(1, 5)]

# Visitor statistics; entry i belongs to input video i.
output_statistics_filenames = [
    OUTPUT_DATA_DIR / f"output_{n}.json" for n in range(1, 5)
]

### 3.2. Экземпляры данных

In [12]:
# Load the shop definitions. The encoding is pinned to UTF-8 so that shop
# names containing non-ASCII characters are read the same way on every
# platform (the statistics file is written as UTF-8 as well).
with open(SHOPS_CONFIG_FILE, "r", encoding="utf-8") as file:
    shops_configs = json.load(file)

# Every config entry is expanded into keyword arguments of Shop, so its keys
# have to match the constructor parameters.
shops = [Shop(**shop_cfg) for shop_cfg in shops_configs]

In [None]:
# Sanity check of the configuration: draw the shop polygons on the first
# frame of one input video and show the result.

ex_idx = 0

input_video_filename = input_videos_filenames[ex_idx]

cap = cv.VideoCapture(str(input_video_filename))

try:
    if not cap.isOpened():
        print("Error opening video file")
    else:
        ret, frame = cap.read()

        if not ret:
            # A failed read yields no frame; drawing on it would raise.
            print("Error reading the first frame")
        else:
            for shop in shops:
                draw_shop(frame, shop)
            cv.imshow("Video Frame", frame)

            # Keep the preview open until a key is pressed.
            cv.waitKey(0)
finally:
    # Release the capture and close the preview even if drawing fails.
    cap.release()
    cv.destroyAllWindows()

## 4. Нейросетевая модель

In [13]:
# Weights file used as the default model of the PeopleDetectionYOLO wrapper.
MODEL_FILE = MODEL_DIR / "yolo11s.pt"

In [14]:
class PeopleDetectionYOLO:
    """Inference-only wrapper around an Ultralytics ``YOLO`` model."""

    def __init__(self, model_file=MODEL_FILE, device=FORCE_CPU):
        """Load the weights, move the model to ``device`` and call ``eval()`` on it.

        Args:
            model_file: Path of the weights file to load.
            device: Device the model is moved to.
        """
        yolo = YOLO(model_file)
        self.model = yolo.to(device).eval()

    @torch.no_grad()
    def predict(self, img_tensor):
        """Run the model on ``img_tensor`` with gradients disabled.

        Returns whatever the underlying model call returns.
        """
        return self.model(img_tensor, verbose=False)

    @torch.no_grad()
    def track(self, frame):
        """Run tracking on ``frame`` with gradients disabled.

        ``persist=True`` is passed through to the model's ``track`` call
        (presumably so track ids carry over between consecutive calls —
        see the Ultralytics tracking documentation).
        """
        return self.model.track(frame, persist=True, verbose=False)

In [15]:
# Detector instance used by the processing loop, placed on the selected device.
model = PeopleDetectionYOLO(device=device)

## 5. Подсчёт посетителей на видео

### 5.1. Методы и структуры данных обработки видео и обнаружения объектов

In [16]:
def preprocess_frame(frame, new_size=MODEL_INPUT_SHAPE):
    """Convert a BGR frame into a batched, channel-first float tensor.

    Args:
        frame: Image as read by OpenCV (BGR channel order).
        new_size: Target size passed to ``cv.resize``.

    Returns:
        A ``float32`` tensor with a leading batch dimension of 1 and channels
        before the spatial dimensions; pixel values are divided by 255
        (assumes 8-bit input, giving values in [0, 1] — TODO confirm).
    """
    rgb = cv.cvtColor(cv.resize(frame, new_size), cv.COLOR_BGR2RGB)
    scaled = torch.tensor(rgb, dtype=torch.float32) / 255.0
    # Move channels first, then add the batch dimension.
    return scaled.permute(2, 0, 1).unsqueeze(0)

In [17]:
class Boxes:
    """Plain container for the per-detection fields kept after filtering."""

    def __init__(self, conf, id, xyxy, xyxyn):
        """Store the given fields unchanged.

        Args:
            conf: Detection confidences.
            id: Track ids of the detections.
            xyxy: Box corners as ``x1, y1, x2, y2``.
            xyxyn: Box corners as ``x1, y1, x2, y2``; the drawing code
                multiplies them by the frame width and height.
        """
        self.conf = conf
        self.id = id
        self.xyxy = xyxy
        self.xyxyn = xyxyn

    def __repr__(self):
        # Use the stored ids (self.id); a bare `id` here would print the
        # builtin function instead of the data.
        return f"Boxes (conf={self.conf}, id={self.id}, xyxyn={self.xyxyn})"

In [18]:
def filter_boxes(boxes, thresh=0.5):
    """Keep only class-0 detections whose confidence is above ``thresh``.

    Args:
        boxes: Object exposing ``cls``, ``conf``, ``id``, ``xyxy`` and
            ``xyxyn``, all indexable by a boolean mask. Class 0 is presumably
            "person" for the loaded model — confirm against the model's
            class list.
        thresh: Exclusive lower bound on the confidence.

    Returns:
        A ``Boxes`` instance holding the masked ``conf``, ``id``, ``xyxy``
        and ``xyxyn`` fields.
    """
    is_target_class = boxes.cls == 0
    is_confident = boxes.conf > thresh
    keep = is_target_class & is_confident

    selected = {
        field: getattr(boxes, field)[keep]
        for field in ("conf", "id", "xyxy", "xyxyn")
    }
    return Boxes(**selected)


def is_box_inside_polygon(box_xyxy, polygon_points):
    """Tell whether all four corners of a box lie inside or on a polygon.

    Args:
        box_xyxy: Box given as ``x1, y1, x2, y2``.
        polygon_points: Polygon contour passed to ``cv.pointPolygonTest``.

    Returns:
        ``True`` when no corner is reported as outside the polygon,
        ``False`` otherwise.
    """
    x1, y1, x2, y2 = box_xyxy
    corners = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])

    # With distance measuring switched off, a result below zero means the
    # point is outside the contour; zero or above means on the edge or inside.
    return all(
        cv.pointPolygonTest(polygon_points, tuple(corner), False) >= 0
        for corner in corners
    )

In [19]:
def draw_text(img, x, y, text, color=(0, 0, 255)):
    """Draw ``text`` at ``(x, y)`` on a filled background rectangle.

    Args:
        img: Image to draw on; OpenCV draws directly into this array.
        x: Horizontal position passed to ``cv.putText``.
        y: Vertical position passed to ``cv.putText``.
        text: String to render.
        color: Fill colour of the background rectangle. With the default
            colour the text is white; with any other colour it is black.
    """
    font = cv.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1

    (text_w, text_h), _ = cv.getTextSize(text, font, font_scale, font_thickness)

    # Background rectangle slightly larger than the measured text.
    top_left = (x - 1, y - text_h - 2)
    bottom_right = (x + text_w + 1, y + 2)
    cv.rectangle(img, top_left, bottom_right, color, cv.FILLED)

    if color == (0, 0, 255):
        text_color = (255, 255, 255)
    else:
        text_color = (0, 0, 0)
    cv.putText(img, text, (x, y), font, font_scale, text_color, font_thickness)


def draw_box(img, box_xyxyn, id, conf, color=(0, 0, 255), thickness=1):
    """Draw a detection box with its id and confidence labels.

    Args:
        img: Three-dimensional image array to draw on (modified by OpenCV).
        box_xyxyn: Box as ``x1, y1, x2, y2``; the values are multiplied by the
            image width and height to obtain pixel positions.
        id: Track id, shown as an integer label.
        conf: Confidence, shown with two decimals.
        color: Colour of the rectangle and of the label backgrounds.
        thickness: Line thickness of the rectangle.
    """
    height, width, _channels = img.shape

    left, top, right, bottom = box_xyxyn
    top_left = (int(left * width), int(top * height))
    bottom_right = (int(right * width), int(bottom * height))
    cv.rectangle(img, top_left, bottom_right, color, thickness)

    # Both labels sit just inside the top-left corner, one below the other.
    label_x = top_left[0] + 3
    draw_text(img, label_x, top_left[1] + 14, f"{int(id)}", color)
    draw_text(img, label_x, top_left[1] + 34, f"{conf:.2f}", color)

### 5.2. Запуск процесса

In [None]:
# На случай, если нужно будет обновить значения id у boxes
# model = PeopleDetectionYOLO(device=device)

In [None]:
# Index of the video to process and the confidence threshold for detections.
ex_idx = 3
thresh = 0.2

input_video_filename = input_videos_filenames[ex_idx]
output_video_filename = output_videos_filenames[ex_idx]
output_statistics_filename = output_statistics_filenames[ex_idx]

cap = cv.VideoCapture(str(input_video_filename))
fourcc = cv.VideoWriter_fourcc(*"mp4v")
# NOTE(review): the writer is opened with the fixed VIDEOS_SHAPE; frames of a
# different size are presumably not written correctly — confirm for new inputs.
out = cv.VideoWriter(str(output_video_filename), fourcc, VIDEOS_FPS, VIDEOS_SHAPE)

# Start from empty visitor lists so repeated runs do not accumulate ids.
for shop in shops:
    shop.clear_visitors()
frame_count = 0

# Only every N-th frame is analysed, shown and written. Computed once, and
# kept at 1 or more so the modulo below can never divide by zero.
# NOTE(review): the writer still uses VIDEOS_FPS, so the output video contains
# fewer frames than the input at the same frame rate.
frame_stride = max(1, int(VIDEOS_FPS // 4))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        if frame_count % frame_stride != 0:
            continue

        # The raw frame goes straight to the tracker; no separate
        # preprocessing result is needed here.
        boxes = model.track(frame)[0].boxes

        for shop in shops:
            draw_shop(frame, shop)

        if boxes and boxes.is_track:
            filtered_boxes = filter_boxes(boxes, thresh=thresh)

            # Tensors live on the model's device; .numpy() only works on CPU
            # tensors, so copy them to the CPU first (a no-op on CPU runs).
            ids = filtered_boxes.id.cpu().numpy()
            confs = filtered_boxes.conf.cpu().numpy()
            xyxys = filtered_boxes.xyxy.cpu().numpy()
            xyxyns = filtered_boxes.xyxyn.cpu().numpy()

            # `track_id` rather than `id`, to leave the builtin untouched.
            for track_id, conf, xyxy, xyxyn in zip(ids, confs, xyxys, xyxyns):
                draw_box(frame, xyxyn, track_id, conf)

                for shop in shops:
                    if is_box_inside_polygon(xyxy, shop.pts):
                        shop.visitors.append(track_id)
                        # Redraw in a second colour to mark a box inside a shop.
                        draw_box(frame, xyxyn, track_id, conf, (0, 255, 255))

            for shop in shops:
                # Unique ids seen so far for this shop.
                visitor_count = len(set(shop.visitors))
                # Label anchored slightly above the polygon's first vertex.
                text_position = (
                    shop.pts[0][0],
                    shop.pts[0][1] - 10,
                )

                draw_text(
                    frame,
                    text_position[0],
                    text_position[1],
                    f"{shop.name}: {visitor_count}",
                    (0, 255, 0),
                )

        out.write(frame)
        cv.imshow("YOLO", frame)

        # Pressing "q" stops processing early.
        if cv.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the capture, finalise the output file and close the preview
    # window even when processing stops with an exception.
    cap.release()
    out.release()
    cv.destroyAllWindows()

output_statistics = []
for shop in shops:
    visitor_count = len(set(shop.visitors))

    print(f"Уникальных посетителей в {shop.name}: {visitor_count}")

    output_statistics.append(dict(name=shop.name, visitor_count=visitor_count))

with open(output_statistics_filename, "w", encoding="utf-8") as json_file:
    json.dump(
        output_statistics,
        json_file,
        ensure_ascii=False,
        indent=4,
    )



Уникальных посетителей в Shop 1: 1
Уникальных посетителей в Shop 2: 2
