# YOLO 

In [4]:
from io import BytesIO

import cv2
import numpy as np
import requests
import torch
from PIL import Image
from PIL import ImageDraw, ImageFont
from matplotlib import pyplot as plt
from torchvision.transforms import transforms
from tqdm import tqdm

In [5]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# device = 'cpu'
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)

Using cache found in C:\Users\Lenovo/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-5-30 Python-3.9.19 torch-2.2.2 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
  

In [6]:
# Function to load an image from a URL
def load_image(url):
    response = requests.get(url)
    return Image.open(BytesIO(response.content)).convert('RGB')


# Function to perform detection
def detect_objects(image, model):
    # Directly pass the image to the model
    with torch.no_grad():
        results = model(image)

    # Extract the bounding boxes and data
    results = results.pandas().xyxy[0]
    return results

In [7]:
# Function to convert a frame to PIL Image
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Function to convert PIL Image back to frame
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

In [8]:
# url = 'https://english.news.cn/europe/20230716/adc36d89de9f4699bdb80290a80df2b8/20230716adc36d89de9f4699bdb80290a80df2b8_2023071658a775383fd84927a756ff4aa59911c5.jpg'
url = 'https://bearizona.com/wp-content/uploads/2022/10/GD3A0304-min.jpeg'

raw_image = load_image(url)
results = detect_objects(raw_image, model)
results

def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default(size=20)
    
    for index, row in results.iterrows():
        xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']

        # if xmin > xmax or ymin > ymax:
        #     continue
        
        confidence = row['confidence']
        label = row['name'] # bear 50%
        
        colors = {
            'person': 'red', 'car': 'blue'
        }
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline=colors.get(label, 'purple'), width=3)
        
        label_with_conf = f"{label} {confidence:.2f}"

        # Put text label on image
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Draw the bounding boxes on the image
image = raw_image.copy()
image_with_boxes = draw_boxes(image, results)

In [9]:
import cv2
from tqdm import tqdm

video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo.mp4'

# Open the video file
cap = cv2.VideoCapture(video_path)

# Get the FPS (Frames Per Second) of the video
fps = int(cap.get(cv2.CAP_PROP_FPS))
print(f"The FPS of the video is: {fps}")

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
codec = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

with tqdm(total=total_frames, desc=f"Processing Video") as pbar:
    while True:
        ret, frame = cap.read()
        
        if not ret:
            break
        
        image = frame_to_image(frame)
        
        # Perform detection
        results = detect_objects(image, model)
        
        # Draw the bounding boxes
        image_with_boxes = draw_boxes(image, results)
        
        # Convert back to frame
        output_frame = image_to_frame(image_with_boxes)
        
        out.write(output_frame)
        pbar.update(1)

# Release resources
cap.release()
out.release()


The FPS of the video is: 60


Processing Video: 100%|██████████| 3766/3766 [10:10<00:00,  6.17it/s]


# только для людей

In [21]:
from io import BytesIO
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import time

# Загрузка модели YOLOv5
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Использование устройства MPS, если доступно, иначе CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, model):
    with torch.no_grad():
        results = model(image)
    # Фильтрация по метке 'person' (человек)
    results = results.pandas().xyxy[0]
    people = results[results['name'] == 'person']
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for _, row in results.iterrows():
        xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']
        confidence = row['confidence']
        label = row['name']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Обработка видео и сохранение результата
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo.mp4'

cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
codec = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

start_time = time.time()

with tqdm(total=total_frames, desc="Обработка видео") as pbar:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        image = frame_to_image(frame)
        results = detect_people(image, model)
        image_with_boxes = draw_boxes(image, results)
        output_frame = image_to_frame(image_with_boxes)
        out.write(output_frame)
        pbar.update(1)

end_time = time.time()
elapsed_time = end_time - start_time
frames_per_second_processed = total_frames / elapsed_time

cap.release()
out.release()

print(f"Обработка видео заняла: {elapsed_time:.2f} секунд")
print(f"Кадров обработано в секунду: {frames_per_second_processed:.2f}")


Using cache found in C:\Users\Lenovo/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-5-30 Python-3.9.19 torch-2.2.2 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
Обработка видео: 100%|██████████| 3766/3766 [09:17<00:00,  6.75it/s]

Обработка видео заняла: 557.95 секунд
Кадров обработано в секунду: 6.75





In [22]:
import torch
print("CUDA доступен:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Количество GPU:", torch.cuda.device_count())
    print("Имя устройства:", torch.cuda.get_device_name(0))
else:
    print("CUDA недоступен. Проверьте драйверы и установку CUDA.")


CUDA доступен: True
Количество GPU: 1
Имя устройства: NVIDIA GeForce GTX 1660 Ti


# Simple

In [4]:
from io import BytesIO
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import time
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Загрузка модели YOLOS-Tiny
processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

# Использование устройства CUDA, если доступно, иначе CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, model):
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]]).to(device)
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    
    # Извлечение людей из результатов
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Обработка видео и сохранение результата
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo.mp4'

cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
codec = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

start_time = time.time()

with tqdm(total=total_frames, desc="Обработка видео") as pbar:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        image = frame_to_image(frame)
        results = detect_people(image, model)
        image_with_boxes = draw_boxes(image, results)
        output_frame = image_to_frame(image_with_boxes)
        out.write(output_frame)
        pbar.update(1)

end_time = time.time()
elapsed_time = end_time - start_time
frames_per_second_processed = total_frames / elapsed_time

cap.release()
out.release()

print(f"Обработка видео заняла: {elapsed_time:.2f} секунд")
print(f"Кадров обработано в секунду: {frames_per_second_processed:.2f}")



Обработка видео: 100%|██████████| 3766/3766 [07:52<00:00,  7.97it/s]

Обработка видео заняла: 472.68 секунд
Кадров обработано в секунду: 7.97





# cadr

In [24]:
from io import BytesIO
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import time
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Загрузка модели YOLOS-Tiny
processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

# Использование устройства CUDA, если доступно, иначе CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, model):
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]]).to(device)
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    
    # Извлечение людей из результатов
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Обработка видео и сохранение результата
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo.mp4'

cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
codec = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

start_time = time.time()

with tqdm(total=total_frames, desc="Обработка видео") as pbar:
    for i in range(total_frames):
        ret, frame = cap.read()
        if not ret:
            break

        # Пропуск некоторых кадров
        if i % 2 != 0:
            out.write(frame)
            pbar.update(1)
            continue
        
        image = frame_to_image(frame)
        results = detect_people(image, model)
        image_with_boxes = draw_boxes(image, results)
        output_frame = image_to_frame(image_with_boxes)
        out.write(output_frame)
        pbar.update(1)

end_time = time.time()
elapsed_time = end_time - start_time
frames_per_second_processed = total_frames / elapsed_time

cap.release()
out.release()

print(f"Обработка видео заняла: {elapsed_time:.2f} секунд")
print(f"Кадров обработано в секунду: {frames_per_second_processed:.2f}")


Обработка видео: 100%|██████████| 3766/3766 [04:18<00:00, 14.59it/s]

Обработка видео заняла: 258.10 секунд
Кадров обработано в секунду: 14.59





# Final Yolo-tiny

In [3]:
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Загрузка модели YOLOS-Tiny
processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

# Использование устройства CUDA, если доступно, иначе CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, processor, model, device):
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]]).to(device)
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    
    # Извлечение людей из результатов
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame(frame, processor, model, device):
    image = frame_to_image(frame)
    results = detect_people(image, processor, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

def process_video_multithreaded(video_path, output_path, processor, model, device, max_workers=2):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=total_frames, desc="Обработка всего видео (многопоточность)") as pbar:
            for i in range(total_frames):
                ret, frame = cap.read()
                if not ret:
                    break

                if i % 2 == 0:
                    futures[executor.submit(process_frame, frame, processor, model, device)] = i
                else:
                    out.write(frame)

                pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                output_frame = future.result()
                out.write(output_frame)

    cap.release()
    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_multi = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo_multithreaded.mp4'


# Обработка видео с многопоточностью
process_video_multithreaded(video_path, output_path_multi, processor, model, device)


Обработка всего видео (многопоточность): 100%|██████████| 3766/3766 [20:19<00:00,  3.09it/s]


In [3]:
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Загрузка модели YOLOS-Tiny
processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

# Использование устройства CUDA, если доступно, иначе CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, processor, model, device):
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]]).to(device)
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    
    # Извлечение людей из результатов
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame(frame, processor, model, device):
    image = frame_to_image(frame)
    results = detect_people(image, processor, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

# Функция для загрузки видео в оперативную память
def load_video_to_memory(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def process_video_multithreaded(frames, output_path, processor, model, device, max_workers=2):
    fps = 30  # Установите здесь известное значение fps вашего видео
    frame_height, frame_width = frames[0].shape[:2]
    
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=len(frames), desc="Обработка всего видео (многопоточность)") as pbar:
            # for i, frame in enumerate(frames):
            #     # if i % 1 == 0:
            #         futures[executor.submit(process_frame, frame, processor, model, device)] = i
            #     else:
            #         out.write(frame)
            #     pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                output_frame = future.result()
                out.write(output_frame)

    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_multi = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo_multithreaded.mp4'

# Загрузка видео в оперативную память
frames = load_video_to_memory(video_path)

# Обработка видео с многопоточностью
process_video_multithreaded(frames, output_path_multi, processor, model, device)


  from .autonotebook import tqdm as notebook_tqdm
Обработка всего видео (многопоточность):   0%|          | 0/3766 [00:00<?, ?it/s]


# Yolo5s FinalVers

In [1]:
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
import torch

# Загрузка модели YOLOv5
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Функция для выполнения детекции и фильтрации только людей
def detect_people(frame, model, device):
    results = model(frame)
    detections = results.xyxy[0].cpu().numpy()

    people = []
    for detection in detections:
        x1, y1, x2, y2, confidence, class_id = detection
        if int(class_id) == 0:  # Assuming 'person' label is 0
            people.append({'score': confidence, 'box': [x1, y1, x2, y2], 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        x1, y1, x2, y2 = res['box']
        confidence = res['score']
        label = res['label']

        draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((x1, y1), label_with_conf, fill='white', font=font)

    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame(frame, model, device):
    image = frame_to_image(frame)
    results = detect_people(frame, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

# Функция для загрузки видео в оперативную память
def load_video_to_memory(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def process_video_multithreaded(frames, output_path, model, device, fps, max_workers=8):
    frame_height, frame_width = frames[0].shape[:2]

    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=len(frames), desc="Обработка всего видео (многопоточность)") as pbar:
            for i, frame in enumerate(frames):
                if i % 2 == 0:
                    futures[executor.submit(process_frame, frame, model, device)] = i
                else:
                    out.write(frame)
                pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                output_frame = future.result()
                out.write(output_frame)

    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_multi = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_yolo_multithreaded.mp4'

# Загрузка видео в оперативную память
frames = load_video_to_memory(video_path)

# Получение FPS видео
fps = 30  # Установите здесь известное значение fps вашего видео или получите его из видеофайла

# Обработка видео с многопоточностью
process_video_multithreaded(frames, output_path_multi, model, device, fps)


Using cache found in C:\Users\Lenovo/.cache\torch\hub\ultralytics_yolov5_master
  from .autonotebook import tqdm as notebook_tqdm
YOLOv5  2024-5-30 Python-3.9.19 torch-2.2.2 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
Обработка всего видео (многопоточность): 100%|██████████| 3766/3766 [02:44<00:00, 22.83it/s]


# Faster R-CNN

In [28]:
import torch

torch.cuda.empty_cache()
torch.cuda.memory_summary(device=None, abbreviated=False)




In [3]:
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F

# Загрузка модели Faster R-CNN
weights = FasterRCNN_ResNet50_FPN_Weights.COCO_V1
model = fasterrcnn_resnet50_fpn(weights=weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, model, device):
    # Преобразование изображения
    img = F.to_tensor(image).unsqueeze(0).to(device)
    
    # Предсказание
    with torch.no_grad():
        outputs = model(img)
    
    # Обработка результатов
    results = outputs[0]
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1 and score > 0.9:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame(frame, model, device):
    image = frame_to_image(frame)
    results = detect_people(image, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

# Функция для загрузки видео в оперативную память
def load_video_to_memory(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def process_video_multithreaded(frames, output_path, model, device, fps=30, max_workers=8, process_every_nth_frame=2):
    frame_height, frame_width = frames[0].shape[:2]
    
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=len(frames), desc="Обработка всего видео (многопоточность)") as pbar:
            for i, frame in enumerate(frames):
                if i % process_every_nth_frame == 0:
                    futures[executor.submit(process_frame, frame, model, device)] = i
                else:
                    out.write(frame)
                pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                try:
                    output_frame = future.result()
                    out.write(output_frame)
                except Exception as e:
                    print(f"Ошибка при обработке кадра: {e}")

    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_multi = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_faster_rcnn_multithreaded.mp4'

# Загрузка видео в оперативную память
frames = load_video_to_memory(video_path)

# Обработка видео с многопоточностью
process_video_multithreaded(frames, output_path_multi, model, device)


Обработка всего видео (многопоточность): 100%|██████████| 3766/3766 [01:14<00:00, 141.15it/s]

: 

# DETR

In [1]:
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
from transformers import AutoImageProcessor, AutoModelForObjectDetection

# Загрузка модели DETR
processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, processor, model, device):
    inputs = processor(images=image, return_tensors="pt").to(device)
    
    # Предсказание
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Обработка результатов
    target_sizes = torch.tensor([image.size[::-1]]).to(device)
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    
    # Извлечение людей из результатов
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame(frame, processor, model, device):
    image = frame_to_image(frame)
    results = detect_people(image, processor, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

# Функция для загрузки видео в оперативную память
def load_video_to_memory(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def process_video_multithreaded(frames, output_path, processor, model, device, fps=30, max_workers=8, process_every_nth_frame=2):
    frame_height, frame_width = frames[0].shape[:2]
    
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=len(frames), desc="Обработка всего видео (многопоточность)") as pbar:
            for i, frame in enumerate(frames):
                if i % process_every_nth_frame == 0:
                    futures[executor.submit(process_frame, frame, processor, model, device)] = i
                else:
                    out.write(frame)
                pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                try:
                    output_frame = future.result()
                    out.write(output_frame)
                except Exception as e:
                    print(f"Ошибка при обработке кадра: {e}")

    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_multi = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_detr_multithreaded.mp4'

# Загрузка видео в оперативную память
frames = load_video_to_memory(video_path)

# Обработка видео с многопоточностью
process_video_multithreaded(frames, output_path_multi, processor, model, device)


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Обработка всего видео (многопоточн

In [2]:
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights
from torchvision.transforms import functional as F

# Очистка кеша CUDA
torch.cuda.empty_cache()

# Загрузка модели MobileNetV3
weights = SSDLite320_MobileNet_V3_Large_Weights.COCO_V1
model = ssdlite320_mobilenet_v3_large(weights=weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Функция для выполнения детекции и фильтрации только людей
def detect_people(image, model, device):
    # Преобразование изображения
    img = F.to_tensor(image).unsqueeze(0).to(device)
    
    # Предсказание
    with torch.no_grad():
        outputs = model(img)
    
    # Обработка результатов
    results = outputs[0]
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1 and score > 0.9:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame(frame, model, device):
    image = frame_to_image(frame)
    results = detect_people(image, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

# Функция для загрузки видео в оперативную память
def load_video_to_memory(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def process_video_multithreaded(frames, output_path, model, device, fps=30, max_workers=8, process_every_nth_frame=2):
    frame_height, frame_width = frames[0].shape[:2]
    
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=len(frames), desc="Обработка всего видео (многопоточность)") as pbar:
            for i, frame in enumerate(frames):
                if i % process_every_nth_frame == 0:
                    futures[executor.submit(process_frame, frame, model, device)] = i
                else:
                    out.write(frame)
                pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                try:
                    output_frame = future.result()
                    out.write(output_frame)
                except Exception as e:
                    print(f"Ошибка при обработке кадра: {e}")

    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_multi = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_mobilenet_multithreaded.mp4'

# Загрузка видео в оперативную память
frames = load_video_to_memory(video_path)

# Обработка видео с многопоточностью
process_video_multithreaded(frames, output_path_multi, model, device)


Обработка всего видео (многопоточность): 100%|██████████| 3766/3766 [04:56<00:00, 12.69it/s] 


# RetinaNet

In [3]:
import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import concurrent.futures
from torchvision.models.detection import retinanet_resnet50_fpn, RetinaNet_ResNet50_FPN_Weights
from torchvision.transforms import functional as F

# Загрузка модели RetinaNet
weights = RetinaNet_ResNet50_FPN_Weights.DEFAULT
model = retinanet_resnet50_fpn(weights=weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Функция для выполнения детекции и фильтрации только людей
def detect_people_retinanet(image, model, device):
    img = F.to_tensor(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(img)
    results = outputs[0]
    people = []
    for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
        if label == 1 and score > 0.5:  # Assuming 'person' label is 1
            people.append({'score': score.item(), 'box': box.tolist(), 'label': 'person'})
    return people

# Функция для рисования рамок на изображении
def draw_boxes(image, results):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for res in results:
        xmin, ymin, xmax, ymax = res['box']
        confidence = res['score']
        label = res['label']
        draw.rectangle([xmin, ymin, xmax, ymax], outline='red', width=3)
        label_with_conf = f"{label} {confidence:.2f}"
        draw.text((xmin, ymin), label_with_conf, fill='white', font=font)
    return image

# Функция для преобразования кадра в изображение PIL
def frame_to_image(frame):
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# Функция для преобразования изображения PIL обратно в кадр
def image_to_frame(image):
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

# Функция для обработки кадра
def process_frame_retinanet(frame, model, device):
    image = frame_to_image(frame)
    results = detect_people_retinanet(image, model, device)
    image_with_boxes = draw_boxes(image, results)
    output_frame = image_to_frame(image_with_boxes)
    return output_frame

def process_video_multithreaded_retinanet(video_path, output_path, model, device, max_workers=2):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    codec = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, codec, fps, (frame_width, frame_height))

    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        with tqdm(total=total_frames, desc="Обработка всего видео (многопоточность)") as pbar:
            for i in range(total_frames):
                ret, frame = cap.read()
                if not ret:
                    break

                if i % 2 == 0:
                    futures[executor.submit(process_frame_retinanet, frame, model, device)] = i
                else:
                    out.write(frame)

                pbar.update(1)

            for future in concurrent.futures.as_completed(futures):
                output_frame = future.result()
                out.write(output_frame)

    cap.release()
    out.release()

# Пути к видеофайлам
video_path = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer.mp4'
output_path_retinanet = 'C:/Users/Lenovo/Downloads/Caggleton_9/kaggleton_9_longer_retinanet_multithreaded.mp4'

# Обработка видео с многопоточностью (RetinaNet)
process_video_multithreaded_retinanet(video_path, output_path_retinanet, model, device)


Обработка всего видео (многопоточность): 100%|██████████| 3766/3766 [05:44<00:00, 10.94it/s]
