In [None]:
# 使用YOLOv8模型对视频进行截图

In [1]:
from ultralytics import YOLO
import cv2
import numpy as np
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# 加载模型
model_file = "./weights/yolov8x.pt" # 最大的模型
model = YOLO(model_file)  # load a pretrained model (recommended for training)
objs_labels = model.names  # get class labels
print(objs_labels)


{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [3]:
# iterate all videos
import glob
import os
import tqdm

In [4]:
videos_list = glob.glob("./videos/*.mp4")

In [5]:
videos_list

['./videos\\D02_20230707102139.mp4',
 './videos\\D03_20230707102214.mp4',
 './videos\\D04_20230707102216.mp4',
 './videos\\D05_20230707102820.mp4',
 './videos\\D06_20230707102750.mp4',
 './videos\\D09_20230707102816.mp4']

In [6]:
# 截图
# 参数：视频文件，保存的帧数，跳过的帧数
def video2frame(video_file, save_count=1200, skip_interval=10):
    # 读取视频
    cap = cv2.VideoCapture(video_file)
    # get frame count
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    # 获取视频的宽度和高度
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # 获取视频的帧率
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"width: {width}, height: {height}, fps: {fps}, frame_count: {frame_count}")

    # 保存的帧数
    save_frame_count = 0
    # frame index
    index = 0
    while True:
        # 读取一帧
        start_time = time.time()
        ret, frame = cap.read()

        if ret:
            index += 1  # frame index
            # skip frames
            if index % skip_interval != 0:
                print(f"skip frame: {index}")
                continue
            # check if save enough frames
            if save_frame_count >= save_count:
                break
            # 检测
            result = list(model(frame, stream=True, conf=0.4))[
                0
            ]  # inference，如果stream=False，返回的是一个列表，如果stream=True，返回的是一个生成器
            boxes = result.boxes  # Boxes object for bbox outputs
            boxes = boxes.cpu().numpy()  # convert to numpy array

            # 参考：https://docs.ultralytics.com/modes/predict/#boxes
            # 遍历每个框
            for box in boxes.data:
                l, t, r, b = box[:4].astype(np.int32)  # left, top, right, bottom
                conf, id = box[4:]  # confidence, class
                # print(f"l: {l}, t: {t}, r: {r}, b: {b}, conf: {conf}, id: {id}")
                # if id == 0: # 如果是人，保存图片
                if id == 0:
                    # filename: video_name_frame_index.jpg
                    save_img_file = f"./images/{video_file.split(os.sep)[-1].split('.')[0]}_{index}.jpg"
                    cv2.imwrite(save_img_file, frame)
                    print(save_img_file)
                    # jump out of loop
                    save_frame_count += 1
                    break
                # print(save_img_file)
                # 绘制框
                # cv2.rectangle(frame, (l,t), (r,b), (0,0,255), 2)
                # 绘制类别+置信度（格式：98.1%）
                # cv2.putText(frame, f"{objs_labels[id]} {conf*100:.1f}%", (l, t-10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # end time
            end_time = time.time()
            # FPS
            fps = 1 / (end_time - start_time)
            # 绘制FPS
            # cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # 写入
            # writer.write(frame)
            # 显示
            # 缩放0.5倍
            # frame = cv2.resize(frame, (int(width/2), int(height/2)))
            # cv2.imshow("frame", frame)
            # if cv2.waitKey(1) & 0xFF == ord('q'):
            #     break

        else:
            break

In [7]:
import os

In [8]:
# 创建目录
if not os.path.exists("./images"):
    os.mkdir("./images")

In [9]:
# 遍历所有视频
for video_file in videos_list:
    # 每个视频截取100张图片，间隔50帧
    video2frame(video_file,100,50)

width: 2560, height: 1440, fps: 25.0, frame_count: 21720.0
skip frame: 1
skip frame: 2
skip frame: 3
skip frame: 4
skip frame: 5
skip frame: 6
skip frame: 7
skip frame: 8
skip frame: 9
skip frame: 10
skip frame: 11
skip frame: 12
skip frame: 13
skip frame: 14
skip frame: 15
skip frame: 16
skip frame: 17
skip frame: 18
skip frame: 19
skip frame: 20
skip frame: 21
skip frame: 22
skip frame: 23
skip frame: 24
skip frame: 25
skip frame: 26
skip frame: 27
skip frame: 28
skip frame: 29
skip frame: 30
skip frame: 31
skip frame: 32
skip frame: 33
skip frame: 34
skip frame: 35
skip frame: 36
skip frame: 37
skip frame: 38
skip frame: 39
skip frame: 40
skip frame: 41
skip frame: 42
skip frame: 43
skip frame: 44
skip frame: 45
skip frame: 46
skip frame: 47
skip frame: 48
skip frame: 49



  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


0: 384x640 (no detections), 339.6ms
Speed: 140.2ms preprocess, 339.6ms inference, 1161.6ms postprocess per image at shape (1, 3, 384, 640)
skip frame: 51
skip frame: 52
skip frame: 53
skip frame: 54
skip frame: 55
skip frame: 56
skip frame: 57
skip frame: 58
skip frame: 59
skip frame: 60
skip frame: 61
skip frame: 62
skip frame: 63
skip frame: 64
skip frame: 65
skip frame: 66
skip frame: 67
skip frame: 68
skip frame: 69
skip frame: 70
skip frame: 71
skip frame: 72
skip frame: 73
skip frame: 74
skip frame: 75
skip frame: 76
skip frame: 77
skip frame: 78
skip frame: 79
skip frame: 80
skip frame: 81
skip frame: 82
skip frame: 83
skip frame: 84
skip frame: 85
skip frame: 86
skip frame: 87
skip frame: 88
skip frame: 89
skip frame: 90
skip frame: 91
skip frame: 92
skip frame: 93
skip frame: 94
skip frame: 95
skip frame: 96
skip frame: 97
skip frame: 98
skip frame: 99

0: 384x640 (no detections), 263.1ms
Speed: 4.0ms preprocess, 263.1ms inference, 0.0ms postprocess per image at shape (1, 3, 3

In [10]:
print("done")

done


In [None]:
# 再用labelImg检查