In [None]:
import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader

## 导入文件

### 文件名获取

In [6]:
def load_dataset_files(data_path, begin, end):
    """ 获取 1~10 文件夹下以 M_ 开头的视频文件和对应的标注文件 """


    matched_files = []

    # 遍历 1~10 的文件夹
    for i in range(begin, end + 1):  # 修改为 end + 1，确保包含 'end'
        folder_path = os.path.join(data_path, str(i))  # 构造文件夹路径

        if not os.path.isdir(folder_path):  # 确保该路径是文件夹
            continue

        # 筛选文件夹中的文件并构建标注文件字典
        txt_files = {os.path.basename(txt).replace(".txt", ""): txt for txt in os.listdir(folder_path) if txt.endswith(".txt")}

        for file in os.listdir(folder_path):
            if file.startswith("M_") and file.lower().endswith('.mp4'):  # 只筛选 M_ 开头的 mp4 文件
                video_path = os.path.join(folder_path, file)
                #video_files.append(video_path)

                base_name = file.replace("M_", "").replace(".MP4", "")  # 去掉 M_ 前缀并去掉后缀
                annotation_file_name = base_name + ".txt"  # 得到相应的 txt 文件名
                 # 拼接绝对路径
                annotation_file_path = os.path.join(folder_path, annotation_file_name)
                
                # 检查文件是否存在
                if os.path.exists(annotation_file_path):
                    matched_files.append((video_path, annotation_file_path))
                else:
                    print(f"{video_path} 视频未找到相对应的文本文件")  # 如果没有对应的标注文件，加入 None

    print(f"找到 {len(matched_files)} 对（视频-文本）文件对")
    return matched_files

In [7]:
matched_files = load_dataset_files(r"D:\ESD\test",1,5)
for char in matched_files:
    print(char)
    print(r"\n")

找到 10 对（视频-文本）文件对
('D:\\ESD\\test\\1\\M_20230822145107_U2291907_1_001_0001-01.MP4', 'D:\\ESD\\test\\1\\20230822145107_U2291907_1_001_0001-01.txt')
\n
('D:\\ESD\\test\\1\\M_20230822145107_U2291907_1_001_0002-01.MP4', 'D:\\ESD\\test\\1\\20230822145107_U2291907_1_001_0002-01.txt')
\n
('D:\\ESD\\test\\2\\M_20230824101337_U2464637_1_001_0001-01.MP4', 'D:\\ESD\\test\\2\\20230824101337_U2464637_1_001_0001-01.txt')
\n
('D:\\ESD\\test\\2\\M_20230824101337_U2464637_1_001_0002-01.MP4', 'D:\\ESD\\test\\2\\20230824101337_U2464637_1_001_0002-01.txt')
\n
('D:\\ESD\\test\\3\\M_20230828105533_U2802633_1_001_0001-01.MP4', 'D:\\ESD\\test\\3\\20230828105533_U2802633_1_001_0001-01.txt')
\n
('D:\\ESD\\test\\3\\M_20230828105533_U2802633_1_001_0002-01.MP4', 'D:\\ESD\\test\\3\\20230828105533_U2802633_1_001_0002-01.txt')
\n
('D:\\ESD\\test\\4\\M_20230829124054_U2997853_1_001_0001-01.MP4', 'D:\\ESD\\test\\4\\20230829124054_U2997853_1_001_0001-01.txt')
\n
('D:\\ESD\\test\\4\\M_20230829124054_U2997853_1_001_0002-0

In [8]:
import os

def get_matched_files(annotation_files, video_files):
    """ 生成匹配的 (mp4, txt) 文件对 """
    matched_files = []

    # 为 annotation_files 创建一个字典，以便按文件名快速查找
    txt_dict = {os.path.basename(txt).replace(".txt", ""): txt for txt in annotation_files}

    # 遍历 video_files
    for video in video_files:
        video_name = os.path.basename(video)  # 获取 MP4 文件名
        txt_name = video_name.replace("M_", "").replace(".mp4", "")  # 生成对应的 txt 文件名

        # 检查 txt 文件是否存在于 txt_dict 中
        if txt_name in txt_dict:
            matched_files.append((video, txt_dict[txt_name]))
        else:
            print(f"⚠️ 未找到匹配的 txt 文件: {txt_name}")

    print(f"✅ 找到 {len(matched_files)} 对匹配的文件")
    return matched_files


In [10]:
print(matched_files)

[('D:\\ESD\\test\\1\\M_20230822145107_U2291907_1_001_0001-01.MP4', 'D:\\ESD\\test\\1\\20230822145107_U2291907_1_001_0001-01.txt'), ('D:\\ESD\\test\\1\\M_20230822145107_U2291907_1_001_0002-01.MP4', 'D:\\ESD\\test\\1\\20230822145107_U2291907_1_001_0002-01.txt'), ('D:\\ESD\\test\\2\\M_20230824101337_U2464637_1_001_0001-01.MP4', 'D:\\ESD\\test\\2\\20230824101337_U2464637_1_001_0001-01.txt'), ('D:\\ESD\\test\\2\\M_20230824101337_U2464637_1_001_0002-01.MP4', 'D:\\ESD\\test\\2\\20230824101337_U2464637_1_001_0002-01.txt'), ('D:\\ESD\\test\\3\\M_20230828105533_U2802633_1_001_0001-01.MP4', 'D:\\ESD\\test\\3\\20230828105533_U2802633_1_001_0001-01.txt'), ('D:\\ESD\\test\\3\\M_20230828105533_U2802633_1_001_0002-01.MP4', 'D:\\ESD\\test\\3\\20230828105533_U2802633_1_001_0002-01.txt'), ('D:\\ESD\\test\\4\\M_20230829124054_U2997853_1_001_0001-01.MP4', 'D:\\ESD\\test\\4\\20230829124054_U2997853_1_001_0001-01.txt'), ('D:\\ESD\\test\\4\\M_20230829124054_U2997853_1_001_0002-01.MP4', 'D:\\ESD\\test\\4\\2023

In [11]:
def load_annotation_file(annotation_file_path):
    """ 打开标注文件并获取标注信息（假设包含 Frame 和 Phase） """
    annotations = []
    try:
        # 打开并读取标注文件
        with open(annotation_file_path, 'r') as file:
            lines = file.readlines()

            # 跳过表头（如果有的话）
            if lines[0].startswith("Frame"):
                lines = lines[1:]

            # 遍历每一行，提取标注信息
            for line in lines:
                # 去除行尾换行符，并按空格分隔
                parts = line.strip().split()

                if len(parts) == 2:  # 确保每行包含 2 部分 (Frame 和 Phase)
                    try:
                        frame = int(parts[0])  # 帧号
                        phase = parts[1]  # 阶段

                        # 将解析后的数据保存为字典
                        annotations.append({
                            'frame': frame,
                            'phase': phase
                        })
                    except ValueError:
                        print(f"⚠️ 无法解析标注文件中的行: {line}")
                else:
                    print(f"⚠️ 标注文件格式不正确: {line}")

    except FileNotFoundError:
        print(f"⚠️ 找不到文件: {annotation_file_path}")
    except Exception as e:
        print(f"⚠️ 打开标注文件时发生错误: {e}")

    return annotations

In [30]:
annotations = load_annotation_file(r"D:\\ESD\\test\\1\\20230822145107_U2291907_1_001_0002-01.txt")

In [31]:
annotations

[{'frame': 0, 'phase': 'Preparation'},
 {'frame': 1, 'phase': 'Preparation'},
 {'frame': 2, 'phase': 'Preparation'},
 {'frame': 3, 'phase': 'Preparation'},
 {'frame': 4, 'phase': 'Preparation'},
 {'frame': 5, 'phase': 'Preparation'},
 {'frame': 6, 'phase': 'Preparation'},
 {'frame': 7, 'phase': 'Preparation'},
 {'frame': 8, 'phase': 'Preparation'},
 {'frame': 9, 'phase': 'Preparation'},
 {'frame': 10, 'phase': 'Preparation'},
 {'frame': 11, 'phase': 'Preparation'},
 {'frame': 12, 'phase': 'Preparation'},
 {'frame': 13, 'phase': 'Preparation'},
 {'frame': 14, 'phase': 'Preparation'},
 {'frame': 15, 'phase': 'Preparation'},
 {'frame': 16, 'phase': 'Preparation'},
 {'frame': 17, 'phase': 'Preparation'},
 {'frame': 18, 'phase': 'Preparation'},
 {'frame': 19, 'phase': 'Preparation'},
 {'frame': 20, 'phase': 'Preparation'},
 {'frame': 21, 'phase': 'Preparation'},
 {'frame': 22, 'phase': 'Preparation'},
 {'frame': 23, 'phase': 'Preparation'},
 {'frame': 24, 'phase': 'Preparation'},
 {'frame':

In [32]:
length = len(annotations)
print(length)

95150


In [27]:
def check_video_fps_and_frames(video_path):
    """ 检查视频的帧率和获取视频的总帧数 """
    
    # 打开视频文件
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        print(f"无法打开视频文件: {video_path}")
        return None, None
    
    # 获取视频的帧率 (fps) 和总帧数
    fps = cap.get(cv2.CAP_PROP_FPS)  # 获取帧率
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # 获取视频总帧数
    
    cap.release()  # 释放视频文件资源
    
    return fps, total_frames


In [33]:
video_path = r"D:\\ESD\\test\\1\\M_20230822145107_U2291907_1_001_0002-01.MP4"
fps, total_frames = check_video_fps_and_frames(video_path)

if fps is not None and total_frames is not None:
    print(f"视频帧率: {fps} FPS")
    print(f"视频总帧数: {total_frames} 帧")

视频帧率: 50.0 FPS
视频总帧数: 107167 帧


In [None]:
35250  40060       95150  107167