导入包

In [1]:
import cv2 
#python -m pip install opencv-python
# 或者
#python -m pip install opencv-python-headless
import os
import subprocess

这一部分代码用于在视频帧上点击取点，获取目标元素在原始分辨率下的坐标

In [6]:


# --- Configuration ---
VIDEO_PATH = "Douyin-videos/60万开的稀烂.mp4"
TIME_SEC   = 2     # grab the frame at the 2-second mark; change it to a timestamp your video actually contains
MAX_W, MAX_H = 800, 600   # maximum size of the preview window, in pixels

# Global: original-resolution coordinates of every click (appended to by on_mouse)
orig_points = []

def on_mouse(event, x, y, flags, param):
    """
    Mouse callback: map a left-click on the preview back to original-frame pixels.

    :param event: OpenCV mouse event code; only EVENT_LBUTTONDOWN is handled.
    :param x: Click x position in the displayed (scaled) image.
    :param y: Click y position in the displayed (scaled) image.
    :param flags: Unused; present because the callback signature requires it.
    :param param: Dict carrying the display scale factor under the key 'scale'.
    """
    # Ignore everything except a left-button press.
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    factor = param['scale']
    # Dividing by the display scale undoes the preview resize; int() truncates.
    original_xy = (int(x / factor), int(y / factor))
    print(f"Display=({x},{y}) → Original=({original_xy[0]},{original_xy[1]})")
    orig_points.append(original_xy)

# 1. Read a single frame at TIME_SEC
cap = cv2.VideoCapture(VIDEO_PATH)
try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.set(cv2.CAP_PROP_POS_FRAMES, TIME_SEC * fps)
    ret, frame = cap.read()
finally:
    # Release the capture handle even if seeking/reading raises.
    cap.release()
if not ret:
    raise RuntimeError("读帧失败，请检查 TIME_SEC 和 VIDEO_PATH")

# 2. Scale the frame down proportionally so it fits within MAX_W x MAX_H
#    (the 1.0 cap means small frames are never enlarged)
h, w = frame.shape[:2]
scale = min(MAX_W/w, MAX_H/h, 1.0)
display = cv2.resize(frame, (int(w*scale), int(h*scale)))

# 3. Create a resizable window and attach the click callback
cv2.namedWindow("pick-point", cv2.WINDOW_NORMAL)
cv2.resizeWindow("pick-point", int(w*scale), int(h*scale))
cv2.setMouseCallback("pick-point", on_mouse, {'scale': scale})

print("请点击想要的点 (Esc 退出)，终端会输出对应原始坐标。")
# The frame is static, so show it once. Re-calling imshow every iteration can
# re-create a window the user closed (without the mouse callback attached),
# leaving the cell stuck until Esc is pressed.
cv2.imshow("pick-point", display)
while True:
    # 20 ms keeps the GUI responsive without spinning the CPU.
    if cv2.waitKey(20) & 0xFF == 27:  # Esc
        break
    # Also stop when the window was closed via its title-bar button.
    try:
        window_open = cv2.getWindowProperty("pick-point", cv2.WND_PROP_VISIBLE) >= 1
    except cv2.error:
        # Some GUI backends raise for a window that no longer exists.
        window_open = False
    if not window_open:
        break

cv2.destroyAllWindows()
# Give the GUI event loop one more tick so the window is actually torn down.
cv2.waitKey(1)
print("所有点击的原始坐标：", orig_points)


请点击想要的点 (Esc 退出)，终端会输出对应原始坐标。
Display=(3,72) → Original=(9,230)
Display=(334,145) → Original=(1068,464)
所有点击的原始坐标： [(9, 230), (1068, 464)]


按区域裁剪视频并按间隔抽帧

In [8]:
def _write_jpeg(path: str, image) -> None:
    """Encode `image` as JPEG and write it to `path`, raising if either step fails."""
    ok, encoded = cv2.imencode(".jpg", image)
    if not ok:
        raise OSError(f"Failed to encode image for: {path}")
    # Writing through Python's own file API surfaces I/O errors as exceptions,
    # whereas cv2.imwrite only reports failure through its return value.
    with open(path, "wb") as fh:
        fh.write(encoded.tobytes())


def crop_and_extract_frames(
    video_path: str,
    output_dir: str,
    crop_region: tuple,
    frame_interval: int = 30,
    title_region: tuple = None
):
    """
    Crop a specific region from a video and extract frames.
    Additionally, extract the title_region once at t=1s.

    Sampled crops are written as ``frame_0000.jpg``, ``frame_0001.jpg``, ...
    and the title crop as ``title_frame.jpg``, all inside `output_dir`.
    Regions are applied with array slicing, so a region that extends past the
    frame edge is clipped rather than rejected.

    :param video_path: Path to the input video file.
    :param output_dir: Directory to save cropped frames (created if missing).
    :param crop_region: Tuple (x, y, w, h) for the main ROI.
    :param frame_interval: Extract one frame every `frame_interval` frames;
                           must be >= 1.
    :param title_region: Tuple (x, y, w, h) for title area; if provided,
                         extract that region once at the 1s mark. Nothing is
                         saved if the video ends before that frame.
    :raises ValueError: If `frame_interval` is smaller than 1.
    :raises FileNotFoundError: If the video cannot be opened.
    :raises OSError: If a cropped frame cannot be encoded or written.
    """
    # Fail fast: an interval of 0 would otherwise hit ZeroDivisionError mid-loop.
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval!r}")

    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise FileNotFoundError(f"Cannot open video: {video_path}")

    # Regions do not change per frame, so unpack them once up front.
    x, y, w, h = crop_region
    if title_region:
        x_t, y_t, w_t, h_t = title_region

    title_saved = False
    frame_count = 0
    saved_count = 0

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        title_frame_idx = int(fps * 1)  # index of the frame at the 1-second mark

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # At the 1-second mark, save the title region exactly once.
            if title_region and not title_saved and frame_count == title_frame_idx:
                _write_jpeg(
                    os.path.join(output_dir, "title_frame.jpg"),
                    frame[y_t:y_t+h_t, x_t:x_t+w_t]
                )
                title_saved = True

            # Save the main ROI on every `frame_interval`-th frame.
            if frame_count % frame_interval == 0:
                fname = os.path.join(output_dir, f"frame_{saved_count:04d}.jpg")
                _write_jpeg(fname, frame[y:y+h, x:x+w])
                saved_count += 1

            frame_count += 1
    finally:
        # Always release the capture handle, even if a write fails.
        cap.release()

    print(f"Extracted title frame? {title_saved}, plus {saved_count} other frames to {output_dir}")


# --- Usage example ---
# NOTE(review): crop_and_extract_frames documents its regions as (x, y, w, h),
# but the tuples below look like (x1, y1, x2, y2) corner coordinates taken from
# the point-picking cell (whose recorded clicks were (9, 230) and (1068, 464)).
# Confirm the intent and convert to width/height if so.
crop_and_extract_frames(
    video_path="Douyin-videos/60万开的稀烂.mp4",
    output_dir="rois",
    crop_region=(540,469,1079,1428),   # main ROI cropped from every sampled frame
    frame_interval=300,                  # sample one frame every 300 frames
    title_region=(9, 230, 1079, 464)     # title area, extracted only once at t=1s
)


Extracted title frame? True, plus 30 other frames to rois


In [5]:
def extract_audio_same_name(
    video_path: str,
    start_time: str = None,
    duration: str = None,
    sample_rate: int = 16000,
    channels: int = 1
):
    """
    Extract a video's audio track to ``audio-output/<video base name>.wav``.

    The ``audio-output`` directory is created in the current working directory
    if needed, and an existing output file is overwritten (ffmpeg ``-y``).
    The audio is written as 16-bit PCM WAV.

    :param video_path: Path to the input video file.
    :param start_time: Optional start time ("HH:MM:SS" or seconds).
                       ``None`` or ``""`` means "from the beginning".
    :param duration: Optional duration to extract ("SS" or "HH:MM:SS").
                     ``None`` or ``""`` means "until the end".
    :param sample_rate: Output audio sample rate (default 16000 Hz).
    :param channels: Number of audio channels (default mono).
    :raises subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    :raises FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Output keeps the video's base name but lives in a dedicated directory.
    output_dir = "audio-output"
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = os.path.join(output_dir, f"{base_name}.wav")

    # Build the ffmpeg command as an argument list (no shell involved).
    cmd = ["ffmpeg", "-y", "-i", video_path]
    # Only None/"" count as "not given", so an explicit numeric 0 is honoured
    # instead of being dropped by a truthiness check.
    if start_time not in (None, ""):
        cmd += ["-ss", str(start_time)]
    if duration not in (None, ""):
        cmd += ["-t", str(duration)]
    cmd += [
        "-vn",                      # drop the video stream
        "-acodec", "pcm_s16le",     # 16-bit little-endian PCM
        "-ar", str(sample_rate),
        "-ac", str(channels),
        audio_path
    ]

    # check=True turns a failed ffmpeg run into an exception.
    subprocess.run(cmd, check=True)
    print(f"Extracted audio to: {audio_path}")

# Example call: the audio is saved under the same base name as the video
extract_audio_same_name("Douyin-videos/60万开的稀烂.mp4")



Extracted audio to: audio-output\60万开的稀烂.wav
