In [1]:
import argparse
import pathlib
import numpy as np
import cv2
import time

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
import torch.backends.cudnn as cudnn
import torchvision

from PIL import Image
from PIL import Image, ImageOps

# from retinaface import RetinaFace
# from l2cs.detection.retinaface_detector import RetinaFace

from l2cs import select_device, draw_gaze, getArch, Pipeline, render


CWD = pathlib.Path.cwd()

# 设置参数
video_path = '../videos/video_1.mp4'  # 修改为你的视频路径
output_path = '../face_test.mp4'
device = 'gpu:0'  
arch = 'ResNet50'

cudnn.enabled = True

gaze_pipeline = Pipeline(
    weights=CWD / 'models' / 'L2CSNet_gaze360.pkl',
    arch='ResNet50',
    device = select_device(device, batch_size=256),
    confidence_threshold = 0.8
)
 
cap = cv2.VideoCapture(video_path)

# Check if the video file is opened correctly
if not cap.isOpened():
    raise IOError(f"Cannot open video file: {video_path}")

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Define video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

if not out.isOpened():
    raise IOError(f"Cannot create video writer for: {output_path}")

frame_count = 0

with torch.no_grad():
    while True:

        # Get frame
        success, frame = cap.read()    
        start_fps = time.time()  

        if not success:
            print("Video processing completed")
            break

        frame_count += 1
        print(f"Processing frame {frame_count}/{total_frames}")

        # Process frame
        results = gaze_pipeline.step(frame)

        # Visualize output
        frame = render(frame, results)
       
        myFPS = 1.0 / (time.time() - start_fps)
        cv2.putText(frame, 'FPS: {:.1f}'.format(myFPS), (10, 20),cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)

        # Write frame to output video
        out.write(frame)

cap.release()
out.release()
print(f"Output video saved to: {output_path}")

Processing frame 1/68
Processing frame 2/68
Processing frame 3/68
Processing frame 4/68
Processing frame 5/68
Processing frame 6/68
Processing frame 7/68
Processing frame 8/68
Processing frame 9/68
Processing frame 10/68
Processing frame 11/68
Processing frame 12/68
Processing frame 13/68
Processing frame 14/68
Processing frame 15/68
Processing frame 16/68
Processing frame 17/68
Processing frame 18/68
Processing frame 19/68
Processing frame 20/68
Processing frame 21/68
Processing frame 22/68
Processing frame 23/68
Processing frame 24/68
Processing frame 25/68
Processing frame 26/68
Processing frame 27/68
Processing frame 28/68
Processing frame 29/68
Processing frame 30/68
Processing frame 31/68
Processing frame 32/68
Processing frame 33/68
Processing frame 34/68
Processing frame 35/68
Processing frame 36/68
Processing frame 37/68
Processing frame 38/68
Processing frame 39/68
Processing frame 40/68
Processing frame 41/68
Processing frame 42/68
Processing frame 43/68
Processing frame 44/

# obvious

In [None]:
import argparse
import pathlib
import numpy as np
import cv2
import time

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
import torch.backends.cudnn as cudnn
import torchvision

from PIL import Image
from PIL import Image, ImageOps

from face_detection import RetinaFace

from l2cs import select_device, Pipeline 
# 注意：我们不再使用 l2cs 的 render，而是自己写绘制逻辑

CWD = pathlib.Path.cwd()

# --- 自定义参数设置 ---
video_path = '../test_gaze/video_105.mp4'
output_path = '../output_gaze/video_105_obvious.mp4'
device = 'gpu:0' if torch.cuda.is_available() else 'cpu'
arch = 'ResNet50'

# *** 在这里修改箭头的样式 ***
ARROW_LENGTH = 300   # 箭头长度 (原版通常是 50-100，这里设为 200)
THICKNESS = 5        # 线条粗细 (原版通常是 2，这里设为 5)
COLOR = (0, 0, 255)  # 箭头颜色 (B, G, R)，这里是红色
TIP_LENGTH = 0.4     # 箭头尖端的比例 (0.0-1.0)，越大尖端越长

cudnn.enabled = True

gaze_pipeline = Pipeline(
    weights=CWD / 'models' / 'L2CSNet_gaze360.pkl',
    arch='ResNet50',
    device=select_device(device, batch_size=1)
)
 
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    raise IOError(f"Cannot open video file: {video_path}")

fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

if not out.isOpened():
    raise IOError(f"Cannot create video writer for: {output_path}")

frame_count = 0

# --- 自定义绘制函数 ---
def render_custom(frame, results, length=100, thickness=2, color=(0, 0, 255)):

    pitch = results.pitch
    yaw = results.yaw
    bboxes = results.bboxes

    img = frame.copy()
    
    # 遍历每一张检测到的人脸
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        
        # 计算人脸中心点 (起点)
        x_min, y_min, x_max, y_max = bbox
        cx = int((x_min + x_max) / 2)
        cy = int((y_min + y_max) / 2)
        
        # 绘制人脸框 (可选，如果你不需要框可以注释掉下面两行)
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2)

        # 计算箭头终点
        # L2CS 的坐标系计算方式：
        # dx = -sin(yaw)
        # dy = sin(pitch)
        dx = -length * np.sin(yaw[i])
        dy = length * np.sin(pitch[i])
        
        tx = int(cx + dx)
        ty = int(cy + dy)
        
        # 绘制更粗更长的箭头
        cv2.arrowedLine(img, (cx, cy), (tx, ty), color, thickness, cv2.LINE_AA, tipLength=TIP_LENGTH)
        
        # 在中心点画个圆圈，让起点更明显 (可选)
        cv2.circle(img, (cx, cy), thickness * 2, (0, 255, 0), -1)

    return img

with torch.no_grad():
    while True:
        success, frame = cap.read()    
        start_fps = time.time()  

        if not success:
            print("Video processing completed")
            break

        frame_count += 1
        print(f"Processing frame {frame_count}/{total_frames}")

        # Process frame
        results = gaze_pipeline.step(frame)

        # Visualize output (使用自定义函数替代原来的 render)
        frame = render_custom(frame, results, length=ARROW_LENGTH, thickness=THICKNESS, color=COLOR)
       
        myFPS = 1.0 / (time.time() - start_fps)
        cv2.putText(frame, 'FPS: {:.1f}'.format(myFPS), (10, 20), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)

        out.write(frame)

cap.release()
out.release()
print(f"Output video saved to: {output_path}")

# process all

In [2]:
import argparse
import pathlib
import numpy as np
import cv2
import time
import os

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
import torch.backends.cudnn as cudnn
import torchvision

from PIL import Image
from PIL import Image, ImageOps

from l2cs import select_device, draw_gaze, getArch, Pipeline, render

# 1. 设置输入和输出文件夹路径
input_folder = pathlib.Path('../videos')
output_folder = pathlib.Path('../output_L2CS')

# 确保输出目录存在，如果不存在则创建
output_folder.mkdir(parents=True, exist_ok=True)

CWD = pathlib.Path.cwd()
device = 'gpu:0'  
arch = 'ResNet50'
cudnn.enabled = True

# 2. 加载模型 (放在循环外，只加载一次，节省时间)
print("Loading model...")
gaze_pipeline = Pipeline(
    weights=CWD / 'models' / 'L2CSNet_gaze360.pkl',
    arch='ResNet50',
    device=select_device(device, batch_size=512),
    confidence_threshold = 0.8
)
print("Model loaded.")

# 3. 获取所有视频文件 (这里以 .mp4 为例，需要其他格式可以加)
# pathlib.Path.glob 支持通配符
video_files = list(input_folder.glob('*.mp4')) 
# 如果还有 .avi 或 .mov，可以用: video_files.extend(input_folder.glob('*.avi'))

if not video_files:
    print(f"No video files found in {input_folder}")
    exit()

print(f"Found {len(video_files)} videos to process.")

# 4. 循环处理每个视频
for i, video_file in enumerate(video_files):
    video_path = str(video_file)
    # 保持原文件名，只改变路径
    output_path = str(output_folder / video_file.name)
    
    print(f"[{i+1}/{len(video_files)}] Start processing: {video_file.name}")
    
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Cannot open video file: {video_path}, skipping...")
        continue

    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS) # 有些视频FPS可能是浮点数
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Define video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create video writer for: {output_path}, skipping...")
        cap.release()
        continue

    frame_count = 0

    with torch.no_grad():
        while True:
            success, frame = cap.read()    
            start_fps = time.time()  

            if not success:
                break

            frame_count += 1
            # 每隔50帧打印一次进度，避免刷屏太快
            if frame_count % 50 == 0:
                print(f"  -> Processing frame {frame_count}/{total_frames}")

            # Process frame - 添加异常处理
            try:
                results = gaze_pipeline.step(frame)
            except ValueError as e:
                if 'need at least one array to stack' in str(e):
                    print(f"  Warning: No face detected in frame {frame_count}, skipping...")
                    out.write(frame)  # 写入原始帧
                    continue
                else:
                    raise

            # Visualize output
            frame = render(frame, results)
        
            # Calculate and draw FPS
            end_time = time.time()
            time_diff = end_time - start_fps
            if time_diff > 0:
                myFPS = 1.0 / time_diff
            else:
                myFPS = 0.0
                
            cv2.putText(frame, 'FPS: {:.1f}'.format(myFPS), (10, 20),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)

            # Write frame to output video
            out.write(frame)

    cap.release()
    out.release()
    print(f"Finished {video_file.name}. Saved to: {output_path}\n")

print("All videos processed!")

Loading model...
Model loaded.
Found 433 videos to process.
[1/433] Start processing: video_83.mp4
  -> Processing frame 50/55
Finished video_83.mp4. Saved to: ../output_L2CS/video_83.mp4

[2/433] Start processing: video_139.mp4
Finished video_139.mp4. Saved to: ../output_L2CS/video_139.mp4

[3/433] Start processing: video_36.mp4
  -> Processing frame 50/94
Finished video_36.mp4. Saved to: ../output_L2CS/video_36.mp4

[4/433] Start processing: video_260.mp4
Finished video_260.mp4. Saved to: ../output_L2CS/video_260.mp4

[5/433] Start processing: video_70.mp4
  -> Processing frame 50/81
Finished video_70.mp4. Saved to: ../output_L2CS/video_70.mp4

[6/433] Start processing: video_177.mp4
Finished video_177.mp4. Saved to: ../output_L2CS/video_177.mp4

[7/433] Start processing: video_433.mp4
  -> Processing frame 50/60
Finished video_433.mp4. Saved to: ../output_L2CS/video_433.mp4

[8/433] Start processing: video_295.mp4
  -> Processing frame 50/72
Finished video_295.mp4. Saved to: ../outp