In [1]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device() if torch.cuda.is_available() else "None")
!nvidia-smi


CUDA available: True
Device count: 1
Current device: 0


  from .autonotebook import tqdm as notebook_tqdm


Mon Apr 28 22:07:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.14                 Driver Version: 566.14         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   41C    P8             14W /   50W |     970MiB /   6144MiB |     15%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [18]:
import cv2
import torch
import clip
import numpy as np
from PIL import Image
from typing import List

# 设备选择
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# 加载模型和预处理器
model, preprocess = clip.load("ViT-B/32", device=device)

model.float()

# 情绪提示词
emotion_labels = [
    # 喜 Joy
    "Joy: a vibrant scene with warm sunlight, blooming flowers, bright colors, and a peaceful landscape that evokes happiness.A dynamic and energetic scene full of movement, bright lights, fireworks, and visual excitement like a festival or concert",

    # 怒 Anger
    "Anger: a dramatic and chaotic environment with dark clouds, aggressive fire, broken structures, and intense tension in the atmosphere",

    # 哀 Sadness
    "Sadness: a lonely and desaturated scene of a rainy day, empty streets, or a quiet foggy forest that feels melancholic and heavy",

    # 害怕 Fear
    "Fear: a dark and eerie environment with shadows, fog, abandoned buildings, or looming shapes that create a strong sense of danger, suspense, or unease",
    
    "Excited: A fast-paced and thrilling electronic track with intense beats and energetic drops, reflecting the excitement and adrenaline of competitive gameplay."
    
    # 宁静 Inner Peace
    "Inner Peace: a calm and tranquil natural scene with soft sunlight, still water, open space, gentle hills or mountains, and a feeling of deep serenity"
]
text_prompts = [f"a scene evoking {e}" for e in emotion_labels]
text_tokens = clip.tokenize(text_prompts).to(device)

Using device: cuda


In [15]:
def extract_frames(video_path: str, interval_sec: float = 1.0) -> List[Image.Image]:
    """
    每 interval_sec 截取一帧，返回 PIL 图像列表
    """
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_interval = int(fps * interval_sec)
    frames = []

    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % frame_interval == 0:
            image_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frames.append(image_pil)
        frame_idx += 1

    cap.release()
    return frames

In [19]:
def predict_emotion_for_images(images: List[Image.Image], model, preprocess, text_features, device="cpu") -> np.ndarray:
    """
    对图像列表进行情绪识别，返回每帧情绪得分组成的数组 shape=[num_frames, num_emotions]
    """
    all_probs = []
    with torch.no_grad():
        for img in images:
            image_input = preprocess(img).unsqueeze(0).to(device)
            image_features = model.encode_image(image_input)
            logits_per_image = (image_features @ text_features.T).softmax(dim=-1)
            all_probs.append(logits_per_image.cpu().numpy()[0])
    return np.array(all_probs)

def analyze_video_emotion(video_path: str, model, preprocess, device="cpu") -> None:
    """
    主函数：输入视频路径，输出分析结果
    """
    print(f"⏳ 正在处理视频: {video_path}")
    # 1. 编码文本特征（只做一次）
    text_tokens = clip.tokenize(text_prompts).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_tokens)

    # 2. 提取帧
    frames = extract_frames(video_path, interval_sec=1.0)
    print(f"✅ 提取帧数: {len(frames)}")

    if not frames:
        print("⚠️ 未提取到帧，检查视频文件是否有效")
        return

    # 3. 图像预测
    all_probs = predict_emotion_for_images(frames, model, preprocess, text_features, device=device)

    # 4. 汇总分析
    avg_probs = all_probs.mean(axis=0)

    # 5. 输出
    print("\n📊 视频情绪分布：")
    for emotion, score in zip(emotion_labels, avg_probs):
        # 只打印第一个冒号前面的情感名称
        emotion_name = emotion.split(":")[0]
        print(f"{emotion_name:<15}: {score:.3f}")
    
    print("\n🌟 推测视频情感基调：", emotion_labels[np.argmax(avg_probs)].split(":")[0])


In [20]:
analyze_video_emotion("fixed_test.mp4", model, preprocess, device=device)

⏳ 正在处理视频: fixed_test.mp4
✅ 提取帧数: 43

📊 视频情绪分布：
Joy            : 0.012
Anger          : 0.008
Sadness        : 0.001
Fear           : 0.457
Excited        : 0.522

🌟 推测视频情感基调： Excited


In [None]:
analyze_video_emotion("tree.mp4", model, preprocess, device=device)

⏳ 正在处理视频: tree.mp4
✅ 提取帧数: 46

📊 视频情绪分布：
Joy            : 0.074
Anger          : 0.010
Sadness        : 0.090
Excitement     : 0.332
Inner Peace    : 0.493

🌟 推测视频情感基调： Inner Peace


In [None]:
analyze_video_emotion("./output/fear_2.mp4", model, preprocess, device=device)

⏳ 正在处理视频: ./output/fear_2.mp4
✅ 提取帧数: 90

📊 视频情绪分布：
Joy            : 0.043
Anger          : 0.027
Sadness        : 0.000
Fear           : 0.929
Inner Peace    : 0.000

🌟 推测视频情感基调： Fear


In [None]:
analyze_video_emotion("./output/愤怒_7.mp4", model, preprocess, device=device)

⏳ 正在处理视频: ./output/愤怒_7.mp4
✅ 提取帧数: 90

📊 视频情绪分布：
Joy            : 0.714
Anger          : 0.025
Sadness        : 0.012
Fear           : 0.248
Inner Peace    : 0.001

🌟 推测视频情感基调： Joy


In [None]:
analyze_video_emotion("./output/愤怒_8.mp4", model, preprocess, device=device)

⏳ 正在处理视频: ./output/愤怒_8.mp4
✅ 提取帧数: 90

📊 视频情绪分布：
Joy            : 0.372
Anger          : 0.013
Sadness        : 0.008
Fear           : 0.606
Inner Peace    : 0.001

🌟 推测视频情感基调： Fear
