In [6]:
import os
import cv2
import numpy as np
import pickle
import random
from multiprocessing import Pool, cpu_count

# Directory configuration
data_dir = './'  # Root directory that is scanned for the dataset folders
output_dir = './processed_frames'  # Destination for the extracted face images
os.makedirs(output_dir, exist_ok=True)  # Safe to re-run: an existing directory is not an error

# Emotion lookup: two-digit code -> emotion label
emotions_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fear',
    '07': 'disgust',
    '08': 'surprise',
}

# Sample random frames from a video and crop the largest detected face in each
def extract_random_faces(video_path, max_faces=10, frame_size=(64, 64), sample_frames=5):
    """Return face crops taken from randomly chosen frames of a video.

    Up to ``sample_frames`` distinct frame indices are drawn with
    ``random.sample`` and visited in ascending order. Each frame is downscaled
    for Haar-cascade detection; the largest detection (by area) is mapped back
    to full-resolution coordinates, cropped from the original frame and resized
    to ``frame_size``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_faces: Upper bound on the number of crops returned. With the
            defaults at most ``sample_frames`` (5) crops can be produced, so
            the default of 10 is never the limiting factor.
        frame_size: ``(width, height)`` passed to ``cv2.resize`` for each crop.
        sample_frames: Number of frames to sample from the video.

    Returns:
        A list of image arrays, one per frame in which a face was found. The
        crops keep the channel order of the decoded frame (treated as BGR by
        the grayscale conversion below). The list is empty when the video
        reports fewer than two frames or no face is detected.
    """
    # Detection runs on a downscaled copy; this single constant drives both the
    # downscale and the mapping of detections back to full resolution.
    detect_scale = 0.5

    faces = []
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames < 2:
            return faces

        frame_indices = sorted(random.sample(range(total_frames), min(sample_frames, total_frames)))

        for i in frame_indices:
            # Check the cap before doing any work so max_faces <= 0 yields no faces.
            if len(faces) >= max_faces:
                break

            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue

            small = cv2.resize(frame, (0, 0), fx=detect_scale, fy=detect_scale)
            gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
            faces_detected = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
            if len(faces_detected) == 0:
                continue

            # Keep only the largest detection and map it back to full resolution.
            largest = max(faces_detected, key=lambda rect: rect[2] * rect[3])
            x, y, w, h = (int(round(v / detect_scale)) for v in largest)
            face = frame[y:y + h, x:x + w]
            if face.size == 0:
                # An empty crop would make cv2.resize raise; skip it instead.
                continue
            faces.append(cv2.resize(face, frame_size))
    finally:
        # Always free the capture handle, even if an OpenCV call above raises.
        cap.release()

    return faces

# Process a single video (worker function used with multiprocessing)
def process_video(args):
    """Extract face crops from one video and save them as JPEG files.

    Args:
        args: A ``(subfolder_path, file)`` tuple; packed into one argument so
            the function can be mapped over a list by a process pool.

    Returns:
        A list of ``(output_path, emotion)`` tuples, one per face image that
        exists on disk after the call. The list is empty when the filename has
        fewer than three ``-``-separated fields, when its third field is not a
        key of ``emotions_map``, or when no face was extracted.
    """
    subfolder_path, file = args

    # The emotion code is the third '-'-separated field of the filename;
    # malformed names are skipped instead of raising IndexError in a worker.
    name_parts = file.split('-')
    if len(name_parts) < 3:
        return []
    emotion = emotions_map.get(name_parts[2])
    if not emotion:
        return []

    video_path = os.path.join(subfolder_path, file)
    faces = extract_random_faces(video_path)

    # Strip only the extension (str.replace would also remove an inner '.mp4').
    stem = os.path.splitext(file)[0]

    results = []
    for idx, face in enumerate(faces):
        output_path = os.path.join(output_dir, f"{stem}_frame{idx}.jpg")
        if not os.path.exists(output_path):
            # Frames decoded by OpenCV are already BGR, which is what
            # cv2.imwrite expects; an extra RGB->BGR conversion would swap the
            # red and blue channels in the saved file.
            # NOTE: files written by an earlier run are left untouched, so
            # delete them to regenerate images with the corrected colours.
            if not cv2.imwrite(output_path, face):
                # Do not report a path that was never written.
                continue
        results.append((output_path, emotion))
    return results

# Collect every video that needs to be processed.
# Expected layout: <data_dir>/<...Video_Speech_Actor_... | ...Video_Song_Actor_...>/Actor_*/<name>.mp4
all_videos = []
for actor_folder in os.listdir(data_dir):
    actor_path = os.path.join(data_dir, actor_folder)
    if not os.path.isdir(actor_path):
        continue
    if 'Video_Speech_Actor_' not in actor_folder and 'Video_Song_Actor_' not in actor_folder:
        continue

    for subfolder in os.listdir(actor_path):
        subfolder_path = os.path.join(actor_path, subfolder)
        if not (os.path.isdir(subfolder_path) and subfolder.startswith('Actor_')):
            continue
        # Each entry is the (directory, filename) pair later unpacked by the worker.
        all_videos.extend(
            (subfolder_path, file)
            for file in os.listdir(subfolder_path)
            if file.endswith('.mp4')
        )

print(f"Đang xử lý {len(all_videos)} video bằng {cpu_count()} core...")

# Fan the videos out over a process pool to speed things up.
# Results arrive in completion order (imap_unordered), not in input order.
# NOTE(review): where multiprocessing uses the 'spawn' start method, worker
# functions defined in a notebook cell may not be importable by the workers —
# confirm this runs on the target OS.
all_results = []
with Pool(processes=cpu_count()) as pool:
    for per_video in pool.imap_unordered(process_video, all_videos):
        all_results += per_video

print(f"Xử lý xong, tổng số ảnh: {len(all_results)}")

# Split the results into fixed-size batches and pickle each batch to its own file
batch_size = 1000
batch_starts = range(0, len(all_results), batch_size)
for i in batch_starts:
    batch = all_results[i:i+batch_size]
    # Unzip the (path, label) pairs into two parallel tuples
    image_paths = tuple(path for path, _label in batch)
    labels = tuple(label for _path, label in batch)
    with open(f"image_paths_labels_batch{i//batch_size + 1}.pkl", "wb") as f:
        pickle.dump((image_paths, labels), f)
    print(f"Đã lưu batch {i//batch_size + 1} với {len(batch)} ảnh.")



Đang xử lý 4904 video bằng 12 core...


RuntimeError: Could not infer dtype of numpy.float32

In [4]:
import torch
# Display which device string applies: 'cuda' when torch.cuda.is_available() is true, otherwise 'cpu'.
# Left as the cell's last bare expression so the notebook shows the value.
'cuda' if torch.cuda.is_available() else 'cpu'

'cuda'