In [6]:
import os

def list_files(directory, output_file):
    with open(output_file, "w", encoding="utf-8") as f:
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                f.write(file_path + "\n")

# Example usage
directory_path = "E:/dataset/happy/"
output_file = "file_list.txt"
list_files(directory_path, output_file)

print(f"File list saved to {output_file}")


File list saved to file_list.txt


In [7]:
import os

# Define your dataset paths
output_real_faces = "E:/dataset/happy/real"
output_optical_flow = "E:/dataset/happy/optical_flow"
output_edges = "E:/dataset/happy/edges"

# Check number of files in each folder
face_files = len([f for f in os.listdir(output_real_faces) if f.endswith(".jpg")])
flow_files = len([f for f in os.listdir(output_optical_flow) if f.endswith(".jpg")])
edge_files = len([f for f in os.listdir(output_edges) if f.endswith(".jpg")])

print(f"✅ Face images: {face_files}")
print(f"✅ Optical Flow images: {flow_files}")
print(f"✅ Edge images: {edge_files}")


✅ Face images: 955
✅ Optical Flow images: 954
✅ Edge images: 954


In [8]:
import os

# Paths
output_real_faces = "E:/dataset/happy/real"
output_optical_flow = "E:/dataset/happy/optical_flow"
output_edges = "E:/dataset/happy/edges"

# Get file lists without extensions
face_files = {f.replace(".jpg", "") for f in os.listdir(output_real_faces) if f.endswith(".jpg")}
flow_files = {f.replace("_flow.jpg", "") for f in os.listdir(output_optical_flow) if f.endswith(".jpg")}
edge_files = {f.replace("_edges.jpg", "") for f in os.listdir(output_edges) if f.endswith(".jpg")}

# Find missing files
missing_flow = face_files - flow_files
missing_edge = face_files - edge_files

print(f"❌ Missing Optical Flow for: {missing_flow}")
print(f"❌ Missing Edge Map for: {missing_edge}")


❌ Missing Optical Flow for: {'camera_leftfront_M004_light_up_happy_camera_leftfront.mp4_frame57_face0'}
❌ Missing Edge Map for: {'camera_leftfront_M004_light_up_happy_camera_leftfront.mp4_frame57_face0'}


In [4]:
import os
import cv2
import numpy as np
import random
from tqdm import tqdm

def load_precomputed_data(face_dir, flow_dir, edge_dir, label, condition_string, max_samples=15000):
    """Loads images in smaller batches to avoid memory overload, with debug prints."""
    x_data = []
    y_data = []
    count = 0

    print(f"\n🔄 Loading data from: {face_dir} (Label: {label}) - Filtering for '{condition_string}'")
    
    # Get list of relevant files
    face_files = [f for f in os.listdir(face_dir) if condition_string in f and f.endswith(".jpg")]

    if not face_files:
        print(f"⚠️ No '{condition_string}' images found in {face_dir}! Check filenames.")
        return np.array([]), np.array([])

    # Shuffle and limit to max_samples
    random.shuffle(face_files)
    face_files = face_files[:max_samples]

    batch_size = 1000  # Process in smaller batches
    for i in range(0, len(face_files), batch_size):
        batch_files = face_files[i:i+batch_size]

        batch_x = []
        for face_file in tqdm(batch_files, desc=f"Processing {face_dir} [{i}/{len(face_files)}]", unit="img"):
            base_name = os.path.splitext(face_file)[0].replace("_face", "")

            face_path = os.path.join(face_dir, face_file)
            flow_path = os.path.join(flow_dir, base_name + "_flow.jpg")
            edge_path = os.path.join(edge_dir, base_name + "_edges.jpg")

            # Check if all required files exist
            missing_files = []
            if not os.path.exists(face_path):
                missing_files.append("Face")
            if not os.path.exists(flow_path):
                missing_files.append("Optical Flow")
            if not os.path.exists(edge_path):
                missing_files.append("Edge Map")

            if missing_files:
                print(f"❌ Missing {', '.join(missing_files)} for {face_file}")
                continue  # Skip this file if any modality is missing

            try:
                # Load images and resize
                face_img = cv2.resize(cv2.imread(face_path), (299, 299))
                flow_img = cv2.resize(cv2.imread(flow_path), (299, 299))
                edge_img = cv2.resize(cv2.imread(edge_path), (299, 299))

                # Normalize and convert to float32 to save memory
                face_img = face_img.astype(np.float32) / 255.0
                flow_img = flow_img.astype(np.float32) / 255.0
                edge_img = edge_img.astype(np.float32) / 255.0

                # Concatenate into a 9-channel image
                combined_input = np.concatenate((face_img, flow_img, edge_img), axis=-1)

                batch_x.append(combined_input)
                y_data.append(label)
                count += 1  # Increment count only if the sample was successfully created

                print(f"✅ Successfully created sample: {face_file}")

            except Exception as e:
                print(f"⚠️ Error processing {face_file}: {e}")

        if batch_x:
            x_data.extend(batch_x)

    if count == 0:
        print(f"⚠️ No valid samples found in {face_dir}. Please check file names and paths!\n")

    print(f"✅ Finished loading {count} images from {face_dir}.\n")
    return np.array(x_data, dtype=np.float32), np.array(y_data, dtype=np.int8)  # Convert to smaller dtype

# Load in smaller batches
print("🔄 Starting to load precomputed datasets...\n")
real_x, real_y = load_precomputed_data("E:/dataset/happy/real", "E:/dataset/happy/optical_flow", "E:/dataset/happy/edges", label=0, condition_string="", max_samples=239)
fake_x, fake_y = load_precomputed_data("E:/dataset/fake", "E:/dataset/optical_flow", "E:/dataset/edges", label=1, condition_string="", max_samples=239)

# Ensure we loaded data
if real_x.size == 0 or fake_x.size == 0:
    print("❌ ERROR: No valid images loaded! Fix errors before continuing.")
    exit()

# **Use np.memmap to write data in chunks to avoid RAM overuse**
save_path = "E:/dataset/happy/preprocessed_data_happy.npz"
print("💾 Saving preprocessed data in chunks to avoid memory issues...")

# Define shapes
total_samples = real_x.shape[0] + fake_x.shape[0]
image_shape = (299, 299, 9)

# Create a memory-mapped file
x_data_memmap = np.memmap(save_path.replace(".npz", "_x.dat"), dtype=np.float32, mode="w+", shape=(total_samples, *image_shape))
y_data_memmap = np.memmap(save_path.replace(".npz", "_y.dat"), dtype=np.int8, mode="w+", shape=(total_samples,))

# Write in chunks
x_data_memmap[:real_x.shape[0]] = real_x
x_data_memmap[real_x.shape[0]:] = fake_x
y_data_memmap[:real_y.shape[0]] = real_y
y_data_memmap[real_y.shape[0]:] = fake_y

# Flush to disk
del x_data_memmap, y_data_memmap  # Ensure it's saved before creating NPZ file

# Save final NPZ file with memory-efficient data
np.savez_compressed(save_path, x=np.memmap(save_path.replace(".npz", "_x.dat"), dtype=np.float32, mode="r", shape=(total_samples, *image_shape)), 
                                  y=np.memmap(save_path.replace(".npz", "_y.dat"), dtype=np.int8, mode="r", shape=(total_samples,)))

print("✅ Data saved successfully!")


🔄 Starting to load precomputed datasets...


🔄 Loading data from: E:/dataset/happy/real (Label: 0) - Filtering for ''


Processing E:/dataset/happy/real [0/239]: 100%|██████████| 239/239 [00:00<00:00, 2861.69img/s]

❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame165_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame45_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame207_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame127_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame159_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_right_M004_light_up_happy_camera_right.mp4_frame42_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame120_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_right_M004_light_up_happy_camera_right.mp4_frame14_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_M004_light_up_happy_camera_front.mp4_frame68_face0.jpg
❌ Missing Optical Flow, Edge Map for camera_front_


Processing E:/dataset/fake [0/239]:   0%|          | 1/239 [00:00<03:21,  1.18img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_506_W134.mp4_frame92_face0.jpg


Processing E:/dataset/fake [0/239]:   1%|          | 2/239 [00:01<03:23,  1.16img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_525_M101.mp4_frame75_face0.jpg


Processing E:/dataset/fake [0/239]:   1%|▏         | 3/239 [00:02<03:19,  1.18img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_159_M005.mp4_frame284_face0.jpg


Processing E:/dataset/fake [0/239]:   2%|▏         | 4/239 [00:03<03:13,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_555_W040.mp4_frame82_face0.jpg


Processing E:/dataset/fake [0/239]:   2%|▏         | 5/239 [00:04<03:12,  1.21img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_819_W135.mp4_frame766_face0.jpg


Processing E:/dataset/fake [0/239]:   3%|▎         | 6/239 [00:04<03:10,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_589_W112.mp4_frame174_face0.jpg


Processing E:/dataset/fake [0/239]:   3%|▎         | 7/239 [00:05<03:08,  1.23img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_128_M124.mp4_frame487_face0.jpg


Processing E:/dataset/fake [0/239]:   3%|▎         | 8/239 [00:06<03:09,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_349_W131.mp4_frame344_face0.jpg


Processing E:/dataset/fake [0/239]:   4%|▍         | 9/239 [00:07<03:08,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_133_M132.mp4_frame545_face0.jpg


Processing E:/dataset/fake [0/239]:   4%|▍         | 10/239 [00:08<03:06,  1.23img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_181_M007.mp4_frame184_face0.jpg


Processing E:/dataset/fake [0/239]:   5%|▍         | 11/239 [00:09<03:08,  1.21img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_472_W028.mp4_frame389_face0.jpg


Processing E:/dataset/fake [0/239]:   5%|▌         | 12/239 [00:09<03:09,  1.20img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_903_M029.mp4_frame221_face0.jpg


Processing E:/dataset/fake [0/239]:   5%|▌         | 13/239 [00:10<03:02,  1.24img/s]

✅ Successfully created sample: end_to_end_569_M115.mp4_frame1_face0.jpg


Processing E:/dataset/fake [0/239]:   6%|▌         | 14/239 [00:11<03:02,  1.23img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_773_W028.mp4_frame527_face0.jpg


Processing E:/dataset/fake [0/239]:   6%|▋         | 15/239 [00:12<03:04,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_547_M109.mp4_frame79_face0.jpg


Processing E:/dataset/fake [0/239]:   7%|▋         | 16/239 [00:13<03:01,  1.23img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_437_W023.mp4_frame368_face0.jpg


Processing E:/dataset/fake [0/239]:   7%|▋         | 17/239 [00:13<02:58,  1.24img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_003_M101.mp4_frame120_face0.jpg


Processing E:/dataset/fake [0/239]:   8%|▊         | 18/239 [00:14<02:56,  1.25img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_038_W009.mp4_frame151_face0.jpg


Processing E:/dataset/fake [0/239]:   8%|▊         | 19/239 [00:15<02:55,  1.25img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_992_W012.mp4_frame412_face0.jpg


Processing E:/dataset/fake [0/239]:   8%|▊         | 20/239 [00:16<02:55,  1.24img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_118_W132.mp4_frame302_face0.jpg


Processing E:/dataset/fake [0/239]:   9%|▉         | 21/239 [00:17<02:58,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_823_M018.mp4_frame659_face0.jpg


Processing E:/dataset/fake [0/239]:   9%|▉         | 22/239 [00:17<02:57,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_786_W133.mp4_frame117_face0.jpg


Processing E:/dataset/fake [0/239]:  10%|▉         | 23/239 [00:18<02:43,  1.32img/s]

✅ Successfully created sample: end_to_end_739_W022.mp4_frame29_face0.jpg


Processing E:/dataset/fake [0/239]:  10%|█         | 24/239 [00:19<02:45,  1.30img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_686_W015.mp4_frame236_face0.jpg


Processing E:/dataset/fake [0/239]:  10%|█         | 25/239 [00:20<02:46,  1.28img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_869_W040.mp4_frame138_face7.jpg


Processing E:/dataset/fake [0/239]:  11%|█         | 26/239 [00:21<02:51,  1.24img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_832_W036.mp4_frame239_face0.jpg


Processing E:/dataset/fake [0/239]:  11%|█▏        | 27/239 [00:21<02:54,  1.22img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_780_W029.mp4_frame151_face0.jpg


Processing E:/dataset/fake [0/239]:  12%|█▏        | 28/239 [00:22<02:51,  1.23img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_184_W031.mp4_frame428_face0.jpg


Processing E:/dataset/fake [0/239]:  12%|█▏        | 29/239 [00:23<02:49,  1.24img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_844_M134.mp4_frame395_face0.jpg


Processing E:/dataset/fake [0/239]:  13%|█▎        | 30/239 [00:24<02:49,  1.24img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_121_W021.mp4_frame408_face0.jpg


Processing E:/dataset/fake [0/239]:  13%|█▎        | 31/239 [00:25<02:48,  1.24img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_810_W134.mp4_frame844_face0.jpg


Processing E:/dataset/fake [0/239]:  13%|█▎        | 32/239 [00:26<02:48,  1.23img/s]

❌ Missing Optical Flow, Edge Map for end_to_end_871_W041.mp4_frame86_face0.jpg





KeyboardInterrupt: 