In [3]:
# Step 1: Setup and imports
import sys
import torch
from torch.utils.data import DataLoader, Subset
from pathlib import Path
import os
import cv2
import torch
import random
import pandas as pd
from torch.utils.data import Dataset
from torchvision import transforms
from glob import glob
import numpy as np
from pathlib import Path
from loaders.visual_loader import CelebDFVisualDataset
from encoders.visual_encoder import VisualEncoder

# Add src to path
sys.path.insert(0, str(Path.cwd()))

from loaders.image_loader import CelebDFImageDataset
from encoders.image_encoder import ImageEncoder

# Check device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [4]:
# Data path
root_dir = Path(r"D:\florida_coursework\third_sem\multimedia_expert_systems\multimedia_prototype\data\celeb_df")
print(f"Data directory: {root_dir}")
print(f"Exists: {data_root.exists()}")
test_list_file = root_dir / "List_of_testing_videos.txt"

test_videos = []
with open(test_list_file, "r") as f:
    cnt = 0
    for line in f:
        line = line.strip().strip(", '[]")
        if not line:
            continue
        # Each line format: "1 YouTube-real/00170.mp4"
        try:
            lbl_str, rel_path = line.split(maxsplit=1)
            lbl = int(lbl_str)
            test_videos.append({
                "rel_path": rel_path,
                "label": lbl
            })
        except ValueError as e:
            cnt += 1
            print(f"Skipping line due to parsing error: {line} ({e})")
            continue
    print("no. of skipped lines:" ,cnt)

# convert to DataFrame for easy lookup
test_df = pd.DataFrame(test_videos)

# gather all videos from dataset folders
all_videos = []
for subfolder in ["Celeb-real", "Celeb-synthesis", "YouTube-real"]:
    folder_path = root_dir / subfolder
    for vid in glob(str(folder_path / "*.mp4")):
        rel_path = f"{subfolder}/{Path(vid).name}"

        # Determine if video belongs to test set
        if rel_path in test_df["rel_path"].values:
            tag = "test"
            label = int(test_df.loc[test_df["rel_path"] == rel_path, "label"].values[0])
        else:
            # Assign label based on folder for train/val
            if subfolder in ["Celeb-real", "YouTube-real"]:
                label = 1  # real
            else:
                label = 0  # fake
            tag = "trainval"

        all_videos.append({
            "path": vid,
            "rel_path": rel_path,
            "label": label,
            "tag": tag
        })

df = pd.DataFrame(all_videos)
trainval = df[df["tag"] == "trainval"]
test = df[df["tag"] == "test"]


Data directory: D:\florida_coursework\third_sem\multimedia_expert_systems\multimedia_prototype\data\celeb_df
Exists: True
no. of skipped lines: 0


In [5]:
test_list = [
    {"path": row["path"], "label": row["label"]}
    for _, row in test.iterrows()
]
test_list[0]

{'path': 'D:\\florida_coursework\\third_sem\\multimedia_expert_systems\\multimedia_prototype\\data\\celeb_df\\Celeb-real\\id0_0001.mp4',
 'label': 1}

In [6]:
# Step 2: Load full dataset and create small test subset
print("Loading dataset...")
dataset = CelebDFImageDataset(root_dir=root_dir, video_list = test_list)

print(f"âœ“ Total videos in dataset: {len(dataset)}")
print(f"  Sample video paths: {dataset.video_paths[:3]}")
print(f"  Sample labels: {dataset.labels[:3]} (0=real, 1=fake)")

# # Create small test subset (10 videos)
# test_indices = list(range(500))
# dataset = Subset(full_dataset, test_indices)

# print(f"\nâœ“ Created test subset: {len(dataset)} videos")

Loading dataset...
âœ“ Total videos in dataset: 518
  Sample video paths: ['D:\\florida_coursework\\third_sem\\multimedia_expert_systems\\multimedia_prototype\\data\\celeb_df\\Celeb-real\\id0_0001.mp4', 'D:\\florida_coursework\\third_sem\\multimedia_expert_systems\\multimedia_prototype\\data\\celeb_df\\Celeb-real\\id10_0001.mp4', 'D:\\florida_coursework\\third_sem\\multimedia_expert_systems\\multimedia_prototype\\data\\celeb_df\\Celeb-real\\id10_0007.mp4']
  Sample labels: [1, 1, 1] (0=real, 1=fake)


In [7]:
# Step 3: Test single sample - face extraction and preprocessing
print("Testing single sample face extraction...")
sample_img, sample_label = dataset[0]

print(f"âœ“ Extracted face tensor: {sample_img.shape}")
print(f"  Expected shape: (3, 224, 224) - RGB image")
print(f"  Data range: [{sample_img.min():.3f}, {sample_img.max():.3f}]")
print(f"  Label: {sample_label.item()} ({'real' if sample_label.item() == 0 else 'fake'})")

# Check if face was detected (non-zero tensor)
if sample_img.sum() == 0:
    print("âš  Warning: Face not detected in this video (zeros returned)")
else:
    print("âœ“ Face successfully extracted and preprocessed!")

Testing single sample face extraction...
âœ“ Extracted face tensor: torch.Size([3, 224, 224])
  Expected shape: (3, 224, 224) - RGB image
  Data range: [0.004, 0.941]
  Label: 1 (fake)
âœ“ Face successfully extracted and preprocessed!


In [8]:
# Step 4: Create DataLoader for batch processing
print("Creating DataLoader...")
test_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=False,
    num_workers=0  # Use 0 for Windows to avoid multiprocessing issues
)

print(f"âœ“ Created DataLoader with {len(test_loader)} batches")

# Test one batch
batch_imgs, batch_labels = next(iter(test_loader))
print(f"\nBatch content:")
print(f"  - Images: {batch_imgs.shape}")
print(f"  - Labels: {batch_labels.shape}, values: {batch_labels.tolist()}")

Creating DataLoader...
âœ“ Created DataLoader with 130 batches

Batch content:
  - Images: torch.Size([4, 3, 224, 224])
  - Labels: torch.Size([4]), values: [1, 1, 1, 1]


In [9]:
# Step 5: Initialize ImageEncoder
print("Loading ImageEncoder (ResNet50)...")
embed_dim = 512  # Match audio encoder output for multimodal fusion

encoder = ImageEncoder(embed_dim=embed_dim).to(device)
encoder.eval()  # Set to evaluation mode

print(f"âœ“ ImageEncoder loaded on {device}")
print(f"  Backbone: ResNet50 (pretrained on ImageNet)")
print(f"  Output dimension: {embed_dim}")
print(f"  Total parameters: {sum(p.numel() for p in encoder.parameters()):,}")

Loading ImageEncoder (ResNet50)...
âœ“ ImageEncoder loaded on cuda
  Backbone: ResNet50 (pretrained on ImageNet)
  Output dimension: 512
  Total parameters: 24,558,144


In [10]:
# Step 6: Generate embeddings for all test batches
print("Generating image embeddings...")
all_embeddings = []
all_labels = []

with torch.no_grad():
    for i, (imgs, labels) in enumerate(test_loader):
        # Move batch to device
        imgs = imgs.to(device)
        
        # Generate embeddings
        embeddings = encoder(imgs)
        
        # Store results
        all_embeddings.append(embeddings.cpu())
        all_labels.append(labels)
        
        print(f"  Batch {i+1}/{len(test_loader)}: "
              f"images {imgs.shape} â†’ embeddings {embeddings.shape}")

# Concatenate all batches
all_embeddings = torch.cat(all_embeddings, dim=0)
all_labels = torch.cat(all_labels, dim=0)

print(f"\nâœ“ Final Results:")
print(f"  Total embeddings: {all_embeddings.shape}")
print(f"  Total labels: {all_labels.shape}")
print(f"  Labels distribution: real={(all_labels==0).sum().item()}, fake={(all_labels==1).sum().item()}")
print(f"\nðŸŽ‰ Image embeddings ready for multimodal fusion!")

Generating image embeddings...
  Batch 1/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 2/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 3/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 4/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 5/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 6/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 7/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 8/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 9/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 10/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 11/130: images torch.Size([4, 3, 224, 224]) â†’ embeddings torch.Size([4, 512])
  Batch 12

In [11]:
save_path = "embeddings/test_image_embeddings.pt"
torch.save({
    "embeddings": all_embeddings,   # (N, D)
    "labels": all_labels            # (N,)
}, save_path)

print("Saved:", save_path)


Saved: embeddings/test_image_embeddings.pt
