In [None]:
# Mount Google Drive inside the Colab runtime so files under /content/drive
# (where the dataset path used later in this notebook points) are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Uncomment on a fresh runtime to install the packages this notebook imports:
# %pip install facenet-pytorch timm torch torchvision numpy pillow tqdm

In [None]:
import os
import numpy as np
import torch
from torchvision import transforms
from facenet_pytorch import MTCNN
from PIL import Image
from tqdm import tqdm
import timm

In [None]:
# Source images are read from the mounted Drive folder; extracted embeddings
# are written to a local directory on the runtime.
data_dir = "/content/drive/MyDrive/combined_dataset_new"
save_dir = "/content/deepfake_embedding"
os.makedirs(save_dir, exist_ok=True)  # exist_ok keeps this cell safe to re-run

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cpu


In [None]:
# MTCNN face detector producing one 224x224 face crop per image (keep_all=False).
mtcnn = MTCNN(device=device, image_size=224, margin=20, keep_all=False)

# Pretrained ViT-B/16 used only as a feature extractor: the classification
# head is swapped for an identity layer so the forward pass returns features.
vit_model = timm.create_model('vit_base_patch16_224', pretrained=True)
vit_model.head = torch.nn.Identity()
vit_model = vit_model.to(device)
# Switch to inference mode; as the last expression of the cell this also
# echoes the model structure in the cell output.
vit_model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False

In [None]:
def get_vit_embedding(img_path):
    """Return the ViT feature vector for the face detected in an image.

    The image is loaded as RGB, cropped to a face by the module-level
    ``mtcnn`` detector, and the crop is passed through the module-level
    ``vit_model`` without gradient tracking.

    Args:
        img_path: Path to an image file that PIL can open.

    Returns:
        A flattened 1-D NumPy array with the model output for the face crop,
        or ``None`` when no face is detected or any step raises (the error is
        printed together with the offending path).
    """
    try:
        # The context manager releases the file handle as soon as the pixel
        # data has been converted, instead of leaving it to the garbage
        # collector while hundreds of images are processed in a loop.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        face = mtcnn(img)
        if face is None:
            return None

        # `face` is a (C, H, W) tensor. With facenet-pytorch's default
        # post_process=True (the detector above does not override it) the crop
        # is already standardized as (pixel - 127.5) / 128, i.e. roughly
        # [-1, 1]. Rescaling it again with (x - 0.5) / 0.5 pushed the inputs to
        # about [-3, 1], so no further normalization is applied here.
        # NOTE(review): this ViT checkpoint presumably expects mean=std=0.5
        # inputs -- confirm via vit_model.pretrained_cfg.
        face = face.unsqueeze(0).to(device)

        with torch.no_grad():
            emb = vit_model(face).cpu().numpy().flatten()
        return emb
    except Exception as e:
        # Best-effort extraction: report the failure and let the caller skip
        # this image rather than aborting the whole run.
        print(f"Error processing {img_path}: {e}")
        return None

In [None]:
# Collect one embedding per image in which a face was found, together with its
# class label (0 = Real, 1 = Fake).
X, y = [], []

for label_name, label_val in [("Real", 0), ("Fake", 1)]:
    folder = os.path.join(data_dir, label_name)
    # isdir rather than exists: a plain file with the label's name would pass
    # an existence check and then make os.listdir raise.
    if not os.path.isdir(folder):
        print(f"++Folder not found: {folder}")
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of X / y reproducible across runs. Filtering before the progress
    # bar keeps its total equal to the number of images actually processed.
    image_names = sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith((".jpg", ".png", ".jpeg"))
    )

    for img_name in tqdm(image_names, desc=f"Processing {label_name}"):
        img_path = os.path.join(folder, img_name)

        # None means no face was detected or the image failed to process;
        # such images are skipped so X and y stay aligned.
        emb = get_vit_embedding(img_path)
        if emb is not None:
            X.append(emb)
            y.append(label_val)

X = np.array(X)
y = np.array(y)

print("Extraction complete!")
print("Embeddings shape:", X.shape)
print("Labels shape:", y.shape)


Processing real: 100%|██████████| 500/500 [06:19<00:00,  1.32it/s]
Processing fake: 100%|██████████| 500/500 [07:12<00:00,  1.16it/s]

✅ Extraction complete!
Embeddings shape: (838, 768)
Labels shape: (838,)





In [None]:
# Persist the feature matrix and the label vector as two .npy files in save_dir.
embeddings_file = os.path.join(save_dir, "X_vit_embeddings.npy")
labels_file = os.path.join(save_dir, "y_labels.npy")
np.save(embeddings_file, X)
np.save(labels_file, y)
print("Embeddings saved to:", save_dir)

✅ Embeddings saved to: /content/deepfake_embedding


In [None]:
import numpy as np

# This cell re-declares its import and path so the saved arrays can be
# reloaded without re-running the extraction cells above.
save_dir = "/content/deepfake_embedding"

# Read the arrays back from disk.
X = np.load(save_dir + "/X_vit_embeddings.npy")
y = np.load(save_dir + "/y_labels.npy")

# Sanity check: both arrays should have the same number of rows.
print("Embeddings shape:", X.shape)
print("Labels shape:", y.shape)

# Show the first embedding next to its label.
first_embedding, first_label = X[0], y[0]
print("\nSample Embedding (first row):\n", first_embedding)
print("\nLabel for this embedding:", first_label)

Embeddings shape: (838, 768)
Labels shape: (838,)

Sample Embedding (first row):
 [ 2.49818134e+00 -3.06579518e+00 -1.56503034e+00 -3.53825986e-01
 -3.20551664e-01 -1.18795419e+00  1.92895365e+00  2.63427210e+00
  1.20990232e-01  3.40338796e-01  8.44277978e-01 -1.50059378e+00
  1.46957129e-01 -1.27068424e+00 -8.66069674e-01  4.56599444e-01
  6.53088391e-01 -1.26802838e+00 -1.56395924e+00  6.01584077e-01
  1.30887938e+00  1.38220215e+00 -7.86968887e-01 -1.53597808e+00
 -1.00858104e+00  6.88783705e-01  7.35350791e-03  8.92337859e-02
 -4.15933758e-01 -1.82200587e+00  2.04298735e+00 -1.91502631e+00
  8.89839947e-01 -2.04025340e+00 -4.38666135e-01  2.11892319e+00
 -6.50086284e-01  1.99009478e+00 -1.15953898e+00 -1.45665157e+00
  1.04546928e+00  2.04804271e-01  1.44774318e-01 -2.01874399e+00
 -2.60067511e+00 -1.23651636e+00 -3.77473402e+00 -8.22797358e-01
 -2.19426855e-01  1.13229893e-01 -3.36363971e-01  1.18089139e+00
 -1.95771599e+00  2.18909606e-01 -8.85927260e-01  1.88188028e+00
 -5.3087