In [None]:
# Attach the user's Google Drive to this Colab runtime at the mount point below.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)


Mounted at /content/drive


In [None]:
!pip install torch torchvision pillow numpy tqdm



In [None]:
import os
import numpy as np
from PIL import Image
import torch
from torchvision import models, transforms
from tqdm import tqdm

In [None]:
# Root of the input dataset, located under the Google Drive mount point.
data_dir = "/content/drive/MyDrive/Dataset"
# Destination directory for the generated .npy embedding files.
# NOTE(review): this path is outside the Drive mount, so it is presumably
# runtime-local storage that does not survive a Colab session — confirm
# whether the results should be copied to Drive afterwards.
save_dir = "/content/resnet_embeddings"
# Create the output directory; exist_ok=True makes re-running this cell harmless.
os.makedirs(save_dir, exist_ok=True)

In [None]:
# Run on the GPU when the runtime exposes one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ResNet-50 initialised with the IMAGENET1K_V2 pretrained weights.
resnet = models.resnet50(weights="IMAGENET1K_V2")
# Replace the final fully connected layer with a pass-through so the forward
# pass returns the features that would otherwise feed the classifier.
resnet.fc = torch.nn.Identity()   # remove classifier to get embeddings

# Move the model to the selected device and switch it to evaluation mode.
# eval() returns the module itself, so assigning the result (instead of ending
# the cell on a bare `resnet.eval()` expression) keeps the very long
# layer-by-layer module repr out of the notebook output.
resnet = resnet.to(device).eval()

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:01<00:00, 80.6MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Per-channel mean and standard deviation handed to Normalize.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Image -> tensor pipeline: force a fixed 224x224 size, convert to a tensor,
# then normalise each channel with the constants above.
preprocess = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

In [None]:
def get_resnet_embedding(img_path):
    """Return the ResNet output for one image file as a flat NumPy array.

    The image is opened, converted to RGB, passed through the module-level
    ``preprocess`` pipeline and then through ``resnet`` with gradient
    tracking disabled.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        A 1-D ``numpy.ndarray`` holding the flattened model output, or
        ``None`` if any step failed. Failures are printed rather than raised,
        so callers must check for ``None``.
    """
    try:
        # The context manager closes the underlying file handle even when
        # decoding fails part-way (e.g. a truncated or corrupt file).
        # convert() returns a new, fully loaded image, so it stays usable
        # after the source image has been closed.
        with Image.open(img_path) as raw_img:
            rgb_img = raw_img.convert("RGB")

        # unsqueeze(0) adds the leading batch axis before the tensor is moved
        # to the model's device.
        batch = preprocess(rgb_img).unsqueeze(0).to(device)

        with torch.no_grad():
            emb = resnet(batch).cpu().numpy().flatten()
        return emb
    except Exception as e:
        # Deliberately best-effort: report the failing path and carry on.
        print(f"Error: {img_path}, {e}")
        return None

In [None]:
# Half-open window [WINDOW_START, WINDOW_END) applied to each class folder's
# sorted directory listing, i.e. the second block of 25,000 entries.
WINDOW_START = 25000
WINDOW_END = 50000
# File extensions (compared case-insensitively) that are treated as images.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

X = []  # one embedding per successfully processed image
y = []  # matching labels: 0 for "real", 1 for "fake"

for label_name, label_val in [("real", 0), ("fake", 1)]:

    folder = os.path.join(data_dir, label_name)

    # Check if the folder exists
    if not os.path.exists(folder):
        print(f"Error: Folder '{folder}' not found. Skipping {label_name} images.")
        continue

    # Sort so the window selects the same entries on every run.
    all_images = sorted(os.listdir(folder))

    # Take entries 25,000-49,999 of the sorted listing (NOT the first 25,000),
    # then drop non-image entries inside that window. Filtering after slicing
    # selects exactly the same files as before, but the count printed below
    # and the progress-bar total now reflect the images actually processed.
    window = all_images[WINDOW_START:WINDOW_END]
    next_images = [name for name in window if name.lower().endswith(IMAGE_EXTENSIONS)]
    print(f"Processing {label_name}: {len(next_images)} images")

    for img_name in tqdm(next_images):
        emb = get_resnet_embedding(os.path.join(folder, img_name))
        # get_resnet_embedding returns None on failure; skip those images so
        # X and y stay aligned.
        if emb is not None:
            X.append(emb)
            y.append(label_val)

# Convert the accumulated Python lists to NumPy arrays.
X = np.array(X)
y = np.array(y)

print("Final embedding shape:", X.shape)

Processing real: 25000 images


 71%|███████   | 17763/25000 [3:18:15<1:31:11,  1.32it/s]

In [None]:
# Build both output paths inside save_dir, then write features and labels.
embeddings_path = os.path.join(save_dir, "X_resnet_25k_next.npy")
labels_path = os.path.join(save_dir, "y_resnet_25k_next.npy")

np.save(embeddings_path, X)
np.save(labels_path, y)

print("Saved to:", save_dir)

In [None]:
import numpy as np

# Read the two .npy files back from save_dir
embeddings_file = f"{save_dir}/X_resnet_25k_next.npy"
labels_file = f"{save_dir}/y_resnet_25k_next.npy"
X = np.load(embeddings_file)
y = np.load(labels_file)

# Report the dimensions of both arrays
print("Embeddings shape:", X.shape)
print("Labels shape:", y.shape)

# Show the first embedding, followed by its label
first_embedding = X[0]
print("\nSample Embedding (first row):\n", first_embedding)
first_label = y[0]
print("\nLabel for this embedding:", first_label)