In [1]:
!pip install torch torchvision tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curan

In [2]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
from pathlib import Path
from tqdm import tqdm


In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
resnet = models.resnet50(pretrained=True)  
model = nn.Sequential(*list(resnet.children())[:-1])
model.to(device)
model.eval()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 212MB/s]


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]
    ),
])


In [5]:
def getem(image_folder):
    image_paths = list(Path(image_folder).glob("*.*"))
    embeddings = []

    for path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        image = Image.open(path).convert("RGB")
        img_tensor = transform(image).unsqueeze(0).to(device)
        with torch.no_grad():
            emb = model(img_tensor).squeeze()
            emb = emb/emb.norm()
        embeddings.append(emb.cpu())
    
    embeddings = torch.stack(embeddings)
    return embeddings


In [6]:
def datasim(folder_a, folder_b):
    emb_A = getem(folder_a)
    emb_B = getem(folder_b)

    
    smtrx = emb_A@emb_B.T
    Amax = smtrx.max(dim=1).values
    Bmax = smtrx.max(dim=0).values
    score = (Amax.mean() + Bmax.mean())/2
    return score.item()


In [7]:
cfrcat = "/kaggle/input/cifar10/cifar10/test/cat"
cfrdog = "//kaggle/input/cifar10/cifar10/test/dog"
cifr = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/test/REAL"
cifg = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/test/FAKE"

scCatDog = datasim(cfrcat, cfrdog)
scRealGen = datasim(cifr, cifg)
print("Comparing Cat vs Dog: ")
print("Semantic Similarity (Cat vs Dog):", scCatDog)
print("\nComparing Real vs Generated CIFAKE: ")
print("Semantic Similarity (Real vs Generated):", scRealGen)

Processing /kaggle/input/cifar10/cifar10/test/cat: 100%|██████████| 1000/1000 [00:17<00:00, 56.19it/s]
Processing //kaggle/input/cifar10/cifar10/test/dog: 100%|██████████| 1000/1000 [00:16<00:00, 58.94it/s]
Processing /kaggle/input/cifake-real-and-ai-generated-synthetic-images/test/REAL: 100%|██████████| 10000/10000 [02:54<00:00, 57.44it/s]
Processing /kaggle/input/cifake-real-and-ai-generated-synthetic-images/test/FAKE: 100%|██████████| 10000/10000 [02:36<00:00, 63.93it/s]


Comparing Cat vs Dog: 
Semantic Similarity (Cat vs Dog): 0.8272014260292053

Comparing Real vs Generated CIFAKE: 
Semantic Similarity (Real vs Generated): 0.8742204904556274
