In [4]:
import sys
# Put the directory three levels above the working directory at the front of
# the module search path. Presumably this is what makes the project-local
# `data_retrieval` package importable in the next cell — TODO confirm.
path = "../../.."
if path not in sys.path:  # guard so re-running the cell does not add duplicates
    sys.path.insert(0, path)

In [5]:
import os

import numpy as np
import torch
from PIL import Image
from scipy.spatial.distance import cosine
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models import resnet18, resnet50, ResNet18_Weights, ResNet50_Weights
from tqdm import tqdm

from data_retrieval import lipade_groundtruth

# Name of the corpus processed by this notebook; it also names the results
# sub-directory below.
corpus = "lipade_groundtruth"
# Directory prefix (trailing slash included) onto which the output file names
# are concatenated when the distance matrices are saved.
resultsPath = "../results/distance/" + corpus + "/"

In [6]:
# getDataset() returns three values; the first is used here as a sequence of
# image locations and the third as the per-image labels `y` (the middle value
# is not needed in this notebook).
x, _, y = lipade_groundtruth.getDataset()

# Replace every entry of `x` in place by its decoded RGB image.
# Image.open is lazy and keeps the underlying file open, so the handle is
# scoped with `with`; convert() returns an independent in-memory copy that
# remains valid after the file has been closed.
for i, image_path in enumerate(x):
    with Image.open(image_path) as opened:
        x[i] = opened.convert('RGB')

In [7]:
class CustomImageDataset(Dataset):
    """Dataset view over an indexable collection of already-loaded images.

    Args:
        images: indexable, sized collection of samples.
        transform: optional callable applied to a sample each time it is
            fetched; when it is falsy (e.g. ``None``) samples are returned
            unchanged.
    """

    def __init__(self, images, transform=None):
        self.images = images
        self.transform = transform

    def __len__(self):
        """Number of samples in the wrapped collection."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the sample at ``idx``, passed through ``transform`` if set."""
        sample = self.images[idx]
        return self.transform(sample) if self.transform else sample

def setup_transforms():
    """Build the preprocessing pipeline applied to every image.

    Steps, in order: resize to 224x224, convert to a tensor, then normalise
    each of the three channels with a fixed mean and standard deviation.

    Returns:
        A ``transforms.Compose`` chaining the three steps.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)
def custom_collate_fn(batch):
    """Stack the usable samples of a batch into a single tensor.

    Samples that are ``None`` are dropped. The check is made on the sample
    itself: testing ``sample[0]`` would index into the tensor and could never
    detect a missing sample.

    Args:
        batch: list of per-sample tensors (entries may be ``None``).

    Returns:
        A tensor with the remaining samples stacked along a new leading
        dimension, or an empty tensor (``nelement() == 0``) when no sample is
        left. A tensor is returned in both cases so that consumers can call
        ``.nelement()`` on the result unconditionally.
    """
    samples = [sample for sample in batch if sample is not None]
    if not samples:
        return torch.tensor([])
    return torch.stack(samples, dim=0)

# Wrap the in-memory images so that each one goes through the preprocessing
# pipeline when it is fetched.
dataset = CustomImageDataset(x, setup_transforms())
# shuffle=False: batches are produced in dataset order, so the extracted
# feature rows stay in the same order as `x`.
dataloader = DataLoader(dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)

In [8]:
# Instantiate both backbones with torchvision's DEFAULT weights and switch
# them to evaluation mode before they are used for feature extraction.
backbone_resnet18 = resnet18(weights=ResNet18_Weights.DEFAULT)
backbone_resnet18.eval()

backbone_resnet50 = resnet50(weights=ResNet50_Weights.DEFAULT)
# eval() returns the module; as the last expression of the cell, its repr is
# what the notebook echoes as this cell's output.
backbone_resnet50.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
def extract_features(dataloader, model):
    """Collect intermediate activations of ``model`` for every batch.

    Each batch is passed through ``conv1 -> bn1 -> relu -> maxpool`` and then
    through ``layer1``, ``layer2`` and ``layer3``. The outputs of those three
    layers are recorded under the names stage2, stage3 and stage4
    respectively; nothing past ``layer3`` is evaluated. Gradients are
    disabled during the forward pass.

    Args:
        dataloader: iterable yielding image batches as tensors; batches with
            zero elements are skipped.
        model: module exposing ``conv1``, ``bn1``, ``relu``, ``maxpool``,
            ``layer1``, ``layer2`` and ``layer3``.

    Returns:
        Tuple ``(stage2, stage3, stage4)`` of NumPy arrays, each the
        per-batch activations stacked with ``np.vstack``. An entry is an
        empty ``np.array([])`` when no batch was processed.
    """
    # One accumulator per recorded layer, in return order.
    collected = ([], [], [])

    for images in tqdm(dataloader, desc="Extracting features"):
        if images.nelement() == 0:
            continue  # nothing to run the model on

        with torch.no_grad():
            stem = model.maxpool(model.relu(model.bn1(model.conv1(images))))
            stage2 = model.layer1(stem)
            stage3 = model.layer2(stage2)
            stage4 = model.layer3(stage3)

        for bucket, activation in zip(collected, (stage2, stage3, stage4)):
            bucket.append(activation.cpu().numpy())

    feature2_list, feature3_list, feature4_list = (
        np.vstack(bucket) if bucket else np.array([]) for bucket in collected
    )
    return feature2_list, feature3_list, feature4_list

In [10]:
# Run the extractor over the whole dataloader with the ResNet-18 backbone;
# the result is the 3-tuple of stacked activations that extract_features returns.
resnet18features = extract_features(dataloader, backbone_resnet18)

Extracting features: 100%|██████████| 9/9 [00:05<00:00,  1.68it/s]


In [11]:
# Same extraction with the ResNet-50 backbone, over the same dataloader, so
# rows line up with the ResNet-18 features.
resnet50features = extract_features(dataloader, backbone_resnet50)

Extracting features: 100%|██████████| 9/9 [00:11<00:00,  1.24s/it]


In [12]:
# Split each backbone's result into its three recorded stages.
(res18_2, res18_3, res18_4), (res50_2, res50_3, res50_4) = resnet18features, resnet50features

# Output names, index-aligned with `features` below.
featureNames = [
    "resnet18_stage2",
    "resnet18_stage3",
    "resnet18_stage4",
    "resnet50_stage2",
    "resnet50_stage3",
    "resnet50_stage4",
]

# Flatten every activation volume to one row vector per sample
# (len(y) rows; the remaining dimensions are merged into the second axis).
features = [
    activations.reshape(len(y), -1)
    for activations in (res18_2, res18_3, res18_4, res50_2, res50_3, res50_4)
]

In [13]:
# One symmetric (len(y) x len(y)) matrix of pairwise cosine distances per
# feature set, index-aligned with `features`.
distance = []
for sim_index, feature_rows in enumerate(features):
    # The metric is symmetric, so only the strict upper triangle is computed.
    upper = np.zeros((len(y), len(y)))
    for i in tqdm(range(len(y)), desc="Similarity " + str(sim_index)):
        for j in range(i + 1, len(y)):
            upper[i, j] = cosine(feature_rows[i], feature_rows[j])

    # Mirror the upper triangle into the lower one.
    symmetric = upper + upper.T
    # Self-comparisons are deliberately not left at 0: the diagonal is set
    # to 1 so that a sample is ignored as its own closest match.
    np.fill_diagonal(symmetric, 1.0)
    distance.append(symmetric)

Similarity 0:   0%|          | 0/279 [00:00<?, ?it/s]

Similarity 0: 100%|██████████| 279/279 [00:07<00:00, 37.02it/s] 
Similarity 1: 100%|██████████| 279/279 [00:02<00:00, 104.62it/s]
Similarity 2: 100%|██████████| 279/279 [00:01<00:00, 200.94it/s]
Similarity 3: 100%|██████████| 279/279 [00:33<00:00,  8.31it/s]
Similarity 4: 100%|██████████| 279/279 [00:17<00:00, 16.39it/s]
Similarity 5: 100%|██████████| 279/279 [00:07<00:00, 38.06it/s]


In [14]:
# np.save does not create missing parent directories, so make sure the
# results directory exists before writing (no-op when it is already there).
os.makedirs(resultsPath, exist_ok=True)

# Write one .npy file per feature set, named after the feature set.
for name, matrix in zip(featureNames, distance):
    np.save(resultsPath + name + ".npy", matrix)