In [1]:
import os
import torch
import torchvision
from torchvision import datasets
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Lambda
import matplotlib.pyplot as plt
import requests
from zipfile import ZipFile
from io import BytesIO
import numpy as np
import zipfile
import os


# Where the downloaded SPair-71k archive lives and where it gets unpacked.
zip_file_path = r'C:\Users\nicol\Documents\PoliTo\AdvancedML\project\SPair-71k.zip'
extract_dir = r'C:\Users\nicol\Documents\PoliTo\AdvancedML\project\SPair-71k_extracted'

# Create the extraction directory if it does not exist yet.
os.makedirs(extract_dir, exist_ok=True)

# Without the archive there is nothing to unpack: just tell the user where the
# data is expected. Otherwise unpack everything and list what ended up on disk.
if not os.path.exists(zip_file_path):
    print(f"File zip '{zip_file_path}' non trovato. Assicurati che il dataset sia estratto in '{extract_dir}'.")
else:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    print(f"File '{zip_file_path}' estratto con successo nella directory '{extract_dir}'")
    print(f"Contenuti della directory '{extract_dir}':\n{os.listdir(extract_dir)}")



File 'C:\Users\nicol\Documents\PoliTo\AdvancedML\project\SPair-71k.zip' estratto con successo nella directory 'C:\Users\nicol\Documents\PoliTo\AdvancedML\project\SPair-71k_extracted'
Contenuti della directory 'C:\Users\nicol\Documents\PoliTo\AdvancedML\project\SPair-71k_extracted':
['SPair-71k']


In [2]:
from PIL import Image
import glob
import json


class Normalize(object):
    """Sample-dict transform: rescale and standardise the listed image entries.

    For every key in ``image_keys`` the stored tensor is divided by 255 in
    place and then passed through ``transforms.Normalize`` with fixed
    per-channel mean/std (the usual ImageNet statistics). Other entries of the
    dict are left alone.
    """

    def __init__(self, image_keys):
        # Keys of the sample dict that hold image tensors.
        self.image_keys = image_keys
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        self.normalize = transforms.Normalize(mean=mean, std=std)

    def __call__(self, image):
        """Normalise ``image[key]`` for each configured key and return the same dict."""
        for key in self.image_keys:
            pixels = image[key]
            # In-place divide, exactly like the augmented assignment on the dict entry.
            # Assumes values are in the 0-255 range -- TODO confirm against the loader.
            pixels /= 255.0
            image[key] = self.normalize(pixels)
        return image


def read_img(path):
    """Read the image at ``path`` and return it as a float32, channel-first tensor.

    The file is opened with PIL and converted to RGB, so the result has three
    channels laid out as (channel, row, column). Pixel values are only cast to
    float32; no rescaling happens here.
    """
    rgb_image = Image.open(path).convert('RGB')
    height_width_channel = np.array(rgb_image)
    channel_first = height_width_channel.transpose(2, 0, 1).astype(np.float32)
    return torch.tensor(channel_first)


class SPairDataset(Dataset):
    """Image-pair dataset driven by per-pair JSON annotation files.

    The list of pairs is read from ``layout_path/<dataset_size>/<datatype>.txt``
    (one pair name per line). Each item loads the matching JSON file from
    ``pair_ann_path/<datatype>/`` plus the two images it references from
    ``image_path/<category>/``.

    Args:
        pair_ann_path: directory holding one sub-folder of JSON files per split.
        layout_path: directory holding the split list files.
        image_path: directory with one sub-folder of images per category.
        dataset_size: name of the layout sub-folder to use (e.g. ``'large'``).
        pck_alpha: factor applied to the target box size to get the PCK threshold.
        datatype: split name (e.g. ``'trn'``, ``'val'``, ``'test'``).
    """

    def __init__(self, pair_ann_path, layout_path, image_path, dataset_size, pck_alpha, datatype):

        self.datatype = datatype
        self.pck_alpha = pck_alpha

        # Read the pair list with a context manager so the handle is closed, and
        # keep every non-empty line. Unconditionally dropping the last entry (as
        # was done before) loses a real pair when the file has no trailing newline.
        split_file = os.path.join(layout_path, dataset_size, datatype + '.txt')
        with open(split_file, "r") as f:
            self.ann_files = [line.strip() for line in f if line.strip()]

        self.pair_ann_path = pair_ann_path
        self.image_path = image_path
        # Category names are the (sorted) entry names directly under image_path.
        self.categories = sorted(os.path.basename(p) for p in glob.glob('%s/*' % image_path))
        self.transform = Normalize(['src_img', 'trg_img'])

    def __len__(self):
        """Number of image pairs in this split."""
        return len(self.ann_files)

    def _annotation_path(self, pair_name):
        """Return the JSON path for ``pair_name``.

        The name with ':' replaced by '_' is preferred (presumably because the
        archive was unpacked on Windows, where ':' is not allowed in file
        names). If that file is missing but one with the original name exists,
        the original name is used, so extractions that kept ':' also work.
        """
        split_dir = os.path.join(self.pair_ann_path, self.datatype)
        sanitized = os.path.join(split_dir, pair_name.replace(':', '_') + '.json')
        if ':' in pair_name and not os.path.exists(sanitized):
            unsanitized = os.path.join(split_dir, pair_name + '.json')
            if os.path.exists(unsanitized):
                return unsanitized
        # Falling through keeps the previous behaviour (and error) when nothing matches.
        return sanitized

    def __getitem__(self, idx):
        """Load pair ``idx`` and return it as a dict of metadata, images and keypoints.

        Raises:
            IndexError: if ``idx`` is outside the pair list.
            FileNotFoundError: if the annotation JSON cannot be opened.
            KeyError: if the annotation lacks one of the expected fields.
        """
        json_path = self._annotation_path(self.ann_files[idx])

        with open(json_path) as f:
            annotation = json.load(f)

        category = annotation['category']
        src_img = read_img(os.path.join(self.image_path, category, annotation['src_imname']))
        trg_img = read_img(os.path.join(self.image_path, category, annotation['trg_imname']))

        # PCK threshold: alpha times the larger side of the target box.
        # Assumes the box is [x_min, y_min, x_max, y_max] -- TODO confirm.
        trg_bbox = annotation['trg_bndbox']
        pck_threshold = max(trg_bbox[2] - trg_bbox[0], trg_bbox[3] - trg_bbox[1]) * self.pck_alpha

        sample = {'pair_id': annotation['pair_id'],
                  'filename': annotation['filename'],
                  'src_imname': annotation['src_imname'],
                  'trg_imname': annotation['trg_imname'],
                  'src_imsize': src_img.size(),
                  'trg_imsize': trg_img.size(),

                  'src_bbox': annotation['src_bndbox'],
                  'trg_bbox': annotation['trg_bndbox'],
                  'category': annotation['category'],

                  'src_pose': annotation['src_pose'],
                  'trg_pose': annotation['trg_pose'],

                  'src_img': src_img,
                  'trg_img': trg_img,
                  'src_kps': torch.tensor(annotation['src_kps']).float(),
                  'trg_kps': torch.tensor(annotation['trg_kps']).float(),

                  'mirror': annotation['mirror'],
                  'vp_var': annotation['viewpoint_variation'],
                  'sc_var': annotation['scale_variation'],
                  'truncn': annotation['truncation'],
                  'occlsn': annotation['occlusion'],

                  'pck_threshold': pck_threshold}

        if self.transform:
            sample = self.transform(sample)

        return sample

if __name__ == '__main__':
    # Root of the extracted dataset and the three sub-folders the loader reads from.
    base_dir = r"C:\Users\nicol\Documents\PoliTo\AdvancedML\project\SPair-71k_extracted\SPair-71k\SPair-71k"
    pair_ann_path, layout_path, image_path = (
        os.path.join(base_dir, sub_dir) for sub_dir in ('PairAnnotation', 'Layout', 'JPEGImages')
    )
    dataset_size = 'large'
    pck_alpha = 0.1

    # Only build the datasets when every expected folder is present.
    if not all(os.path.exists(p) for p in (pair_ann_path, layout_path, image_path)):
        print(f"Errore: Impossibile trovare i percorsi del dataset in '{base_dir}'.\nVerifica l'estrazione e controlla se la struttura delle cartelle corrisponde.")
    else:
        # One dataset per split, created in the order trn -> val -> test.
        trn_dataset, val_dataset, test_dataset = (
            SPairDataset(pair_ann_path, layout_path, image_path, dataset_size, pck_alpha, datatype=split)
            for split in ('trn', 'val', 'test')
        )

        # Default DataLoader settings apart from loading in the main process.
        trn_dataloader, val_dataloader, test_dataloader = (
            DataLoader(split_dataset, num_workers=0)
            for split_dataset in (trn_dataset, val_dataset, test_dataset)
        )
        print("Dataset caricati correttamente.")

Dataset caricati correttamente.


In [11]:
import torch
import math 
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# 1. SETUP
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Loading DINOv2 Model on {device}...")
# Load the 'dinov2_vitb14' entry point from torch.hub, move it to the chosen
# device and put it in evaluation mode (both calls return the module itself).
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14').to(device).eval()

# Folder that receives the saved figures.
output_dir = "final_analysis_results"
os.makedirs(output_dir, exist_ok=True)
# Classes to visualise, each flagged False until its figure has been saved.
done_classes = dict.fromkeys(('aeroplane', 'chair'), False)

# 2. FUNCTIONS
def pad_to_multiple(x, k=14):
    """Pad the last two dimensions of ``x`` with zeros up to the next multiple of ``k``.

    Padding is added only after the existing data (end of the last dimension
    and end of the second-to-last one), so original values keep their
    positions. Sizes that are already multiples of ``k`` are left unchanged.
    """
    height, width = x.shape[-2:]
    # Amount still missing to reach the next multiple of k (0 if already aligned).
    extra_h = -height % k
    extra_w = -width % k
    # F.pad lists pads for the last dimension first: (before, after) for width, then height.
    return F.pad(x, (0, extra_w, 0, extra_h), value=0)

def denormalize(img_tensor):
    """Turn a standardised image tensor back into a displayable array.

    The tensor is moved to the CPU, its leading dimension is squeezed (only if
    it has size 1) and the channel axis is moved last. The per-channel
    standardisation is then undone with the fixed mean/std below and the
    result is clipped to the [0, 1] interval.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    channels_last = img_tensor.cpu().squeeze(0).permute(1, 2, 0).numpy()
    restored = (channels_last * channel_std) + channel_mean
    return np.clip(restored, 0, 1)

def compute_joint_pca(feat1, feat2, h1, w1, h2, w2, random_state=0):
    """Project two feature sets onto a shared 3-component PCA and return them as images.

    The PCA is fitted on the concatenation of both feature sets, so a given
    colour means the same thing in both outputs. The projections are min-max
    scaled with statistics taken over both sets together, then reshaped.

    Args:
        feat1, feat2: batched feature tensors; only the first batch element is
            used. Each must hold ``h*w`` feature vectors for its grid.
        h1, w1: grid height/width used to reshape the first projection.
        h2, w2: grid height/width used to reshape the second projection.
        random_state: seed forwarded to ``PCA`` so that repeated runs give the
            same figure when a randomised SVD solver is selected.

    Returns:
        Tuple ``(img1, img2)`` of arrays shaped ``(h1, w1, 3)`` and ``(h2, w2, 3)``
        with values in [0, 1].
    """
    f1 = feat1[0].detach().cpu().numpy()
    f2 = feat2[0].detach().cpu().numpy()

    # Fit PCA on combined features so colors mean the same thing
    pca = PCA(n_components=3, random_state=random_state)
    pca.fit(np.concatenate((f1, f2), axis=0))

    p1 = pca.transform(f1)
    p2 = pca.transform(f2)

    p_all = np.concatenate((p1, p2), axis=0)
    p_min = p_all.min(axis=0)
    span = p_all.max(axis=0) - p_min
    # A constant component has zero range; dividing by it would fill the image
    # with NaNs, so scale such a component by 1 instead (it maps to all zeros).
    span = np.where(span > 0, span, 1.0)

    # Normalize and reshape
    img1 = ((p1 - p_min) / span).reshape(h1, w1, 3)
    img2 = ((p2 - p_min) / span).reshape(h2, w2, 3)
    return img1, img2

# 3. MAIN LOOP
# Scan the test split and save one PCA figure for the first pair of every class
# listed in done_classes; stop as soon as all of them have been rendered.
print("Starting PCA-only loop...")
with torch.no_grad():
    for data in tqdm(test_dataloader, desc="Scanning"):
        category = data['category'][0]
        # Skip classes that were not requested as well as classes already done.
        if done_classes.get(category, True):
            continue

        # A. PREPARE
        src_img, trg_img = data['src_img'].to(device), data['trg_img'].to(device)

        # Pad both images to a multiple of 14 and derive the grid size (in 14-pixel cells).
        src_pad, trg_pad = pad_to_multiple(src_img, 14), pad_to_multiple(trg_img, 14)
        h_g_s, w_g_s = src_pad.shape[-2] // 14, src_pad.shape[-1] // 14
        h_g_t, w_g_t = trg_pad.shape[-2] // 14, trg_pad.shape[-1] // 14

        # B. FEATURES
        # n=3 returns the outputs of the last 3 blocks. The figure labels them
        # Layer 10 / 11 / 12; the variable suffixes (_9, _11) are the 0-based indices.
        out_s = model.get_intermediate_layers(src_pad, n=3, reshape=False, return_class_token=False)
        out_t = model.get_intermediate_layers(trg_pad, n=3, reshape=False, return_class_token=False)

        # L2-normalise each feature vector of the first and of the last returned block.
        f_s_9, f_t_9 = F.normalize(out_s[0], p=2, dim=-1), F.normalize(out_t[0], p=2, dim=-1)
        f_s_11, f_t_11 = F.normalize(out_s[2], p=2, dim=-1), F.normalize(out_t[2], p=2, dim=-1)

        # C. VISUALIZATION
        pca_s_9, pca_t_9 = compute_joint_pca(f_s_9, f_t_9, h_g_s, w_g_s, h_g_t, w_g_t)
        _, pca_t_11 = compute_joint_pca(f_s_11, f_t_11, h_g_s, w_g_s, h_g_t, w_g_t)

        fig, ax = plt.subplots(2, 3, figsize=(18, 10))
        plt.subplots_adjust(hspace=0.2, wspace=0.1)
        img_s, img_t = denormalize(src_img), denormalize(trg_img)

        # Plotting
        # Row 1: Original Images (the third slot stays empty)
        ax[0, 0].imshow(img_s)
        ax[0, 0].set_title(f"SOURCE ({category})\nOriginal Image", fontsize=14)

        ax[0, 1].imshow(img_t)
        ax[0, 1].set_title("TARGET\nOriginal Image", fontsize=14)

        # Row 2: PCA Visualization (Using bicubic for paper-like look)
        ax[1, 0].imshow(pca_s_9, interpolation='bicubic')
        ax[1, 0].set_title("Source PCA (Layer 10)\nGeometric Features", fontsize=14)

        ax[1, 1].imshow(pca_t_9, interpolation='bicubic')
        ax[1, 1].set_title("Target PCA (Layer 10)\nShould Match Source Colors", fontsize=14)

        ax[1, 2].imshow(pca_t_11, interpolation='bicubic')
        ax[1, 2].set_title("Target PCA (Last Layer)\nSemantic Collapse", fontsize=14)

        # Hide the axes of every panel, including the empty one.
        for a in ax.flatten():
            a.axis('off')

        save_path = os.path.join(output_dir, f"PCA_ANALYSIS_{category}.png")
        plt.savefig(save_path, bbox_inches='tight')
        # Close the figure so figures do not pile up in memory across iterations.
        plt.close(fig)
        print(f"--> Saved PCA Analysis to: {save_path}")

        done_classes[category] = True
        # Stop once every requested class has a figure, whatever the classes are.
        if all(done_classes.values()):
            break

print("Done.")

Loading DINOv2 Model on cuda...


Using cache found in C:\Users\nicol/.cache\torch\hub\facebookresearch_dinov2_main


Starting PCA-only loop...


Scanning:   0%|          | 14/12234 [00:00<09:12, 22.11it/s] 

--> Saved PCA Analysis to: final_analysis_results\PCA_ANALYSIS_aeroplane.png


Scanning:  44%|████▍     | 5422/12234 [00:49<01:02, 108.56it/s]

--> Saved PCA Analysis to: final_analysis_results\PCA_ANALYSIS_chair.png
Done.



