In [None]:
import os
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
from PIL import Image
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import zipfile

In [None]:
# Base paths: the CelebA archive and the folder receiving the 10k-image subset
zip_path = "../celebA/celeba/img_align_celeba.zip"
real_10k = "../celebA/celeba/real_10000"

print("Extracting first 10k images from ZIP...")
os.makedirs(real_10k, exist_ok=True)

# Only archive members with an image extension are considered.
image_suffixes = ('.jpg', '.jpeg', '.png')
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    all_files = [name for name in zip_ref.namelist() if name.lower().endswith(image_suffixes)]
    all_files.sort()
    # "First" means first in sorted member-name order, not archive order.
    first_10k = all_files[:10000]
    # Members keep their path inside the archive, relative to real_10k.
    zip_ref.extractall(path=real_10k, members=first_10k)

print(f"Extracted {len(first_10k)} images to {real_10k}\n")

In [8]:
# Paths
real_dir = "../celebA/celeba/real_10000/img_align_celeba"
# Generator name -> directory of generated images. Insertion order matters:
# TSNEDataset assigns labels 1, 2, ... to these entries in iteration order.
fake_dirs = {
    "VAE": "../vae_outputs/generated",
    "GAN": "../gan_outputs/generated"
}
# Integer label -> display name. Label 0 is the real images; the remaining
# labels are derived from fake_dirs with the same enumerate(..., start=1)
# numbering TSNEDataset uses, so the two cannot drift apart when a generator
# is added, removed or reordered.
label_map = {0: "Real"}
label_map.update(enumerate(fake_dirs, start=1))

In [9]:
# Dataset
class TSNEDataset(Dataset):
    """Labelled image dataset combining real images with generated ones.

    Images from ``real_dir`` get label 0. Each directory in ``fake_dirs`` gets
    the next integer label (1, 2, ...) in the mapping's iteration order.

    Args:
        real_dir: Directory containing the real images.
        fake_dirs: Mapping of generator name -> directory of generated images.
        transform: Callable applied to each loaded RGB PIL image.
        samples_per_class: Maximum number of entries taken from each
            directory (the first N in sorted filename order).

    Raises:
        FileNotFoundError: From ``os.listdir`` if a directory does not exist.
    """

    def __init__(self, real_dir, fake_dirs, transform, samples_per_class=1000):
        self.paths, self.labels = [], []
        self.transform = transform

        # Real images first (label 0), then each fake directory in order.
        class_dirs = [real_dir, *fake_dirs.values()]
        for label, directory in enumerate(class_dirs):
            names = sorted(os.listdir(directory))[:samples_per_class]
            self.paths += [os.path.join(directory, name) for name in names]
            # Emit one label per path actually found. A directory may hold
            # fewer than samples_per_class entries, and using the requested
            # count would leave labels longer than paths and mislabel every
            # class that follows.
            self.labels += [label] * len(names)

    def __len__(self):
        """Return the total number of collected image paths."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(transform(image), label)``."""
        img = Image.open(self.paths[idx]).convert("RGB")
        return self.transform(img), self.labels[idx]

# Setup
# Features are extracted later with an ImageNet-pretrained ResNet-18.
# torchvision's pretrained weights expect inputs normalised with the ImageNet
# per-channel mean/std, so the normalisation is applied here, after ToTensor
# has scaled pixels to [0, 1].
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset = TSNEDataset(real_dir, fake_dirs, transform)
# shuffle=False keeps batches in dataset order (real first, then each fake dir).
loader = DataLoader(dataset, batch_size=64, shuffle=False)

FileNotFoundError: [Errno 2] No such file or directory: '../gan_outputs/generated'

In [None]:
# Feature Extractor
# ResNet-18 whose final `fc` layer is swapped for an identity, so a forward
# pass returns the activations that would have fed the classifier.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = resnet18(pretrained=True)
model.fc = torch.nn.Identity()
model.to(device)
model.eval()

# Feature extraction: collect per-batch arrays, then stack them once at the end
feature_batches, label_batches = [], []
with torch.no_grad():
    for batch_imgs, batch_lbls in tqdm(loader, desc="Extracting features"):
        batch_feats = model(batch_imgs.to(device))
        feature_batches.append(batch_feats.cpu().numpy())
        label_batches.append(batch_lbls.numpy())

features = np.vstack(feature_batches)
labels = np.concatenate(label_batches)

# t-SNE
# The iteration count is deliberately left at scikit-learn's default of 1000.
# The keyword was renamed from `n_iter` to `max_iter` in scikit-learn 1.5 and
# `n_iter` was removed in 1.7, so passing either name explicitly breaks on
# some versions; the default is the same value under both.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
tsne_result = tsne.fit_transform(features)

# Plot
fig, ax = plt.subplots(figsize=(10, 8))
# Fixed colours for the three known classes. Any further label in label_map
# gets color=None, which lets matplotlib pick from its default colour cycle.
class_colors = {0: 'black', 1: 'red', 2: 'blue'}
# Iterate the label map rather than a hard-coded range(3), so the plot follows
# whatever classes are configured.
for class_id, class_name in label_map.items():
    mask = labels == class_id
    ax.scatter(
        tsne_result[mask, 0],
        tsne_result[mask, 1],
        s=10,
        color=class_colors.get(class_id),
        label=class_name,
        alpha=0.6,
    )

ax.legend()
ax.set_title("t-SNE of Real and Generated Images")
ax.set_xlabel("t-SNE dimension 1")
ax.set_ylabel("t-SNE dimension 2")
ax.grid(True)
fig.tight_layout()
plt.show()
