In [3]:
import kagglehub

# Fetch the latest published version of the dataset and report where it was placed.
dataset_handle = "kevinpatel04/celeba-original-wild-images"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Mounting files to /kaggle/input/celeba-original-wild-images...
Path to dataset files: /kaggle/input/celeba-original-wild-images


In [None]:
!pip -qq install ultralytics

In [1]:
import kagglehub

# Fetch the latest published version of the dataset and report where it was placed.
dataset_handle = "jessicali9530/celeba-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/celeba-dataset


In [None]:
import torchvision.transforms.functional as TF
import torchvision.transforms as transforms
import numpy as np, torch, cv2, os, glob
import matplotlib.pyplot as plt
from ultralytics import YOLO
from tqdm import tqdm
from PIL import Image

# ───────────────────────────────── CONFIG ─────────────────────────────────
# Folder holding the aligned CelebA .jpg files.
INPUT_DIR = "/kaggle/input/celeba-dataset/img_align_celeba/img_align_celeba"
# Weights file loaded by the detector.
MODEL_PATH = "/kaggle/input/yolo-world-small/pytorch/default/1/yolov8s-world.pt"

# Heatmap height and width in pixels.
MASK_H = MASK_W = 128

# Text prompts given to the detector. A detection's class id is used as an index
# into this list, so the order matters.
PROMPTS = [
    'eye',
    'mouth',
    'ear',
    'nose',
    'nosetip',
    'face',
    'chin',
    'head',
    'eyebrows',
]

# Per-label multiplier applied to a detection's edge map before it is merged
# into the heatmap.
WEIGHTS = {
    'eye': 4.5,
    'mouth': 4.0,
    'nose': 4.0,
    'nosetip': 3.0,
    'chin': 4.0,
    'ear': 4.0,
    'eyebrows': 4.0,
    'face': 4.0,
    'head': 1.2,
}

# Upper bound on how many images are processed.
NUM_IMAGES = 30_000

# ───────────────────────────────── UTILS ─────────────────────────────────
def inverse_gaussian_fade(shape):
    """Return a float32 mask that is low near the centre and grows towards the borders.

    Both axes are mapped onto [-1, 1]; each value is
    ``1 - exp(-d**2 / (2 * sigma**2))`` where ``d`` is the Euclidean distance
    from the origin of that coordinate system and ``sigma`` is 0.6.

    Args:
        shape: ``(rows, cols)`` pair giving the size of the mask.

    Returns:
        np.ndarray of dtype float32 with shape ``(rows, cols)``.
    """
    n_rows, n_cols = shape
    sigma = 0.6
    # Column vector for the row coordinate and row vector for the column
    # coordinate; broadcasting them yields the full (rows, cols) grid.
    row_axis = np.linspace(-1, 1, n_rows)[:, np.newaxis]
    col_axis = np.linspace(-1, 1, n_cols)[np.newaxis, :]
    distance = np.sqrt(row_axis**2 + col_axis**2)
    mask = 1 - np.exp(-(distance**2) / (2 * sigma**2))
    return mask.astype(np.float32)

# Memo of inverse fade masks, keyed by the (rows, cols) shape they were built for.
INV_FADE_CACHE = {}

def get_inverse_fade(shape):
    """Return the inverse Gaussian fade mask for ``shape``, building it on first use.

    Args:
        shape: Hashable ``(rows, cols)`` pair used as the cache key.

    Returns:
        The cached array itself (not a copy), as produced by ``inverse_gaussian_fade``.
    """
    mask = INV_FADE_CACHE.get(shape)
    if mask is None:
        mask = inverse_gaussian_fade(shape)
        INV_FADE_CACHE[shape] = mask
    return mask

def extract_edges(img_gray):
    """Blend a Scharr gradient magnitude with a Canny edge map, scaled by its peak.

    Args:
        img_gray: Single-channel image. It is passed to ``cv2.Canny``, so it is
            presumably expected to be 8-bit — confirm against callers.

    Returns:
        Array holding the equal-weight mix of both edge maps divided by its
        maximum (plus a small epsilon to avoid division by zero).
    """
    # Binary Canny edges rescaled from {0, 255} to {0.0, 1.0}.
    canny_map = cv2.Canny(img_gray, 50, 150).astype(np.float32)/255.0

    # Gradient magnitude from horizontal and vertical Scharr derivatives,
    # normalised in place by its own peak.
    grad_x = cv2.Scharr(img_gray, cv2.CV_32F, 1, 0)
    grad_y = cv2.Scharr(img_gray, cv2.CV_32F, 0, 1)
    magnitude = np.sqrt(grad_x**2 + grad_y**2)
    magnitude /= (magnitude.max() + 1e-8)

    blend = (0.5 * magnitude + 0.5 * canny_map)
    return blend / (blend.max() + 1e-8)

def gaussian_fade(shape):
    """Return a float32 mask that peaks near the centre and decays towards the borders.

    Both axes are mapped onto [-1, 1]; each value is ``exp(-2 * (r**2 + c**2))``
    where ``r`` and ``c`` are the row and column coordinates in that system.

    Args:
        shape: ``(rows, cols)`` pair giving the size of the mask.

    Returns:
        np.ndarray of dtype float32 with shape ``(rows, cols)``.
    """
    n_rows, n_cols = shape
    # Column vector for the row coordinate and row vector for the column
    # coordinate; broadcasting them yields the full (rows, cols) grid.
    row_axis = np.linspace(-1, 1, n_rows)[:, np.newaxis]
    col_axis = np.linspace(-1, 1, n_cols)[np.newaxis, :]
    mask = np.exp(-2.0 * (row_axis**2 + col_axis**2))
    return mask.astype(np.float32)

# Memo of Gaussian fade masks, keyed by the (rows, cols) shape they were built for.
FADE_CACHE = {}

def get_fade(shape):
    """Return the Gaussian fade mask for ``shape``, building it on first use.

    Args:
        shape: Hashable ``(rows, cols)`` pair used as the cache key.

    Returns:
        The cached array itself (not a copy), as produced by ``gaussian_fade``.
    """
    mask = FADE_CACHE.get(shape)
    if mask is None:
        mask = gaussian_fade(shape)
        FADE_CACHE[shape] = mask
    return mask

def preprocess_exactly_like_dataset(img_path: str) -> np.ndarray:
    """Load an image, centre-crop it to 178x178 and resize it to 128x128.

    The crop and resize are meant to mirror the training dataset's transforms
    with augmentation disabled (per the original author's note; the dataset
    class is not visible here, so this cannot be verified from this file).

    Args:
        img_path: Path of the image file to open.

    Returns:
        The processed image as a numpy array in RGB channel order (not BGR).
    """
    center_crop = transforms.CenterCrop((178, 178))
    resize = transforms.Resize((128, 128))

    # Open with PIL and force three RGB channels before applying the transforms.
    pil_rgb = Image.open(img_path).convert('RGB')
    resized = resize(center_crop(pil_rgb))

    return np.array(resized)

def detailed_heatmap_aligned(img_path, yolo_model, prompts, weights):
    """Build a saliency heatmap for one image from detector boxes and edge maps.

    The image is preprocessed with ``preprocess_exactly_like_dataset`` and the
    detector is run on the result. For every detected box, an edge map of the
    grayscale crop is faded, weighted by its label and merged into one map with
    an element-wise maximum. That map is then blurred and min-max normalised.

    Args:
        img_path: Path of the image file to process.
        yolo_model: Detector called as ``yolo_model(img, conf=..., iou=..., verbose=...)``;
            the first result must expose ``boxes.xyxy`` and ``boxes.cls`` tensors.
        prompts: Sequence of label names indexed by the detector's class ids.
        weights: Mapping from label name to heatmap weight; labels that are
            missing use 1.0.

    Returns:
        float32 array with the height and width of the preprocessed image.
        Values are scaled to [0, 1] unless the map is constant; the map is all
        zeros when nothing is detected.
    """
    # 1. Preprocess exactly like the dataset; the result is an RGB array.
    img128 = preprocess_exactly_like_dataset(img_path)
    H, W = img128.shape[:2]

    gray128 = cv2.cvtColor(img128, cv2.COLOR_RGB2GRAY)

    # 2. Ultralytics interprets numpy inputs as BGR while img128 is RGB, so the
    #    channels are swapped before inference. Without this the detector sees
    #    red and blue exchanged.
    img_bgr = cv2.cvtColor(img128, cv2.COLOR_RGB2BGR)
    results = yolo_model(img_bgr, conf=0.001, iou=0.1, verbose=False)[0]

    if len(results.boxes) == 0:
        return np.zeros((H, W), dtype=np.float32)

    boxes = results.boxes.xyxy.cpu().numpy().astype(int)
    classes = results.boxes.cls.cpu().numpy().astype(int)

    combined_heatmap = np.zeros((H, W), dtype=np.float32)

    # 3. Accumulate one faded, weighted edge map per detection.
    for (x1, y1, x2, y2), cls in zip(boxes, classes):
        lab = prompts[cls]
        weight = weights.get(lab, 1.0)

        # Clamp the box to the image and skip boxes that end up empty.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(W, x2), min(H, y2)
        if x2 <= x1 or y2 <= y1:
            continue

        edges = extract_edges(gray128[y1:y2, x1:x2])

        # Grow the region around the box (adaptive padding, at least 8 px) so
        # the contribution does not end in a hard border.
        box_w, box_h = x2 - x1, y2 - y1
        pad = max(8, min(box_w, box_h) // 4)

        y1_exp = max(0, y1 - pad)
        y2_exp = min(H, y2 + pad)
        x1_exp = max(0, x1 - pad)
        x2_exp = min(W, x2 + pad)

        local_h, local_w = y2_exp - y1_exp, x2_exp - x1_exp
        local_heatmap = np.zeros((local_h, local_w), dtype=np.float32)

        # Offset of the original box inside the padded region.
        crop_start_y = y1 - y1_exp
        crop_start_x = x1 - x1_exp

        # The edge map should already have the box size; the resize makes sure
        # it matches the slot it is written into below.
        edges_resized = cv2.resize(edges, (box_w, box_h), interpolation=cv2.INTER_LINEAR)

        if lab == 'head':
            # Head boxes: centre-weighted fade and half the configured weight.
            edges_resized *= get_fade(edges_resized.shape)
            weight *= 0.5
        elif lab == 'face':
            # Face boxes: border-weighted fade, softened so the centre keeps
            # at least 30% of its value.
            fade = get_inverse_fade(edges_resized.shape)
            fade = 0.3 + 0.7 * fade
            edges_resized *= fade
        else:
            # All other landmarks: centre-weighted fade.
            edges_resized *= get_fade(edges_resized.shape)

        local_heatmap[crop_start_y:crop_start_y + box_h, crop_start_x:crop_start_x + box_w] = edges_resized

        # Wide radial falloff over the padded region, floored at 0.3 so the
        # whole box keeps some response.
        center_y, center_x = local_h // 2, local_w // 2
        y_coords, x_coords = np.ogrid[:local_h, :local_w]
        distances = np.sqrt((x_coords - center_x)**2 + (y_coords - center_y)**2)
        max_dist = min(local_h, local_w) / 2

        radial_fade = np.exp(-0.8 * (distances / max_dist)**2)
        radial_fade = np.clip(radial_fade, 0.3, 1.0)

        local_heatmap *= radial_fade

        # Merge into the global map, keeping the strongest response per pixel.
        combined_heatmap[y1_exp:y2_exp, x1_exp:x2_exp] = np.maximum(
            combined_heatmap[y1_exp:y2_exp, x1_exp:x2_exp],
            local_heatmap * weight
        )

    # 4. Smooth, then min-max normalise unless the map is constant.
    combined_heatmap = cv2.GaussianBlur(combined_heatmap, (21, 21), sigmaX=2.0)

    lowest, highest = combined_heatmap.min(), combined_heatmap.max()
    if highest > lowest:
        combined_heatmap = (combined_heatmap - lowest) / (highest - lowest)

    return combined_heatmap

def verify_alignment_with_dataset(img_path, dataset_loader_func=None):
    """Preprocess an image and optionally compare it with a reference loader.

    Args:
        img_path: Path of the image file to check.
        dataset_loader_func: Optional callable taking ``img_path`` and returning
            an array to compare against. It must support ``.astype`` and be
            broadcastable with the preprocessed image. When omitted (or falsy)
            no comparison is made.

    Returns:
        The image produced by ``preprocess_exactly_like_dataset``. When a
        loader is given, the maximum absolute pixel difference and a verdict
        are printed as a side effect.
    """
    our_img = preprocess_exactly_like_dataset(img_path)

    # Nothing to compare against: just hand back our version.
    if not dataset_loader_func:
        return our_img

    reference_img = dataset_loader_func(img_path)
    max_diff = np.abs(our_img.astype(float) - reference_img.astype(float)).max()

    print(f"✅ Alignment check: max pixel difference = {max_diff}")
    if max_diff < 1e-6:
        verdict = "PERFECT ALIGNMENT!"
    elif max_diff < 1.0:
        verdict = "Very good alignment (sub-pixel differences)"
    else:
        verdict = "Significant differences detected"
    print(verdict)

    return our_img

# ───────────────────────────────── SETUP ─────────────────────────────────
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Load the detector weights from MODEL_PATH and move the model to that device.
yolo_model = YOLO(MODEL_PATH).to(device)
# Register the text prompts as the detector's classes; the heatmap code maps
# predicted class ids back to label names through this same PROMPTS list.
yolo_model.set_classes(PROMPTS)
yolo_model.eval()

# Sorted list of .jpg files in INPUT_DIR, truncated to the first NUM_IMAGES.
image_paths = sorted(glob.glob(os.path.join(INPUT_DIR, '*.jpg')))[:NUM_IMAGES]

# ───────────────────────────────── GENERATE PIXEL-PERFECT ALIGNED HEATMAPS ─────────────────────────────────
# Fail early with a clear message instead of an opaque stacking error later.
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in {INPUT_DIR}")

# Preallocate the outputs so each result is written straight into its final
# array. Collecting Python lists and stacking them afterwards would keep two
# full copies of the data in memory at the peak.
heatmaps = np.empty((len(image_paths), MASK_H, MASK_W), dtype=np.float32)        # (N,128,128)
processed_images = np.empty((len(image_paths), MASK_H, MASK_W, 3), dtype=np.uint8)  # (N,128,128,3)

print("Generating pixel-perfect aligned heatmaps...")

# The loop variable is deliberately not called `path`, so it does not overwrite
# the dataset root returned by the download cell.
for idx, img_path in enumerate(tqdm(image_paths, desc="Extracting aligned heatmaps")):
    # Image preprocessed the same way as the dataset. Note that
    # detailed_heatmap_aligned repeats this preprocessing internally.
    processed_images[idx] = preprocess_exactly_like_dataset(img_path)

    # Heatmap computed on that same preprocessed view of the image.
    heatmaps[idx] = detailed_heatmap_aligned(img_path, yolo_model, PROMPTS, WEIGHTS)

# ───────────────────────────────── SAVE ─────────────────────────────────
import h5py

# One variable for the output path, so the log message below always names the
# file that was actually written. The file name itself is unchanged.
OUTPUT_PATH = 'heatmaps_aligned_10k.h5'

with h5py.File(OUTPUT_PATH, 'w') as hf:
    # Heatmaps are stored as float16 ('f2') to halve the on-disk size.
    hf.create_dataset(
        'heatmaps', data=heatmaps.astype('f2'),
        compression='gzip', compression_opts=4
    )
    hf.create_dataset(
        'images', data=processed_images.astype('uint8'),
        compression='gzip', compression_opts=4
    )

print(f"Saved {len(heatmaps)} PIXEL-PERFECT aligned heatmaps to {OUTPUT_PATH}")
print("Heatmaps are now perfectly aligned with CelebDataSet (data_augmentation=False)!")