In [None]:
from pathlib import Path
from PIL import Image
import torch
from transformers import SiglipVisionModel, SiglipImageProcessor

# Build a bank of SigLIP pooled embeddings for the overlay images matching
# "001_*_overlay.png" and store it as a single {sample_id: tensor} file.
overlay_dir = Path("/home/s2behappy4/data/gyuhyeong/code/overlays/hazelnut/crack")
token_dir   = Path("/home/s2behappy4/data/gyuhyeong/code/siglip/hazelnut/crack")
token_dir.mkdir(parents=True, exist_ok=True)

# Sorted so the insertion order of the bank is deterministic across runs.
overlay_paths = sorted(overlay_dir.glob("001_*_overlay.png"))
if not overlay_paths:
    # Fail before loading the model or writing an empty bank file.
    raise FileNotFoundError(
        f"No files matching '001_*_overlay.png' found in {overlay_dir}"
    )

model_id = "google/siglip-so400m-patch14-384"
device   = "cuda" if torch.cuda.is_available() else "cpu"
proc     = SiglipImageProcessor.from_pretrained(model_id)
vision   = SiglipVisionModel.from_pretrained(model_id).to(device).eval()

token_bank = {}  # sample_id -> pooler_output of that image (CPU tensor)
for img_path in overlay_paths:
    # convert() returns a new image, so the source handle can be closed right away.
    with Image.open(img_path) as src:
        img = src.convert("RGB")
    batch = proc(images=img, return_tensors="pt").to(device)

    with torch.no_grad():
        # One image per batch: squeeze(0) drops the leading batch axis.
        cls_vec = vision(**batch).pooler_output.squeeze(0).cpu()

    # "001_xxx_overlay" -> "001_xxx"
    sample_id = img_path.stem.replace("_overlay", "")
    token_bank[sample_id] = cls_vec

save_path = token_dir / "hazelnut_001_siglip_tokens.pt"
torch.save(token_bank, save_path)

# Summary of what was written (overlay_paths is non-empty, so the bank is too).
n_tokens   = len(token_bank)
token_dim  = next(iter(token_bank.values())).shape
print(f"Saved {n_tokens} embeddings of shape {tuple(token_dim)} to {save_path}")

In [None]:
from pathlib import Path
import torch

# Pair every embedding in the saved token bank with its "<sample_id>_mask.pt"
# tensor from the overlay directory and store the list of pairs in one file.
overlay_dir = Path("/home/s2behappy4/data/gyuhyeong/code/overlays/hazelnut/crack")
token_dir   = Path("/home/s2behappy4/data/gyuhyeong/code/siglip/hazelnut/crack")

token_path  = token_dir / "hazelnut_001_siglip_tokens.pt"
pairs_path  = token_dir / "hazelnut_001_token_mask_pairs.pt"

# NOTE(review): torch.load unpickles its input; only load files produced by trusted code.
# map_location="cpu" lets this cell run on a machine without a GPU.
token_bank = torch.load(token_path, map_location="cpu")

pairs = []    # list of {"mask_token": embedding, "mask": boolean mask}
missing = []  # file names of expected masks that do not exist

for sample_id, cls_vec in token_bank.items():
    mask_file = overlay_dir / f"{sample_id}_mask.pt"
    if not mask_file.exists():
        # Remember the gap and move on; it is reported after the loop.
        missing.append(mask_file.name)
        continue

    # Loaded onto the CPU so it lives on the same device as the embedding.
    mask_tensor = torch.load(mask_file, map_location="cpu").bool()
    pairs.append({
        "mask_token": cls_vec,
        "mask"      : mask_tensor
    })

torch.save(pairs, pairs_path)

# Surface the outcome so skipped samples do not go unnoticed.
print(f"Saved {len(pairs)} token/mask pairs to {pairs_path}")
if missing:
    print(f"Skipped {len(missing)} sample(s) without a mask file: {missing}")

# # Hazelnut (cut): save SigLIP tokens

In [None]:
from pathlib import Path
from PIL import Image
import torch
from transformers import SiglipVisionModel, SiglipImageProcessor

# Run every PNG in the hazelnut "cut" test folder through the SigLIP vision
# model and write one "<stem>_cls.pt" file per image into out_dir.
img_dir  = Path("/home/s2behappy4/data/gyuhyeong/dataset/MMAD/MVTec-AD/hazelnut/test/cut")
out_dir  = Path("/home/s2behappy4/data/gyuhyeong/code/siglip_token/hazelnut/cut/02")
out_dir.mkdir(parents=True, exist_ok=True)

model_id = "google/siglip-so400m-patch14-384"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
proc   = SiglipImageProcessor.from_pretrained(model_id)
vision = SiglipVisionModel.from_pretrained(model_id)
vision = vision.to(device)
vision = vision.eval()

png_files = sorted(img_dir.glob("*.png"))
for img_path in png_files:
    key = img_path.stem  # file name without extension, reused in the output name
    img = Image.open(img_path).convert("RGB")
    batch = proc(images=img, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = vision(**batch)
        # Drop the leading batch axis and bring the vector back to the CPU.
        cls_vec = outputs.pooler_output.squeeze(0).cpu()

    save_path = out_dir / f"{key}_cls.pt"
    payload = {"cls_token": cls_vec}
    torch.save(payload, save_path)
    print(f"[✔] {key} 저장 → {save_path}")

# # Hazelnut (crack): save SigLIP tokens

In [None]:
from pathlib import Path
from PIL import Image
import torch
from transformers import SiglipVisionModel, SiglipImageProcessor

# Embed each PNG of the hazelnut "crack" test set with the SigLIP vision model;
# every image gets its own "<stem>_cls.pt" file under out_dir.
img_dir  = Path("/home/s2behappy4/data/gyuhyeong/dataset/MMAD/MVTec-AD/hazelnut/test/crack")
out_dir  = Path("/home/s2behappy4/data/gyuhyeong/code/siglip_token/hazelnut/crack/02")
out_dir.mkdir(parents=True, exist_ok=True)

model_id = "google/siglip-so400m-patch14-384"
has_gpu  = torch.cuda.is_available()
device   = "cuda" if has_gpu else "cpu"
proc     = SiglipImageProcessor.from_pretrained(model_id)
vision   = SiglipVisionModel.from_pretrained(model_id)
vision.to(device)   # nn.Module.to moves the parameters in place
vision.eval()       # switch to evaluation mode

image_files = list(img_dir.glob("*.png"))
image_files.sort()  # process the files in path order

for img_path in image_files:
    key = img_path.stem
    img = Image.open(img_path).convert("RGB")
    batch = proc(images=img, return_tensors="pt").to(device)

    with torch.no_grad():
        pooled = vision(**batch).pooler_output
    # Single-image batch: remove the batch axis, then move to the CPU.
    cls_vec = pooled.squeeze(0).cpu()

    save_path = out_dir.joinpath(f"{key}_cls.pt")
    torch.save({"cls_token": cls_vec}, save_path)
    print(f"[✔] {key} 저장 → {save_path}")