# In [None]:
import json
import os

import torch
from diffusers import (
    LCMScheduler,
    StableDiffusion3Pipeline,
    StableDiffusionPipeline,
)
from diffusers.utils import load_image
from peft import PeftConfig, PeftModel
from PIL import Image
from tqdm import tqdm
from transformers import CLIPModel, CLIPProcessor


# 1. Load the evaluation prompts from JSON.
# The file is read as a list of objects, each carrying a "prompt" key.
with open("test_prompts.json", encoding="utf-8") as f:
    prompt_data = json.load(f)
prompts = [item["prompt"] for item in prompt_data]
# Fail fast: with no prompts the script would otherwise run to the end and
# only then crash with a ZeroDivisionError while averaging the scores.
if not prompts:
    raise ValueError("test_prompts.json contains no prompts")

# 2. Prepare the Stable Diffusion 3.5 pipeline (needs a recent diffusers release).
# NOTE(review): this repo id looks like a placeholder -- replace it with the
# checkpoint actually used (presumably a "-medium" / "-large" variant; confirm).
base_model_id = "stabilityai/stable-diffusion-3.5"
# SD3-family checkpoints load through StableDiffusion3Pipeline. The model id is
# the positional argument, and from_pretrained() has no `seed` parameter (the
# original `seed=42` placed before it was a SyntaxError).
pipe = StableDiffusion3Pipeline.from_pretrained(
    base_model_id, torch_dtype=torch.float16
).to("cuda")
# Keep the intended seed by seeding torch's global RNGs, which the pipeline
# falls back to when no explicit generator is passed to a call.
torch.manual_seed(42)

# Create the output folders. They must be the same folders the images are
# saved to and later read back from ("outputs/..."); creating "test_outputs/..."
# instead made image.save() fail on a missing directory.
os.makedirs("outputs/base", exist_ok=True)
os.makedirs("outputs/lora", exist_ok=True)

# 3. Generate one image per prompt with the base model.
for i, prompt in enumerate(tqdm(prompts, desc="Base Images")):
    # A freshly seeded generator per prompt makes every image reproducible and
    # independent of how many prompts were sampled before it.
    generator = torch.Generator().manual_seed(42)
    image = pipe(
        prompt,
        num_inference_steps=30,
        guidance_scale=7.0,
        generator=generator,
    ).images[0]
    # Zero-padded index keeps the lexicographic file order equal to prompt order.
    image.save(f"outputs/base/{i:03d}.png")

# 4. Apply the LoRA weights to the already-loaded pipeline.
lora_path = "lora_weights"  # folder/file path of the LoRA parameters
pipe.load_lora_weights(lora_path)

# 5. Generate one image per prompt with the LoRA-patched model.
for i, prompt in enumerate(tqdm(prompts, desc="LoRA Images")):
    # A freshly seeded generator per prompt fixes the starting noise, so the
    # output for a given prompt is reproducible across runs.
    generator = torch.Generator().manual_seed(42)
    image = pipe(
        prompt,
        num_inference_steps=30,
        guidance_scale=7.0,
        generator=generator,
    ).images[0]
    # Zero-padded index keeps the lexicographic file order equal to prompt order.
    image.save(f"outputs/lora/{i:03d}.png")

# 6. Prepare CLIP for scoring.
# `device` was referenced here without ever being defined (NameError); define
# it explicitly, preferring the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

def clip_score(image_path, prompt):
    """Return the CLIP image-text logit for one image/prompt pair.

    Args:
        image_path: Path of the image file to score.
        prompt: Text the image is compared against.

    Returns:
        The single ``logits_per_image`` value produced by ``clip_model`` for
        this pair, as a Python float.
    """
    # Open via a context manager so the file handle is released right away;
    # convert() returns a separate, fully loaded image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    # truncation=True clips prompts that exceed the text encoder's maximum
    # length instead of letting the forward pass fail on them.
    inputs = clip_processor(
        text=[prompt],
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Follow the model's own device rather than relying on a separate global.
    target_device = clip_model.device
    inputs = {k: v.to(target_device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = clip_model(**inputs)
    return outputs.logits_per_image.item()

def get_image_paths(folder):
    """Return the paths of all ``.png`` files in *folder*, sorted by filename.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        A list of ``os.path.join(folder, name)`` strings in ascending
        filename order; empty when the folder holds no ``.png`` files.
    """
    png_paths = []
    for name in sorted(os.listdir(folder)):
        if not name.endswith(".png"):
            continue
        png_paths.append(os.path.join(folder, name))
    return png_paths

# Collect the generated images. Pairing with `prompts` relies on sorted
# filenames following prompt order, which holds for the zero-padded 3-digit
# names used when saving (i.e. up to 1000 images).
base_imgs = get_image_paths("outputs/base")
lora_imgs = get_image_paths("outputs/lora")
# Explicit check instead of `assert`: asserts are stripped under `python -O`,
# after which a count mismatch (e.g. stale files from an earlier run) would be
# silently truncated by zip() during scoring.
if not (len(base_imgs) == len(lora_imgs) == len(prompts)):
    raise AssertionError(
        "image/prompt count mismatch: "
        f"base={len(base_imgs)}, lora={len(lora_imgs)}, prompts={len(prompts)}"
    )

def eval_clip_scores(img_paths, prompts):
    """Score every image against the prompt at the same position.

    Args:
        img_paths: Image file paths, ordered to match *prompts*.
        prompts: Prompt strings, one per image.

    Returns:
        A list with one ``clip_score`` result per (image, prompt) pair.
    """
    scores = []
    pairs = zip(img_paths, prompts)
    # zip() has no length, so the progress bar total is given explicitly.
    for path, text in tqdm(pairs, total=len(prompts)):
        scores.append(clip_score(path, text))
    return scores

base_scores = eval_clip_scores(base_imgs, prompts)
lora_scores = eval_clip_scores(lora_imgs, prompts)

# Report the mean CLIP score of each model over the whole prompt set.
base_mean = sum(base_scores) / len(base_scores)
print(f"\n[Base 모델] CLIP score 평균: {base_mean:.4f}")
lora_mean = sum(lora_scores) / len(lora_scores)
print(f"[LoRA 모델] CLIP score 평균: {lora_mean:.4f}")

# Per-sample comparison: the arrow shows whether LoRA scored higher or lower
# than the base model, "-" marks a tie.
for number, (base_s, lora_s) in enumerate(zip(base_scores, lora_scores), start=1):
    if lora_s > base_s:
        trend = "↑"
    elif lora_s < base_s:
        trend = "↓"
    else:
        trend = "-"
    print(f"{number:03d}번 프롬프트 | base: {base_s:.4f} | lora: {lora_s:.4f} | {trend}")
