In [None]:
pip install -U diffusers transformers datasets accelerate bitsandbytes peft


### Evaluation Matrics


In [2]:
import random
from PIL import Image
from datasets import load_dataset

dataset = load_dataset("hahminlew/kream-product-blip-captions", split="train")

In [None]:
import os
import random
from PIL import Image
from tqdm import tqdm

# STEP 1: Create directories
os.makedirs("ground_truth-xl", exist_ok=True)
os.makedirs("generated-xl", exist_ok=True)

# STEP 2: Select 25 random indices
selected_indices = random.sample(range(len(dataset)), 25)

# STEP 3: Save ground truth images and extract prompts
ground_truth_images = []
prompts = []

for idx, i in enumerate(selected_indices):
    image = dataset[i]['image']          # assuming it's a PIL.Image
    prompt = dataset[i]['text']          # assuming it's a string
    image.save(f"ground_truth-xl/img_{idx:02d}.png")

    ground_truth_images.append(image)
    prompts.append(prompt)




In [None]:
from diffusers import DiffusionPipeline
import torch


pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True
)

pipe.to("cuda")

pipe.load_lora_weights("DGM_project/Kream-model-lora-finetune", weight_name="pytorch_lora_weights.safetensors")

Loading pipeline components...: 100%|██████████| 7/7 [00:10<00:00,  1.46s/it]


In [5]:
from diffusers import StableDiffusionPipeline
import torch

# Load diffusion model (example: Stable Diffusion)
# pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to("cuda")

generated_images = []

for idx, prompt in tqdm(enumerate(prompts), total=len(prompts)):
    image = pipe(prompt, num_inference_steps=45, guidance_scale=7.5).images[0]
    image.save(f"generated-xl/img_{idx:02d}.png")
    generated_images.append(image)


100%|██████████| 45/45 [00:27<00:00,  1.61it/s]
100%|██████████| 45/45 [00:27<00:00,  1.63it/s]
100%|██████████| 45/45 [00:27<00:00,  1.63it/s]
100%|██████████| 45/45 [00:27<00:00,  1.63it/s]
100%|██████████| 45/45 [00:27<00:00,  1.63it/s]
100%|██████████| 5/5 [05:02<00:00, 60.41s/it]


In [7]:
print(prompts)

['outer, Carhartt WIP Arling Jacket Black Faded, a photography of a jacket with a hood and a zipper', 'top, Stussy Skull & Bones Pigment Dyed T-Shirt Forest, a photography of a green t - shirt with a yellow skull and bones on it', 'bottom,  Adidas Adicolor Classics Long Track Skirt Black - US Sizing, a photography of a black skirt with white stripes on the side', 'top, C.P. Company Open Diagonal Raised Fleece Hooded Sweatshirt Ivy Green - 23FW, a photography of a green hooded jacket with a hoodie', 'outer, Martine Rose Oversized Panelled Track Jacket Teal White, a photography of a teal and white jacket with a zippered collar']


In [None]:
! pip install scikit-image tqdm cleanfid


In [9]:
import os
from PIL import Image
import numpy as np
from tqdm import tqdm
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim

# Paths
gen_path = "generated-xl"
gt_path = "ground_truth-xl"

# Metrics
psnr_scores = []
ssim_scores = []

# Compute PSNR & SSIM
for i in tqdm(range(len(prompts))):
    gen_img = Image.open(os.path.join(gen_path, f"img_{i:02d}.png")).convert("RGB")
    gt_img = Image.open(os.path.join(gt_path, f"img_{i:02d}.png")).convert("RGB")

    gen_np = np.array(gen_img.resize((256, 256))) / 255.0
    gt_np = np.array(gt_img.resize((256, 256))) / 255.0

    psnr_scores.append(psnr(gt_np, gen_np, data_range=1.0))
    ssim_scores.append(ssim(gt_np, gen_np, channel_axis=-1, data_range=1.0))


print(f"Average PSNR: {np.mean(psnr_scores):.4f}")
print(f"Average SSIM: {np.mean(ssim_scores):.4f}")


100%|██████████| 5/5 [00:00<00:00, 27.77it/s]

Average PSNR: 9.8838
Average SSIM: 0.5960



