In [1]:
from pathlib import Path
import os, sys
repo_path= Path.cwd().resolve()
while '.gitignore' not in os.listdir(repo_path): # while not in the root of the repo
    repo_path = repo_path.parent #go up one level
sys.path.insert(0,str(repo_path)) if str(repo_path) not in sys.path else None
exp_path = Path.cwd().resolve() # experiment path
# visible GPUs
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
from torchmetrics import MultiScaleStructuralSimilarityIndexMeasure
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch
import numpy as np
from matplotlib import pyplot as plt
import cv2 as cv

There are 2 metrics: MS-SSIM (diversity) and FID (fidelity)

# MS-SSIM

We will compute this metric for each model (vendor) and for each prompt
- Each prompt will have
    - 4 synthetic pairs
    - 4 real pairs

In [68]:
# load model
output_dir= repo_path  /'results/mammo40k_size-prompt_5k'
pipe = StableDiffusionPipeline.from_pretrained(output_dir, safety_checker=None, torch_dtype=torch.float16).to("cuda")
generator = torch.Generator(device='cuda')
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()

# DiffusionHP
guidance_scale = 4
num_inference_steps = 50
prompt = 'mammogram, MLO view, small size'
seed = 1337
generator.manual_seed(seed)
negative_prompt = ""
num_samples = 2
size = 512

# create ssim_vector
num_tests = 4
ssim_values = np.zeros(num_tests)

for i in range(num_tests):
    
    with torch.autocast("cuda"), torch.inference_mode():
        image = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_samples,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            height=size,
            width=size,
            generator=None,
            output_type='pil'
        ).images

    # turn pil images to torch tensors
    t_images = torch.stack([torch.tensor(np.asarray(img, dtype=float)).permute(2,0,1) for img in image])
    # add additional channel to t_image
    t_images = t_images.unsqueeze(1)
    # compute MS-SSIM
    ms_ssim = MultiScaleStructuralSimilarityIndexMeasure(kernel_size=11, reduction='none')
    ssim_values[i] = ms_ssim(t_images[0], t_images[1])
    # print(ms_value)
    # # show images
    # fig, ax = plt.subplots(1,2)
    # ax[0].imshow(t_images[0,0,0], cmap='gray')
    # ax[1].imshow(t_images[1,0,0], cmap='gray')

ssim_values.mean(), ssim_values.std()

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

(0.2118308525896737, 0.14459827573969733)

In [67]:
ssim_values.mean(), ssim_values.std()

(0.5231541991499744, 0.029559441454340647)