In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [33]:
!pip install --upgrade diffusers[torch] datasets invisible_watermark transformers accelerate safetensors torchmetrics Pillow




[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Step 1: Generate Stable Diffusion Images

In [1]:
from pathlib import Path
import shutil
import uuid
from diffusers import DiffusionPipeline
import torch
import datasets
import pandas as pd

In [3]:
# Load the prompt dataset from the Hugging Face Hub; the generation loop below
# reads the "Prompt" field of every record in its "train" split.
prompt_dataset = datasets.load_dataset("Gustavosta/Stable-Diffusion-Prompts")

Found cached dataset parquet (/home/daniel/.cache/huggingface/datasets/Gustavosta___parquet/Gustavosta--Stable-Diffusion-Prompts-eb720eed369c61bb/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
# Base SDXL pipeline. Loaded with the same fp16 safetensors variant as the
# refiner below, so half-precision weights are fetched directly instead of
# full-precision weights being loaded and then cast to float16.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# Keep sub-models on the CPU and move each to the GPU only while it runs.
pipe.enable_model_cpu_offload()

# Refiner pipeline; it is fed the latents produced by the base pipeline.
rf_pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
rf_pipe.enable_model_cpu_offload()

In [None]:
# Start from an empty output directory so images left over from a previous
# run never get mixed into the new dataset.
target_dir = Path("images")
if target_dir.is_dir():
    shutil.rmtree(target_dir)
target_dir.mkdir()

prompts = []
generated_images = []
for record in prompt_dataset["train"]:
    try:
        prompt = record["Prompt"]

        # Two-stage generation: the base pipeline returns latents, which the
        # refiner turns into the final image.
        latents = pipe(prompt, output_type="latent").images
        image = rf_pipe(prompt=prompt, image=latents).images[0]

        # A random UUID gives every image a unique file name.
        image_path = target_dir / f"{uuid.uuid4()}.png"
        image.save(image_path)
        prompts.append(prompt)
        generated_images.append(str(image_path))

        # Rewrite the index after every generation so that no progress is
        # lost if the run crashes.
        df = pd.DataFrame(data={"image": generated_images, "prompt": prompts})
        df.to_json("sd_dataset.json", orient="records")
    except Exception as exc:
        # Best effort: report the failure and continue with the next prompt.
        # Catching Exception (not a bare except) keeps KeyboardInterrupt and
        # SystemExit working, so the long-running loop can still be stopped.
        print(f"An error occurred while generating image: {exc!r}")

# Step 2: Generate CLIP Scores for all the examples

In [1]:
import pandas as pd
from torchmetrics.multimodal.clip_score import CLIPScore
import PIL
import numpy as np
import torch
from tqdm import tqdm

In [2]:
# CLIPScore metric backed by the large ViT-L/14 CLIP checkpoint; the trailing
# comment records an alternative checkpoint name.
metric = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14") # openai/clip-vit-base-patch16

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [3]:
# Move the metric to the GPU; the scoring loop sends its image tensors to
# "cuda" as well. NOTE(review): this hard-codes CUDA, unlike Step 3, which
# falls back to the CPU when no GPU is available.
metric = metric.to("cuda")

In [4]:
# Load the image-path/prompt records written by the generation step.
df = pd.read_json("sd_dataset.json")

In [9]:
# Score every (image, prompt) pair. A pair that cannot be scored is recorded
# as NaN so that clip_scores stays aligned, position by position, with the
# rows of df. The captured output shows tensor-size mismatches against 77 for
# some prompts -- presumably prompts longer than CLIP's text context.
clip_scores = []
for idx, row in tqdm(df.iterrows(), total=len(df)):
    try:
        # The context manager closes the opened image deterministically, also
        # when scoring raises. Previously close() was called on the converted
        # copy and was skipped entirely on errors.
        with PIL.Image.open(row["image"]) as img:
            np_img = np.array(img.convert('RGB'))
        # NOTE(review): the image is passed as a float tensor in the
        # height/width/channel layout produced by np.array -- confirm this is
        # the layout CLIPScore expects.
        clip_score = metric(torch.Tensor(np_img).to("cuda"), row["prompt"]).detach().cpu().numpy()
        clip_scores.append(clip_score)
    except Exception as e:
        print(e)
        clip_scores.append(np.nan)

  0%|                                               | 1/1431 [00:00<02:43,  8.76it/s]

The size of tensor a (101) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


  1%|▌                                             | 17/1431 [00:02<03:32,  6.65it/s]

The size of tensor a (96) must match the size of tensor b (77) at non-singleton dimension 1


  2%|▉                                             | 29/1431 [00:04<03:30,  6.68it/s]

The size of tensor a (112) must match the size of tensor b (77) at non-singleton dimension 1


  3%|█▌                                            | 49/1431 [00:07<03:26,  6.69it/s]

The size of tensor a (98) must match the size of tensor b (77) at non-singleton dimension 1


  4%|█▋                                            | 52/1431 [00:08<03:18,  6.94it/s]

The size of tensor a (87) must match the size of tensor b (77) at non-singleton dimension 1


  4%|█▉                                            | 60/1431 [00:09<03:21,  6.80it/s]

The size of tensor a (110) must match the size of tensor b (77) at non-singleton dimension 1


  5%|██▎                                           | 70/1431 [00:11<03:22,  6.73it/s]

The size of tensor a (92) must match the size of tensor b (77) at non-singleton dimension 1


  5%|██▍                                           | 77/1431 [00:12<03:20,  6.75it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


  6%|██▋                                           | 84/1431 [00:13<03:20,  6.72it/s]

The size of tensor a (99) must match the size of tensor b (77) at non-singleton dimension 1


  6%|██▊                                           | 89/1431 [00:14<03:17,  6.80it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 11%|█████                                        | 159/1431 [00:25<02:51,  7.41it/s]

The size of tensor a (98) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (88) must match the size of tensor b (77) at non-singleton dimension 1


 11%|█████                                        | 162/1431 [00:26<02:56,  7.18it/s]

The size of tensor a (120) must match the size of tensor b (77) at non-singleton dimension 1


 12%|█████▏                                       | 166/1431 [00:26<03:03,  6.89it/s]

The size of tensor a (114) must match the size of tensor b (77) at non-singleton dimension 1


 12%|█████▍                                       | 173/1431 [00:27<03:08,  6.68it/s]

The size of tensor a (96) must match the size of tensor b (77) at non-singleton dimension 1


 12%|█████▌                                       | 176/1431 [00:28<03:02,  6.87it/s]

The size of tensor a (101) must match the size of tensor b (77) at non-singleton dimension 1


 12%|█████▌                                       | 178/1431 [00:28<02:56,  7.11it/s]

The size of tensor a (94) must match the size of tensor b (77) at non-singleton dimension 1


 13%|█████▊                                       | 185/1431 [00:29<03:05,  6.73it/s]

The size of tensor a (114) must match the size of tensor b (77) at non-singleton dimension 1


 13%|█████▉                                       | 188/1431 [00:30<03:00,  6.88it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 14%|██████▎                                      | 202/1431 [00:32<03:05,  6.62it/s]

The size of tensor a (91) must match the size of tensor b (77) at non-singleton dimension 1


 14%|██████▍                                      | 204/1431 [00:32<02:55,  6.98it/s]

The size of tensor a (101) must match the size of tensor b (77) at non-singleton dimension 1


 15%|██████▌                                      | 208/1431 [00:33<02:57,  6.88it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 15%|██████▋                                      | 214/1431 [00:34<03:02,  6.69it/s]

The size of tensor a (109) must match the size of tensor b (77) at non-singleton dimension 1


 15%|██████▊                                      | 217/1431 [00:34<02:56,  6.87it/s]

The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


 15%|██████▉                                      | 220/1431 [00:35<02:53,  6.97it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1


 16%|███████▏                                     | 227/1431 [00:36<03:00,  6.67it/s]

The size of tensor a (175) must match the size of tensor b (77) at non-singleton dimension 1


 17%|███████▊                                     | 248/1431 [00:39<03:00,  6.54it/s]

The size of tensor a (93) must match the size of tensor b (77) at non-singleton dimension 1


 18%|███████▉                                     | 253/1431 [00:40<02:55,  6.70it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 18%|████████                                     | 257/1431 [00:41<02:52,  6.79it/s]

The size of tensor a (101) must match the size of tensor b (77) at non-singleton dimension 1


 19%|████████▌                                    | 272/1431 [00:43<02:56,  6.56it/s]

The size of tensor a (100) must match the size of tensor b (77) at non-singleton dimension 1


 19%|████████▋                                    | 278/1431 [00:44<02:52,  6.69it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 20%|█████████▏                                   | 291/1431 [00:46<02:53,  6.58it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 21%|█████████▍                                   | 300/1431 [00:48<02:51,  6.58it/s]

The size of tensor a (84) must match the size of tensor b (77) at non-singleton dimension 1


 21%|█████████▌                                   | 305/1431 [00:48<02:29,  7.51it/s]

The size of tensor a (132) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (87) must match the size of tensor b (77) at non-singleton dimension 1


 22%|█████████▉                                   | 315/1431 [00:50<02:49,  6.60it/s]

The size of tensor a (84) must match the size of tensor b (77) at non-singleton dimension 1


 23%|██████████▏                                  | 322/1431 [00:51<02:46,  6.66it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 23%|██████████▏                                  | 325/1431 [00:52<02:40,  6.88it/s]

The size of tensor a (92) must match the size of tensor b (77) at non-singleton dimension 1


 24%|██████████▉                                  | 347/1431 [00:55<02:45,  6.56it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


 25%|███████████                                  | 351/1431 [00:56<02:23,  7.53it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (94) must match the size of tensor b (77) at non-singleton dimension 1


 25%|███████████▍                                 | 362/1431 [00:57<02:41,  6.64it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 26%|███████████▍                                 | 365/1431 [00:58<02:18,  7.69it/s]

The size of tensor a (84) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (145) must match the size of tensor b (77) at non-singleton dimension 1


 26%|███████████▌                                 | 369/1431 [00:58<02:19,  7.60it/s]

The size of tensor a (125) must match the size of tensor b (77) at non-singleton dimension 1


 26%|███████████▊                                 | 376/1431 [00:59<02:36,  6.74it/s]

The size of tensor a (92) must match the size of tensor b (77) at non-singleton dimension 1


 27%|████████████▏                                | 387/1431 [01:01<02:39,  6.57it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 27%|████████████▎                                | 391/1431 [01:02<02:34,  6.74it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1


 28%|████████████▌                                | 401/1431 [01:04<02:35,  6.60it/s]

The size of tensor a (123) must match the size of tensor b (77) at non-singleton dimension 1


 29%|█████████████                                | 414/1431 [01:06<02:37,  6.47it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 30%|█████████████▎                               | 424/1431 [01:07<02:32,  6.59it/s]

The size of tensor a (114) must match the size of tensor b (77) at non-singleton dimension 1


 30%|█████████████▍                               | 429/1431 [01:08<02:28,  6.74it/s]

The size of tensor a (88) must match the size of tensor b (77) at non-singleton dimension 1


 31%|█████████████▉                               | 445/1431 [01:11<02:28,  6.65it/s]

The size of tensor a (95) must match the size of tensor b (77) at non-singleton dimension 1


 32%|██████████████▎                              | 455/1431 [01:12<02:27,  6.61it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 32%|██████████████▍                              | 461/1431 [01:13<02:25,  6.66it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


 33%|██████████████▊                              | 473/1431 [01:15<02:26,  6.55it/s]

The size of tensor a (115) must match the size of tensor b (77) at non-singleton dimension 1


 33%|██████████████▉                              | 476/1431 [01:16<02:20,  6.79it/s]

The size of tensor a (111) must match the size of tensor b (77) at non-singleton dimension 1


 34%|███████████████▏                             | 484/1431 [01:17<02:22,  6.63it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 35%|███████████████▌                             | 494/1431 [01:19<02:21,  6.61it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 35%|███████████████▋                             | 497/1431 [01:19<02:16,  6.84it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 35%|███████████████▊                             | 502/1431 [01:20<02:17,  6.77it/s]

The size of tensor a (96) must match the size of tensor b (77) at non-singleton dimension 1


 35%|███████████████▉                             | 505/1431 [01:20<02:13,  6.94it/s]

The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


 36%|████████████████▎                            | 517/1431 [01:22<02:19,  6.53it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


 38%|█████████████████                            | 544/1431 [01:27<02:16,  6.48it/s]

The size of tensor a (85) must match the size of tensor b (77) at non-singleton dimension 1


 39%|█████████████████▎                           | 551/1431 [01:28<02:12,  6.65it/s]

The size of tensor a (127) must match the size of tensor b (77) at non-singleton dimension 1


 40%|█████████████████▊                           | 567/1431 [01:30<02:12,  6.54it/s]

The size of tensor a (94) must match the size of tensor b (77) at non-singleton dimension 1


 41%|██████████████████▎                          | 584/1431 [01:33<02:09,  6.52it/s]

The size of tensor a (128) must match the size of tensor b (77) at non-singleton dimension 1


 42%|██████████████████▊                          | 599/1431 [01:36<02:07,  6.53it/s]

The size of tensor a (113) must match the size of tensor b (77) at non-singleton dimension 1


 42%|██████████████████▉                          | 602/1431 [01:36<02:01,  6.81it/s]

The size of tensor a (85) must match the size of tensor b (77) at non-singleton dimension 1


 43%|███████████████████▍                         | 619/1431 [01:39<02:08,  6.34it/s]

The size of tensor a (84) must match the size of tensor b (77) at non-singleton dimension 1


 44%|███████████████████▉                         | 633/1431 [01:41<02:02,  6.50it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 45%|████████████████████▎                        | 644/1431 [01:43<02:00,  6.53it/s]

The size of tensor a (137) must match the size of tensor b (77) at non-singleton dimension 1


 46%|████████████████████▋                        | 657/1431 [01:45<01:57,  6.59it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 46%|████████████████████▊                        | 660/1431 [01:46<01:52,  6.85it/s]

The size of tensor a (115) must match the size of tensor b (77) at non-singleton dimension 1


 48%|█████████████████████▍                       | 681/1431 [01:49<01:54,  6.55it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 48%|█████████████████████▌                       | 686/1431 [01:50<01:52,  6.64it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 48%|█████████████████████▋                       | 690/1431 [01:51<01:48,  6.83it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 49%|█████████████████████▉                       | 698/1431 [01:52<01:52,  6.54it/s]

The size of tensor a (108) must match the size of tensor b (77) at non-singleton dimension 1


 49%|██████████████████████                       | 703/1431 [01:53<01:49,  6.64it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 50%|██████████████████████▎                      | 709/1431 [01:54<01:49,  6.60it/s]

The size of tensor a (121) must match the size of tensor b (77) at non-singleton dimension 1


 51%|██████████████████████▋                      | 723/1431 [01:56<01:49,  6.44it/s]

The size of tensor a (100) must match the size of tensor b (77) at non-singleton dimension 1


 52%|███████████████████████▏                     | 738/1431 [01:59<01:47,  6.44it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 52%|███████████████████████▏                     | 739/1431 [01:59<01:38,  7.06it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (125) must match the size of tensor b (77) at non-singleton dimension 1


 52%|███████████████████████▎                     | 743/1431 [01:59<01:33,  7.37it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 53%|███████████████████████▊                     | 759/1431 [02:02<01:41,  6.59it/s]

The size of tensor a (101) must match the size of tensor b (77) at non-singleton dimension 1


 54%|████████████████████████                     | 767/1431 [02:03<01:42,  6.47it/s]

The size of tensor a (88) must match the size of tensor b (77) at non-singleton dimension 1


 54%|████████████████████████▎                    | 773/1431 [02:04<01:39,  6.58it/s]

The size of tensor a (174) must match the size of tensor b (77) at non-singleton dimension 1


 55%|████████████████████████▌                    | 781/1431 [02:05<01:38,  6.58it/s]

The size of tensor a (104) must match the size of tensor b (77) at non-singleton dimension 1


 55%|████████████████████████▋                    | 785/1431 [02:06<01:37,  6.64it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 55%|████████████████████████▋                    | 787/1431 [02:06<01:32,  6.94it/s]

The size of tensor a (130) must match the size of tensor b (77) at non-singleton dimension 1


 55%|████████████████████████▊                    | 791/1431 [02:07<01:32,  6.92it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 56%|█████████████████████████                    | 798/1431 [02:08<01:35,  6.66it/s]

The size of tensor a (92) must match the size of tensor b (77) at non-singleton dimension 1


 56%|█████████████████████████▎                   | 805/1431 [02:09<01:23,  7.49it/s]

The size of tensor a (155) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (104) must match the size of tensor b (77) at non-singleton dimension 1


 57%|█████████████████████████▍                   | 809/1431 [02:10<01:29,  6.99it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 57%|█████████████████████████▌                   | 811/1431 [02:10<01:26,  7.21it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 57%|█████████████████████████▋                   | 815/1431 [02:10<01:28,  6.97it/s]

The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


 57%|█████████████████████████▊                   | 821/1431 [02:11<01:30,  6.76it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 58%|██████████████████████████                   | 828/1431 [02:12<01:31,  6.59it/s]

The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


 58%|██████████████████████████▏                  | 834/1431 [02:13<01:29,  6.68it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 59%|██████████████████████████▌                  | 846/1431 [02:15<01:29,  6.54it/s]

The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


 61%|███████████████████████████▎                 | 867/1431 [02:19<01:27,  6.47it/s]

The size of tensor a (88) must match the size of tensor b (77) at non-singleton dimension 1


 62%|███████████████████████████▋                 | 882/1431 [02:21<01:23,  6.55it/s]

The size of tensor a (97) must match the size of tensor b (77) at non-singleton dimension 1


 62%|███████████████████████████▉                 | 888/1431 [02:22<01:21,  6.65it/s]

The size of tensor a (99) must match the size of tensor b (77) at non-singleton dimension 1


 62%|████████████████████████████                 | 894/1431 [02:23<01:20,  6.69it/s]

The size of tensor a (97) must match the size of tensor b (77) at non-singleton dimension 1


 63%|████████████████████████████▍                | 905/1431 [02:25<01:19,  6.61it/s]

The size of tensor a (102) must match the size of tensor b (77) at non-singleton dimension 1


 63%|████████████████████████████▌                | 908/1431 [02:26<01:16,  6.81it/s]

The size of tensor a (96) must match the size of tensor b (77) at non-singleton dimension 1


 64%|████████████████████████████▊                | 915/1431 [02:27<01:17,  6.65it/s]

The size of tensor a (114) must match the size of tensor b (77) at non-singleton dimension 1


 65%|█████████████████████████████                | 925/1431 [02:28<01:16,  6.58it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 65%|█████████████████████████████▏               | 930/1431 [02:29<01:15,  6.65it/s]

The size of tensor a (137) must match the size of tensor b (77) at non-singleton dimension 1


 65%|█████████████████████████████▎               | 934/1431 [02:30<01:13,  6.76it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 66%|█████████████████████████████▌               | 941/1431 [02:31<01:14,  6.62it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 66%|█████████████████████████████▋               | 943/1431 [02:31<01:10,  6.96it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 67%|██████████████████████████████               | 955/1431 [02:33<01:04,  7.43it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (144) must match the size of tensor b (77) at non-singleton dimension 1


 67%|██████████████████████████████▏              | 958/1431 [02:33<00:58,  8.13it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 68%|██████████████████████████████▍              | 966/1431 [02:35<01:08,  6.75it/s]

The size of tensor a (93) must match the size of tensor b (77) at non-singleton dimension 1


 68%|██████████████████████████████▌              | 971/1431 [02:35<01:08,  6.75it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 68%|██████████████████████████████▊              | 980/1431 [02:37<01:07,  6.64it/s]

The size of tensor a (96) must match the size of tensor b (77) at non-singleton dimension 1


 69%|███████████████████████████████▎             | 994/1431 [02:39<01:06,  6.59it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 70%|███████████████████████████████▎             | 996/1431 [02:39<01:02,  6.96it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1


 70%|███████████████████████████████▍             | 998/1431 [02:40<01:00,  7.16it/s]

The size of tensor a (153) must match the size of tensor b (77) at non-singleton dimension 1


 70%|██████████████████████████████▉             | 1008/1431 [02:41<01:03,  6.65it/s]

The size of tensor a (116) must match the size of tensor b (77) at non-singleton dimension 1


 72%|███████████████████████████████▉            | 1037/1431 [02:46<00:59,  6.61it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 73%|████████████████████████████████▏           | 1048/1431 [02:48<00:57,  6.62it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


 74%|████████████████████████████████▍           | 1056/1431 [02:49<00:56,  6.60it/s]

The size of tensor a (81) must match the size of tensor b (77) at non-singleton dimension 1


 74%|████████████████████████████████▌           | 1060/1431 [02:50<00:56,  6.61it/s]

The size of tensor a (100) must match the size of tensor b (77) at non-singleton dimension 1


 75%|████████████████████████████████▊           | 1069/1431 [02:51<00:54,  6.61it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 75%|█████████████████████████████████           | 1075/1431 [02:52<00:53,  6.68it/s]

The size of tensor a (85) must match the size of tensor b (77) at non-singleton dimension 1


 77%|█████████████████████████████████▊          | 1101/1431 [02:56<00:50,  6.54it/s]

The size of tensor a (110) must match the size of tensor b (77) at non-singleton dimension 1


 77%|█████████████████████████████████▉          | 1105/1431 [02:57<00:48,  6.73it/s]

The size of tensor a (91) must match the size of tensor b (77) at non-singleton dimension 1


 77%|██████████████████████████████████          | 1107/1431 [02:57<00:45,  7.06it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 77%|██████████████████████████████████          | 1109/1431 [02:58<00:44,  7.21it/s]

The size of tensor a (94) must match the size of tensor b (77) at non-singleton dimension 1


 78%|██████████████████████████████████▎         | 1116/1431 [02:59<00:47,  6.70it/s]

The size of tensor a (107) must match the size of tensor b (77) at non-singleton dimension 1


 79%|██████████████████████████████████▋         | 1127/1431 [03:01<00:46,  6.56it/s]

The size of tensor a (104) must match the size of tensor b (77) at non-singleton dimension 1


 79%|██████████████████████████████████▊         | 1131/1431 [03:01<00:39,  7.58it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (126) must match the size of tensor b (77) at non-singleton dimension 1


 79%|██████████████████████████████████▉         | 1137/1431 [03:02<00:43,  6.82it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


 80%|███████████████████████████████████         | 1140/1431 [03:02<00:41,  6.94it/s]

The size of tensor a (84) must match the size of tensor b (77) at non-singleton dimension 1


 82%|███████████████████████████████████▉        | 1169/1431 [03:07<00:35,  7.45it/s]

The size of tensor a (94) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


 82%|████████████████████████████████████        | 1173/1431 [03:08<00:36,  7.04it/s]

The size of tensor a (91) must match the size of tensor b (77) at non-singleton dimension 1


 82%|████████████████████████████████████▏       | 1176/1431 [03:08<00:36,  7.06it/s]

The size of tensor a (80) must match the size of tensor b (77) at non-singleton dimension 1


 83%|████████████████████████████████████▍       | 1185/1431 [03:10<00:36,  6.65it/s]

The size of tensor a (85) must match the size of tensor b (77) at non-singleton dimension 1


 84%|█████████████████████████████████████▏      | 1208/1431 [03:13<00:33,  6.57it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 85%|█████████████████████████████████████▍      | 1216/1431 [03:15<00:32,  6.57it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 86%|█████████████████████████████████████▋      | 1227/1431 [03:17<00:31,  6.51it/s]

The size of tensor a (89) must match the size of tensor b (77) at non-singleton dimension 1


 86%|█████████████████████████████████████▉      | 1233/1431 [03:18<00:29,  6.65it/s]

The size of tensor a (108) must match the size of tensor b (77) at non-singleton dimension 1


 87%|██████████████████████████████████████      | 1238/1431 [03:18<00:28,  6.69it/s]

The size of tensor a (109) must match the size of tensor b (77) at non-singleton dimension 1


 87%|██████████████████████████████████████▏     | 1241/1431 [03:19<00:27,  6.89it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 87%|██████████████████████████████████████▎     | 1246/1431 [03:19<00:24,  7.61it/s]

The size of tensor a (101) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (102) must match the size of tensor b (77) at non-singleton dimension 1


 87%|██████████████████████████████████████▍     | 1249/1431 [03:20<00:24,  7.31it/s]

The size of tensor a (94) must match the size of tensor b (77) at non-singleton dimension 1


 87%|██████████████████████████████████████▍     | 1251/1431 [03:20<00:24,  7.38it/s]

The size of tensor a (236) must match the size of tensor b (77) at non-singleton dimension 1


 88%|██████████████████████████████████████▌     | 1255/1431 [03:21<00:24,  7.08it/s]

The size of tensor a (99) must match the size of tensor b (77) at non-singleton dimension 1


 88%|██████████████████████████████████████▊     | 1262/1431 [03:22<00:25,  6.72it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1


 89%|███████████████████████████████████████▏    | 1274/1431 [03:24<00:23,  6.54it/s]

The size of tensor a (117) must match the size of tensor b (77) at non-singleton dimension 1


 89%|███████████████████████████████████████▏    | 1276/1431 [03:24<00:22,  6.94it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 90%|███████████████████████████████████████▌    | 1285/1431 [03:26<00:22,  6.57it/s]

The size of tensor a (99) must match the size of tensor b (77) at non-singleton dimension 1


 90%|███████████████████████████████████████▌    | 1288/1431 [03:26<00:20,  6.84it/s]

The size of tensor a (88) must match the size of tensor b (77) at non-singleton dimension 1


 90%|███████████████████████████████████████▋    | 1291/1431 [03:26<00:20,  6.95it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1


 90%|███████████████████████████████████████▊    | 1293/1431 [03:27<00:19,  7.17it/s]

The size of tensor a (136) must match the size of tensor b (77) at non-singleton dimension 1


 91%|████████████████████████████████████████▏   | 1309/1431 [03:29<00:18,  6.56it/s]

The size of tensor a (99) must match the size of tensor b (77) at non-singleton dimension 1


 92%|████████████████████████████████████████▍   | 1314/1431 [03:30<00:17,  6.76it/s]

The size of tensor a (109) must match the size of tensor b (77) at non-singleton dimension 1


 93%|████████████████████████████████████████▊   | 1329/1431 [03:33<00:15,  6.54it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


 93%|████████████████████████████████████████▉   | 1332/1431 [03:33<00:14,  6.84it/s]

The size of tensor a (110) must match the size of tensor b (77) at non-singleton dimension 1


 95%|█████████████████████████████████████████▊  | 1359/1431 [03:37<00:11,  6.50it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


 96%|██████████████████████████████████████████▎ | 1376/1431 [03:40<00:08,  6.56it/s]

The size of tensor a (117) must match the size of tensor b (77) at non-singleton dimension 1


 97%|██████████████████████████████████████████▍ | 1381/1431 [03:41<00:07,  6.66it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 97%|██████████████████████████████████████████▊ | 1394/1431 [03:43<00:05,  6.57it/s]

The size of tensor a (86) must match the size of tensor b (77) at non-singleton dimension 1


 98%|███████████████████████████████████████████ | 1401/1431 [03:44<00:04,  6.64it/s]

The size of tensor a (90) must match the size of tensor b (77) at non-singleton dimension 1


 99%|███████████████████████████████████████████▎| 1410/1431 [03:46<00:03,  6.62it/s]

The size of tensor a (78) must match the size of tensor b (77) at non-singleton dimension 1


 99%|███████████████████████████████████████████▍| 1412/1431 [03:46<00:02,  6.97it/s]

The size of tensor a (79) must match the size of tensor b (77) at non-singleton dimension 1


100%|███████████████████████████████████████████▊| 1424/1431 [03:48<00:01,  6.62it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1


100%|███████████████████████████████████████████▊| 1426/1431 [03:48<00:00,  6.98it/s]

The size of tensor a (83) must match the size of tensor b (77) at non-singleton dimension 1


100%|███████████████████████████████████████████▉| 1430/1431 [03:49<00:00,  7.72it/s]

The size of tensor a (82) must match the size of tensor b (77) at non-singleton dimension 1
The size of tensor a (84) must match the size of tensor b (77) at non-singleton dimension 1


100%|████████████████████████████████████████████| 1431/1431 [03:49<00:00,  6.24it/s]


In [10]:
# Put the scores into a one-column frame and join it column-wise onto the
# image/prompt records.
clip_score_column = pd.DataFrame(data={"clip_score": clip_scores})
scored_df = pd.concat([df, clip_score_column], axis=1)

In [11]:
# Drop every row containing a missing value; this removes the pairs whose
# CLIP score was recorded as NaN because scoring failed.
scored_df = scored_df.dropna()

In [13]:
# Force the prompt column to string dtype.
scored_df["prompt"] = scored_df["prompt"].astype("str")

In [20]:
# Persist the scored dataset as a JSON list of records; Step 3 reads this file.
scored_df.to_json("sd_dataset_scored.json", orient="records")

# Step 3: Precompute CLIP embeddings for the image text pairs

In [51]:
from tqdm import tqdm
import pandas as pd
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

In [41]:
# Load the scored image/prompt table written at the end of the scoring step.
df = pd.read_json(path_or_buf="sd_dataset_scored.json")

In [42]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Model and processor are loaded from the same checkpoint name.
clip_checkpoint = "openai/clip-vit-large-patch14"
model = CLIPModel.from_pretrained(clip_checkpoint, output_hidden_states=True)
model = model.to(device)
processor = CLIPProcessor.from_pretrained(clip_checkpoint)

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [52]:
# Embed every (image, prompt) pair with CLIP, one pair per forward pass.
clip_image_embeddings = []
clip_text_embeddings = []
for _, record in tqdm(df.iterrows(), total=len(df)):
    # Keep the image file open only for as long as it is being processed.
    with Image.open(record["image"]) as pil_image:
        batch = processor(
            text=[record["prompt"]],
            images=[pil_image],
            return_tensors="pt",
            padding=True,
        ).to(device)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            clip_out = model(**batch)
            image_vector = clip_out["image_embeds"].detach().cpu().numpy()[0]
            text_vector = clip_out["text_embeds"].detach().cpu().numpy()[0]
    # Index 0 above picks the single entry of the one-element batch.
    clip_image_embeddings.append(image_vector)
    clip_text_embeddings.append(text_vector)

100%|███████████████████████████████████████| 1250/1250 [01:31<00:00, 13.70it/s]


In [53]:
# Store the embeddings as plain Python lists so they survive JSON serialisation.
# Fix: the text column previously iterated over the undefined name
# `clip_text_embeddingsxt_embeddings`, which raises NameError on a fresh run.
df["clip_text_embedding"] = [e.tolist() for e in clip_text_embeddings]
df["clip_image_embedding"] = [e.tolist() for e in clip_image_embeddings]

In [54]:
# Write the scored + embedded table to disk; "columns" is the DataFrame default layout, made explicit here.
df.to_json(path_or_buf="sd_dataset_scored_embedded.json", orient="columns")

# Step 4: Selection of especially appealing images according to CLIP

In [1]:
import pandas as pd
import numpy as np
from sliceguard import SliceGuard
from renumics import spotlight
from renumics.spotlight import Image, Embedding

In [2]:
# Reload the table that already carries CLIP scores and precomputed embeddings.
df = pd.read_json(path_or_buf="sd_dataset_scored_embedded.json")

In [7]:
# Stack the per-row embedding lists into one 2-D array per modality (one row per dataframe row).
clip_text_embeddings, clip_image_embeddings = (
    np.vstack(df[column])
    for column in ("clip_text_embedding", "clip_image_embedding")
)

## Global selection

In [9]:
def return_precomputed_metric(y, y_pred):
    """Return the mean of ``y`` along axis 0.

    Used as a metric function for a score that has already been computed:
    the same score column is supplied as both ``y`` and ``y_pred``, so
    ``y_pred`` is accepted only to satisfy the metric signature and is ignored.
    """
    mean_score = y.mean(axis=0)
    return mean_score

In [27]:
# Note: SliceGuard has no explicit interface for a precomputed metric. As a workaround,
# pass the metric column as both y and y_pred and let the metric function return the mean of y.
# NOTE(review): metric_mode="min" presumably makes the search flag clusters whose mean
# CLIP score lies well above the overall value — confirm against the SliceGuard docs.
embedding_column = "clip_image_embedding"
score_column = "clip_score"

sg = SliceGuard()
sg.find_issues(
    df,
    [embedding_column],
    score_column,  # y
    score_column,  # y_pred
    return_precomputed_metric,
    metric_mode="min",
    min_support=2,
    min_drop=6.0,
    precomputed_embeddings={embedding_column: clip_image_embeddings},
)

The overall metric value is 31.099600642394634
Using 2 as minimum support for determining problematic clusters.
Using 6.0 as minimum drop for determining problematic clusters.
Identified 1 problematic slices.


Unnamed: 0,issue,issue_metric,issue_explanation
0,-1,,
1,-1,,
2,-1,,
3,-1,,
4,-1,,
...,...,...,...
1245,-1,,
1246,-1,,
1247,-1,,
1248,-1,,


In [28]:
# Tell Spotlight which columns to treat as images and which as embeddings.
report_dtypes = {"image": Image, "clip_image_embedding": Embedding}
sg.report(spotlight_dtype=report_dtypes)

Unnamed: 0,image,prompt,clip_score,clip_text_embedding,clip_image_embedding,issue,issue_metric,issue_explanation,sg_emb_clip_image_embedding
0,images/56477cc5-64d0-4ba6-971a-72c719460235.png,"steampunk market interior, colorful, 3 d scene...",26.963913,"[-0.039592452300000004, -0.0147153912, 0.02259...","[0.042762961200000005, -0.0210433938, 0.045792...",-1,,,"[0.042762961200000005, -0.0210433938, 0.045792..."
1,images/bd09826a-796e-4142-862d-1dacdeb3c4f5.png,"“A portrait of a cyborg in a golden suit, D&D ...",29.205557,"[-0.0020829607000000003, 0.0192637891, -0.0362...","[0.018390106, -0.0414507277, 0.0406149887, -0....",-1,,,"[0.018390106, -0.0414507277, 0.0406149887, -0...."
2,images/8fe45789-ed16-4260-bdad-048485ab90e4.png,A full portrait of a beautiful post apocalypti...,30.058372,"[0.0062153996, 0.021728647900000002, -0.046727...","[0.0483528748, -0.050396055, 0.034902714200000...",-1,,,"[0.0483528748, -0.050396055, 0.034902714200000..."
3,images/aa0ac4e2-38f2-4ce7-8a2c-69890b3f7173.png,"beautiful victorian raven digital painting, ar...",33.178711,"[0.0280955508, -0.0245218761, -0.0008252721000...","[0.0324550271, -0.0018246276, 0.0379409231, -0...",-1,,,"[0.0324550271, -0.0018246276, 0.0379409231, -0..."
4,images/fc8fdc82-9bd2-4015-b434-b6958f9f0d80.png,"ilya kuvshinov with long sky blue hair, gold e...",31.954140,"[0.032235581400000005, -0.0244684052, -0.04714...","[0.0431220941, -0.0332533978, 0.02237399850000...",-1,,,"[0.0431220941, -0.0332533978, 0.02237399850000..."
...,...,...,...,...,...,...,...,...,...
1245,images/9e3b26ab-b737-472e-815f-7c13389dd2f0.png,amazing lifelike award winning pencil illustra...,32.023315,"[-0.0161065459, 0.0105979368, 0.040526025, -0....","[0.0670162812, 0.02718102, 0.0588702112, -0.04...",-1,,,"[0.0670162812, 0.02718102, 0.0588702112, -0.04..."
1246,images/4c3264ab-da87-4ff5-93fb-0a12bb5ed64e.png,A castle made out of white stone burning from ...,26.541388,"[-0.0453475416, 0.0172001105, -0.0454900749000...","[-0.0132969152, -0.0348297916, -0.0372253507, ...",-1,,,"[-0.0132969152, -0.0348297916, -0.0372253507, ..."
1247,images/bace5279-aba0-499c-8c78-f3dbaeae50f8.png,"You keep on creepin', let the night be scary",23.000858,"[-0.024687962600000002, 0.018290622200000002, ...","[0.0080016209, 0.0243713874, 0.0124144787, -0....",-1,,,"[0.0080016209, 0.0243713874, 0.0124144787, -0...."
1248,images/3d58bdd1-f6e3-455a-b0ee-0db53b14a4f1.png,"photo, young female meth producer, meth lab, c...",24.982887,"[0.0028408652, 0.0373360664, -0.00086677050000...","[0.0246292036, 0.054845836, 0.0172184352, -0.0...",-1,,,"[0.0246292036, 0.054845836, 0.0172184352, -0.0..."


## Category-wise, adaptive selection

In [30]:
from hnne import HNNE

In [31]:
# Instantiate h-NNE with Euclidean distance as its metric.
hnne_metric = "euclidean"
hnne = HNNE(metric=hnne_metric)

**Visualization idea: cluster map with CLIP score and images, animated over the best slices, probably stratified according to clustering.**

# Detect Biases in Stable Diffusion and CLIP Score Metric