## Generate images for evaluation

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

import sys
utils_path = os.path.abspath(os.path.join('../..'))
sys.path.append(utils_path)

import torch
from diffusers import StableDiffusionPipeline
from utils.dataset_info import get_subjects_prompts_info

output_path = "../../outputs/subjects/textual_inversion"
embedding_folds = os.listdir("../../logs/subjects/textual_inversion/")

dataset_info_path = "../../pcs_dataset/info.json"

prompts_info, prompts = get_subjects_prompts_info(dataset_info_path)

for subject in embedding_folds:
    if subject not in os.listdir(output_path):
        try:
            # load sd model
            pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
            pipeline.load_textual_inversion(f"../../logs/subjects/textual_inversion/{subject}")

            for idx, prompt in enumerate(prompts[prompts_info[subject]["prompt_type"]]):
                prompt = prompt.replace("{0} {1}", "<new1>")

                image = pipeline(prompt, num_inference_steps=50, num_images_per_prompt=4).images

                save_path = os.path.join(output_path, subject, f"prompt{idx}")
                os.makedirs(save_path, exist_ok=True)

                for img_idx, img in enumerate(image):
                    img.save(os.path.join(save_path, f"{img_idx:04d}.jpg"))
            
            print(f"Finished textual inversion in subject: {subject}!")
        except:
            print("Error in Subject: ", subject)


Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  7.01it/s]
100%|██████████| 50/50 [00:13<00:00,  3.75it/s]
100%|██████████| 50/50 [00:13<00:00,  3.84it/s]
100%|██████████| 50/50 [00:13<00:00,  3.84it/s]
100%|██████████| 50/50 [00:13<00:00,  3.84it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:13<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:12<00:00,  3.85it/s]
100%|██████████| 50/50 [00:06<00:00,  8.30it/s]
100%|██████████| 50/50 [00:06<00:00,  8.30it/s]
100%|██████████| 50/50 [00:06<00:00,  8.31it/s]
100%|█████

Finished textual inversion in subject: clock2!


## Evaluation

Calculate img to img similarity and text to img similarity by CLIP Evaluator

In [3]:
import os, sys, json

os.environ["CUDA_VISIBLE_DEVICES"] = "6"

utils_path = os.path.abspath(os.path.join('../..'))
sys.path.append(utils_path)

import numpy as np
from utils.clip_eval import CLIPEvaluator, evaluate_i2i, evaluate_t2i
import re
from utils.dataset_info import get_subjects_prompts_info

In [4]:
def extract_id(folder_name):
    match = re.search(r'\d+', folder_name)
    if match:
        return int(match.group())
    return None

def convert_to_native(data):
    if isinstance(data, np.ndarray):
        return data.tolist()
    elif isinstance(data, np.generic):
        return data.item()
    elif isinstance(data, dict):
        return {key: convert_to_native(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [convert_to_native(item) for item in data]
    else:
        return data

In [5]:
outputs_path = "../../outputs/subjects/textual_inversion"
eval_res_path = "../../eval_results/subjects/textual_inversion"
dataset_path = "../../pcs_dataset/subjects"
dataset_info_path = "../../pcs_dataset/info.json"

prompts_info, prompts = get_subjects_prompts_info(dataset_info_path)

subjects_list = os.listdir(outputs_path)

evaluation_res = dict()
for subject in subjects_list:
    print("process subject: ", subject)

    res_for_each_subject =dict()

    for sub_dir in os.listdir(os.path.join(outputs_path, subject)):
        print("prompt: ", sub_dir)

        prompt = prompts[prompts_info[subject]["prompt_type"]][extract_id(sub_dir)]
        res_for_each_prompt =dict()

        for generate_img_name in os.listdir(os.path.join(outputs_path, subject, sub_dir)):
            generate_img_path = os.path.join(outputs_path, subject, sub_dir, generate_img_name)
            res_for_each_prompt[generate_img_name] = [evaluate_i2i(generate_img_path, os.path.join(dataset_path, subject)), evaluate_t2i(generate_img_path, prompt)]
        
        res_for_each_subject[prompt] = res_for_each_prompt

        print(res_for_each_prompt)

    evaluation_res[subject] = res_for_each_subject

os.makedirs(eval_res_path, exist_ok=True)

with open(os.path.join(eval_res_path, "evaluation_results.json"), "w") as json_file:
    json.dump(convert_to_native(evaluation_res), json_file, indent=4)


process subject:  chair
prompt:  prompt13
{'0003.jpg': [0.869, 0.152], '0000.jpg': [0.8896, 0.1621], '0002.jpg': [0.8857, 0.1533], '0001.jpg': [0.8604, 0.1447]}
prompt:  prompt8
{'0003.jpg': [0.8374, 0.183], '0000.jpg': [0.8364, 0.2118], '0002.jpg': [0.861, 0.2212], '0001.jpg': [0.833, 0.2032]}
prompt:  prompt17
{'0003.jpg': [0.628, 0.2076], '0000.jpg': [0.695, 0.2214], '0002.jpg': [0.7305, 0.1948], '0001.jpg': [0.666, 0.2406]}
prompt:  prompt19
{'0003.jpg': [0.894, 0.21], '0000.jpg': [0.891, 0.2078], '0002.jpg': [0.758, 0.236], '0001.jpg': [0.9043, 0.2068]}
prompt:  prompt10
{'0003.jpg': [0.8105, 0.1604], '0000.jpg': [0.818, 0.1478], '0002.jpg': [0.7983, 0.1675], '0001.jpg': [0.7314, 0.1865]}
prompt:  prompt2
{'0003.jpg': [0.85, 0.1384], '0000.jpg': [0.9062, 0.1444], '0002.jpg': [0.8594, 0.1455], '0001.jpg': [0.764, 0.21]}
prompt:  prompt15
{'0003.jpg': [0.8027, 0.1891], '0000.jpg': [0.7227, 0.2384], '0002.jpg': [0.7783, 0.1842], '0001.jpg': [0.8154, 0.2056]}
prompt:  prompt22
{'0003.