# CLIP Interrogator for datasets by [@seedmanc](https://github.com/seedmanc)

Want to figure out which prompt describes your dataset as a whole? Or pick a prompt opposite to it, to test your LoRA's performance in out-of-distribution situations? The CLIP Interrogator is here to get you answers!

<br>

For Stable Diffusion 1.X, choose the **ViT-L** model; for Stable Diffusion 2.0+, choose the **ViT-H** CLIP model. SDXL works with either the L or the G version, but L is faster. You can leave the captioning model blank to use CLIP only; otherwise the top 2 best-matching captions will be prepended to its output.

This version is specialized for producing nice prompts for use with Stable Diffusion and achieves higher alignment between generated text prompt and source image.

<br>

If this notebook is helpful to you, consider following me on [xitter](https://x.com/seedmanc) for more cool AI stuff. 🙂

And if you're looking for more AI art tools check out the [AI generative art tools list](https://pharmapsychotic.com/tools.html).


In [None]:
#@title Setup
# Clone the interrogator sources into ./clip-interrogator-average; a later
# cell appends that directory to sys.path so it can be imported.
!git clone https://github.com/seedmanc/clip-interrogator-average.git
# Install the open_clip_torch package with pip.
!pip install open_clip_torch


In [None]:
# @title
caption_model_name = 'blip2-2.7b' #@param ["blip-base", "blip-large", "blip2-2.7b", "blip2-flan-t5-xl", ""]
clip_model_name = 'ViT-L-14/openai' #@param ["ViT-L-14/openai", "ViT-H-14/laion2b_s32b_b79k", "ViT-g-14/laion2B-s34B-b88K"]

import sys
from PIL import Image

# Make the cloned repository directory importable.
sys.path.append('clip-interrogator-average')
from clip_interrogator import Config, Interrogator

# Carry the form selections above into the interrogator configuration.
config = Config()
config.clip_model_name = clip_model_name
config.caption_model_name = caption_model_name

from huggingface_hub import hf_hub_download

# Cache files on the Hub are named after the CLIP model, with '/' and '@'
# replaced by '_'. Computed once because it does not change per entry.
cache_prefix = clip_model_name.replace('/', '_').replace('@', '_')
entries = ['artists','flavors']
for entry in entries:
  try:
    # Download into the cloned repo's cache directory.
    # NOTE(review): presumably the Interrogator reads these files from there
    # on construction -- confirm against clip_interrogator.
    hf_hub_download(
        repo_id="seedmanc/clip-interrogator-cache",
        filename=f"{cache_prefix}_{entry}.safetensors",
        local_dir="clip-interrogator-average/cache",
    )
  except Exception as err:
    # Best effort: a missing or unreachable cache must not abort the cell.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate, and the cause is shown so that network failures
    # are distinguishable from a genuinely absent cache file.
    print(f'No {entry} cache found for {clip_model_name} ({type(err).__name__}: {err})')

ci = Interrogator(config)

def _tb(txt,scr):
  """Return a gr.Textbox carrying `txt` as its value.

  When `scr` is not None it is rendered to three decimals in the textbox's
  info line as "Score: x.xxx"; otherwise no info text is set.
  """
  if scr is None:
    score_note = None
  else:
    score_note = f"Score: {scr:.3f}"
  return gr.Textbox(value=txt, info=score_note)

def image_to_prompt(files, mode=None):
  """Interrogate the uploaded images and stream results to the UI.

  This is a generator: each yield is a 4-tuple of
  (prompt textbox, neutral textbox, negative textbox, image), matching the
  four output components it is wired to. Slots not computed yet are None.

  Args:
    files: Iterable of image files accepted by PIL's Image.open, or
      None/empty when the upload has been cleared.
    mode: Unused. Kept for backward compatibility and defaulted to None
      because the event wiring in this notebook passes only `files`.

  Yields:
    Progressive updates: first every intermediate result produced by
    ci.interrogate, then the same prompt plus the orthogonal ("Neutral")
    prompt, then additionally the negative prompt.
  """
  if not files:
    # Nothing uploaded (or upload cleared): blank all three textboxes and the image.
    yield _tb('',None), _tb('',None), _tb('',None), None
    return

  # Open each file in a context manager so its handle is closed as soon as
  # the RGB copy has been made; convert() returns a separate image object.
  images = []
  for path in files:
    with Image.open(path) as source:
      images.append(source.convert('RGB'))

  # The same batch size is used for both settings: 2048 for ViT-L-14/openai,
  # 1024 for any other CLIP model.
  batch = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
  ci.config.chunk_size = batch
  ci.config.flavor_intermediate_count = batch

  # Keep the last streamed values so the later yields can repeat them.
  prompt, score, imge = None, None, None
  for prompt, score, imge in ci.interrogate(images):
    yield _tb(prompt, score), None, None, imge

  orth, score2 = ci.interrogate_orthogonal_fast(images)
  yield _tb(prompt, score), _tb(orth, score2), None, imge
  neg, score3 = ci.interrogate_negative(images)
  yield _tb(prompt, score), _tb(orth, score2), _tb(neg, score3), imge

In [None]:
#@title Images to prompt! 🖼️🖼️🖼️ -> 📝
import gradio as gr

def prompt_tab():
    """Lay out the interrogation widgets and hook uploads to image_to_prompt.

    Creates a file uploader next to a column holding three textboxes
    ("Prompt", "Neutral", "Negative") and an image preview, then registers
    image_to_prompt as the uploader's change handler, with those four
    components as its outputs.
    """
    with gr.Column():
        with gr.Row():
            uploads = gr.Files(label="Image", file_types=["image", ".webp"])
            with gr.Column():
                prompt_box = gr.Textbox(
                    label="Prompt",
                    lines=3,
                    show_copy_button=True,
                )
                neutral_box = gr.Textbox(label="Neutral", lines=3)
                negative_box = gr.Textbox(label="Negative", lines=2)
                preview = gr.Image(label="Most representative image")
    # Output order must match the tuple order yielded by image_to_prompt.
    result_targets = [prompt_box, neutral_box, negative_box, preview]
    uploads.change(fn=image_to_prompt, inputs=uploads, outputs=result_targets)

# Build the app: a single Blocks page whose content is created by prompt_tab(),
# which is called inside the Blocks context.
with gr.Blocks(fill_height=True) as ui:
    prompt_tab()

# Start the Gradio app with the debug and show_error options enabled.
ui.launch( debug=True,show_error=True)