In [1]:
!pip install faiss-gpu-cu11 --index-url https://pypi.org/simple


Collecting faiss-gpu-cu11
  Downloading faiss_gpu_cu11-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting numpy<2 (from faiss-gpu-cu11)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-runtime-cu11>=11.8.89 (from faiss-gpu-cu11)
  Downloading nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cublas-cu11>=11.11.3.6 (from faiss-gpu-cu11)
  Downloading nvidia_cublas_cu11-11.11.3.6-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Downloading faiss_gpu_cu11-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (48.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp312-cp312-manylinux

In [1]:
import faiss
print("FAISS version:", faiss.__version__)
print("GPU available:", faiss.get_num_gpus())


import faiss
print(faiss.get_num_gpus())  # should be >=1


FAISS version: 1.12.0
GPU available: 1
1


In [2]:
!pip install -U transformers==4.44.2
!pip install -U sentence-transformers==2.7.0


Collecting transformers==4.44.2
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.20,>=0.19 (from transformers==4.44.2)
  Downloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.44.2-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
  

In [3]:
# ===================================
# Imports
# ===================================
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
import faiss
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
from google.colab import drive

# ===================================
# Mount Drive
# ===================================
drive.mount('/content/drive', force_remount=True)

# ===================================
# Setup
# ===================================
device = "cuda" if torch.cuda.is_available() else "cpu"
print("⚡ Using device:", device)

base_path = "/content/drive/MyDrive/MediaEval/newsimages_25_v1.1/"
results_dir = os.path.join(base_path, "results_local/")
os.makedirs(results_dir, exist_ok=True)
generated_dir = os.path.join(results_dir, "generated_images/")
os.makedirs(generated_dir, exist_ok=True)

TOP_K = 5  # Number of images to retrieve per article

# ===================================
# Load datasets
# ===================================
subset_columns = ["article_id", "article_url", "article_title", "article_tags", "image_id", "image_url"]

df_full = pd.read_csv(os.path.join(base_path, "newsarticles.csv"))
df_subset = pd.read_csv(os.path.join(base_path, "subset.csv"), header=None, names=subset_columns)

print("✅ Loaded datasets")
print("Full dataset shape:", df_full.shape)
print("Subset dataset shape:", df_subset.shape)

# ===================================
# Prepare text for embeddings
# ===================================
df_full["text"] = df_full["article_title"].fillna("") + ". " + df_full["article_tags"].fillna("")

# ===================================
# Load SentenceTransformer
# ===================================
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

# ===================================
# Generate embeddings
# ===================================
print("⚡ Generating embeddings for full dataset...")
text_embeddings = model.encode(
    df_full["text"].tolist(),
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
    device=device
)
text_embeddings = text_embeddings / np.linalg.norm(text_embeddings, axis=1, keepdims=True)

# ===================================
# Build FAISS index (GPU if available)
# ===================================
d = text_embeddings.shape[1]
cpu_index = faiss.IndexFlatIP(d)

if device == "cuda":
    res = faiss.StandardGpuResources()
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
else:
    index = cpu_index

index.add(text_embeddings)
print("✅ FAISS index built with:", index.ntotal, "articles (on", device, ")")

# ===================================
# Retrieval function
# ===================================
def retrieve_top_k(df_queries, top_k=TOP_K):
    results = []
    for i, row in tqdm(df_queries.iterrows(), total=len(df_queries), desc="Retrieving"):
        query_text = str(row["article_title"]) + ". " + str(row["article_tags"])
        q_emb = model.encode([query_text], convert_to_numpy=True, device=device)
        q_emb = q_emb / np.linalg.norm(q_emb, axis=1, keepdims=True)
        D, I = index.search(q_emb, top_k)
        for rank, idx in enumerate(I[0]):
            retrieved = df_full.iloc[idx]
            results.append({
                "query_article_id": row["article_id"],
                "query_title": row["article_title"],
                "retrieved_article_id": retrieved["article_id"],
                "retrieved_article_title": retrieved["article_title"],
                "retrieved_image_id": retrieved["image_id"],
                "retrieved_image_url": retrieved["image_url"],
                "score": float(D[0][rank]),
                "rank": rank + 1
            })
    return pd.DataFrame(results)

# ===================================
# Task 1a: Retrieve for subset
# ===================================
print("⚡ Retrieving images for subset...")
df_subset_results = retrieve_top_k(df_subset, top_k=TOP_K)
subset_out = os.path.join(results_dir, "task1_subset_results.csv")
df_subset_results.to_csv(subset_out, index=False)
print("✅ Subset retrieval results saved:", subset_out)

# ===================================
# Task 1b: Retrieve for full dataset
# ===================================
print("⚡ Retrieving images for full dataset...")
df_full_results = retrieve_top_k(df_full, top_k=TOP_K)
full_out = os.path.join(results_dir, "task1_full_dataset_results.csv")
df_full_results.to_csv(full_out, index=False)
print("✅ Full dataset retrieval results saved:", full_out)



Mounted at /content/drive
⚡ Using device: cuda
✅ Loaded datasets
Full dataset shape: (8500, 6)
Subset dataset shape: (30, 6)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

⚡ Generating embeddings for full dataset...


Batches:   0%|          | 0/133 [00:00<?, ?it/s]

✅ FAISS index built with: 8500 articles (on cuda )
⚡ Retrieving images for subset...


Retrieving: 100%|██████████| 30/30 [00:00<00:00, 78.07it/s]


✅ Subset retrieval results saved: /content/drive/MyDrive/MediaEval/newsimages_25_v1.1/results_local/task1_subset_results.csv
⚡ Retrieving images for full dataset...


Retrieving: 100%|██████████| 8500/8500 [01:05<00:00, 129.59it/s]


✅ Full dataset retrieval results saved: /content/drive/MyDrive/MediaEval/newsimages_25_v1.1/results_local/task1_full_dataset_results.csv


In [None]:
# ===================================
# Task 2: Stable Diffusion (GPU optimized)
# ===================================
print("⚡ Setting up Stable Diffusion for local generation...")
model_name = "runwayml/stable-diffusion-v1-5"
scheduler = EulerDiscreteScheduler.from_pretrained(model_name, subfolder="scheduler")

pipe = StableDiffusionPipeline.from_pretrained(
    model_name,
    scheduler=scheduler,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
)
pipe = pipe.to(device)

if device == "cuda":
    pipe.enable_attention_slicing()

def generate_image_local(prompt, save_path, num_inference_steps=30, guidance_scale=7.5):
    image = pipe(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).images[0]
    image.save(save_path)

# ===================================
# Generate images for subset
# ===================================
generated_data = []
print("⚡ Generating images for subset locally...")
for i, row in tqdm(df_subset.iterrows(), total=len(df_subset), desc="Generating images"):
    prompt = f"{row['article_title']}. {row['article_tags']}. Illustration, non-photorealistic, news thumbnail."
    save_path = os.path.join(generated_dir, f"{row['article_id']}.png")
    try:
        generate_image_local(prompt, save_path)
        generated_data.append({
            "article_id": row["article_id"],
            "article_title": row["article_title"],
            "image_path": save_path,
            "prompt": prompt
        })
    except Exception as e:
        print(f"❌ Failed to generate image for article {row['article_id']}: {e}")

# Save metadata
df_generated = pd.DataFrame(generated_data)
gen_csv = os.path.join(generated_dir, "task2_generated_images_local.csv")
df_generated.to_csv(gen_csv, index=False)
print("✅ Generated images metadata saved:", gen_csv)

# ===============================
# Image Generation for FULL dataset
# ===============================
print("⚡ Generating images for full dataset...")

generated_data_full = []

for i, row in tqdm(df_full.iterrows(), total=len(df_full), desc="Generating full dataset"):
    prompt = f"{row['article_title']}. {row['article_tags']}. Illustration, non-photorealistic, news thumbnail."
    save_path = os.path.join(generated_dir, f"{row['article_id']}.png")

    try:
        generate_image_local(prompt, save_path)
        generated_data_full.append({
            "article_id": row["article_id"],
            "article_title": row["article_title"],
            "image_path": save_path,
            "prompt": prompt
        })
    except Exception as e:
        print(f"❌ Failed to generate image for article {row['article_id']}: {e}")

# Save metadata
df_generated_full = pd.DataFrame(generated_data_full)
gen_csv_full = os.path.join(generated_dir, "task2_generated_images_full.csv")
df_generated_full.to_csv(gen_csv_full, index=False)
print("✅ Generated FULL dataset images metadata saved:", gen_csv_full)



⚡ Setting up Stable Diffusion for local generation...


scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

⚡ Generating images for subset locally...


Generating images:   0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:   3%|▎         | 1/30 [00:07<03:47,  7.86s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:   7%|▋         | 2/30 [00:14<03:18,  7.10s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  10%|█         | 3/30 [00:20<03:03,  6.79s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  13%|█▎        | 4/30 [00:27<02:54,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  17%|█▋        | 5/30 [00:33<02:46,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  20%|██        | 6/30 [00:40<02:38,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  23%|██▎       | 7/30 [00:47<02:32,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  27%|██▋       | 8/30 [00:53<02:26,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  30%|███       | 9/30 [01:00<02:20,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  33%|███▎      | 10/30 [01:07<02:14,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  37%|███▋      | 11/30 [01:14<02:07,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  40%|████      | 12/30 [01:20<02:01,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  43%|████▎     | 13/30 [01:28<01:56,  6.86s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  47%|████▋     | 14/30 [01:34<01:48,  6.78s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  50%|█████     | 15/30 [01:41<01:41,  6.74s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  53%|█████▎    | 16/30 [01:47<01:33,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  57%|█████▋    | 17/30 [01:54<01:26,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  60%|██████    | 18/30 [02:01<01:19,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  63%|██████▎   | 19/30 [02:07<01:12,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  67%|██████▋   | 20/30 [02:14<01:06,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  70%|███████   | 21/30 [02:20<00:59,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  73%|███████▎  | 22/30 [02:27<00:53,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  77%|███████▋  | 23/30 [02:34<00:46,  6.65s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (125 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['of wisconsin ; college pulse ; university of florida ; california institute of technology ; florida state university ; university admissions directors ; university of southern california ; republican party ; johns hopkins university ; amherst college. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  80%|████████  | 24/30 [02:40<00:39,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  83%|████████▎ | 25/30 [02:47<00:33,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  87%|████████▋ | 26/30 [02:57<00:30,  7.51s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  90%|█████████ | 27/30 [03:03<00:21,  7.26s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  93%|█████████▎| 28/30 [03:10<00:14,  7.09s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images:  97%|█████████▋| 29/30 [03:17<00:06,  6.97s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating images: 100%|██████████| 30/30 [03:23<00:00,  6.79s/it]


✅ Generated images metadata saved: /content/drive/MyDrive/MediaEval/newsimages_25_v1.1/results_local/generated_images/task2_generated_images_local.csv
⚡ Generating images for full dataset...


Generating full dataset:   0%|          | 0/8500 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 1/8500 [00:06<15:39:52,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 2/8500 [00:13<15:43:23,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 3/8500 [00:19<15:41:54,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 4/8500 [00:34<23:07:22,  9.80s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 5/8500 [00:41<20:23:59,  8.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['international. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 6/8500 [00:47<18:45:35,  7.95s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 7/8500 [00:54<17:43:46,  7.52s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 8/8500 [01:01<17:06:54,  7.26s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['realistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 9/8500 [01:07<16:41:32,  7.08s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 10/8500 [01:14<16:28:59,  6.99s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 11/8500 [01:21<16:15:00,  6.89s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 12/8500 [01:27<16:02:33,  6.80s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 13/8500 [01:34<15:57:15,  6.77s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 14/8500 [01:41<15:55:14,  6.75s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   0%|          | 15/8500 [01:47<15:45:50,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 16/8500 [01:54<15:40:07,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['realistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 17/8500 [02:00<15:40:05,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 18/8500 [02:08<16:03:20,  6.81s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 19/8500 [02:14<15:57:36,  6.77s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 20/8500 [02:21<15:50:22,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 21/8500 [02:28<15:47:32,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 22/8500 [02:34<15:42:54,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 23/8500 [02:41<15:41:05,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 24/8500 [02:48<15:42:36,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 25/8500 [02:54<15:40:55,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['to gulf cooperation council ; strategic council ; economic partnership agreement. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 26/8500 [03:01<15:37:47,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 27/8500 [03:07<15:35:41,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 28/8500 [03:14<15:35:02,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 29/8500 [03:21<15:35:47,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 30/8500 [03:27<15:39:07,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 31/8500 [03:34<15:36:25,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 32/8500 [03:41<15:36:41,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 33/8500 [03:47<15:33:38,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 34/8500 [03:54<15:36:15,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 35/8500 [04:00<15:34:26,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 36/8500 [04:07<15:40:08,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   0%|          | 37/8500 [04:14<15:34:00,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 38/8500 [04:20<15:31:52,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 39/8500 [04:27<15:29:28,  6.59s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 40/8500 [04:33<15:29:37,  6.59s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 41/8500 [04:40<15:28:43,  6.59s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   0%|          | 42/8500 [04:47<15:29:40,  6.59s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['mirela kopjar | health canal ; research vegan brands ; vegan society. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 43/8500 [04:53<15:31:50,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 44/8500 [05:00<15:37:31,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['r hrekov clodagh kilcoyne ; michael square ; kyiv clodagh kilcoyne ; leo varadkar ; oleksandr hrekov ; pierre zakrzewski | lesya ukrainka national academic drama theatre. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 45/8500 [05:07<15:38:21,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 46/8500 [05:13<15:37:22,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 47/8500 [05:20<15:35:49,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 48/8500 [05:27<15:35:51,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 49/8500 [05:33<15:33:26,  6.63s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 50/8500 [05:40<15:31:47,  6.62s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['of justice ; pediatric complex care program ; florida agency for health care administration ; florida department of health ; united states district court ; civil rights division. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 51/8500 [05:46<15:35:06,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 52/8500 [05:53<15:40:00,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 53/8500 [06:00<15:36:09,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 54/8500 [06:07<15:37:38,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 55/8500 [06:13<15:35:56,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 56/8500 [06:20<15:40:15,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 57/8500 [06:27<15:41:05,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 58/8500 [06:33<15:39:43,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 59/8500 [06:40<15:40:00,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 60/8500 [06:47<15:39:00,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 61/8500 [06:53<15:44:59,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 62/8500 [07:00<15:42:54,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 63/8500 [07:07<15:38:28,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 64/8500 [07:13<15:41:26,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 65/8500 [07:20<15:36:27,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 66/8500 [07:27<15:39:37,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 67/8500 [07:33<15:39:30,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 68/8500 [07:40<15:39:23,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 69/8500 [07:47<15:39:33,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 70/8500 [07:53<15:38:37,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   1%|          | 71/8500 [08:00<15:35:27,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 72/8500 [08:07<15:39:22,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 73/8500 [08:14<15:37:52,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 74/8500 [08:20<15:41:11,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 75/8500 [08:27<15:38:43,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 76/8500 [08:33<15:27:05,  6.60s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 77/8500 [08:40<15:28:27,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 78/8500 [08:47<15:35:06,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 79/8500 [08:53<15:34:23,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 80/8500 [09:00<15:36:12,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 81/8500 [09:07<15:36:03,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 82/8500 [09:13<15:33:27,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 83/8500 [09:20<15:30:46,  6.63s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['netflix ; alliance of motion ; television producers ; screen actors guild. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 84/8500 [09:27<15:32:13,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 85/8500 [09:33<15:32:11,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 86/8500 [09:40<15:30:04,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 87/8500 [09:46<15:28:13,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 88/8500 [09:53<15:30:03,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 89/8500 [10:00<15:36:45,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 90/8500 [10:07<15:35:26,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 91/8500 [10:13<15:32:39,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 92/8500 [10:20<15:31:38,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 93/8500 [10:26<15:31:36,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 94/8500 [10:33<15:29:29,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 95/8500 [10:40<15:31:08,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 96/8500 [10:46<15:31:47,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 97/8500 [10:53<15:28:42,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 98/8500 [11:00<15:29:15,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 99/8500 [11:06<15:29:21,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 100/8500 [11:13<15:29:38,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 101/8500 [11:20<15:27:16,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 102/8500 [11:26<15:28:38,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 103/8500 [11:33<15:25:26,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 104/8500 [11:39<15:24:55,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 105/8500 [11:46<15:26:15,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|          | 106/8500 [11:53<15:26:07,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 107/8500 [11:59<15:28:45,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 108/8500 [12:06<15:29:51,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['instagram. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 109/8500 [12:13<15:31:37,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 110/8500 [12:19<15:29:41,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 111/8500 [12:26<15:34:42,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 112/8500 [12:33<15:30:05,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 113/8500 [12:40<15:41:57,  6.74s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 114/8500 [12:46<15:36:43,  6.70s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 115/8500 [12:53<15:31:31,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 116/8500 [12:59<15:31:49,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 117/8500 [13:06<15:35:24,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 118/8500 [13:13<15:34:11,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 119/8500 [13:20<15:35:13,  6.70s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['nes university ; pope county high school ; california state university ; school of physics ; university of illinois ; education research center ; langara college ; dynamic eclipse broadcast initiative ; national solar observatory ; united states. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 120/8500 [13:26<15:32:58,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 121/8500 [13:33<15:32:31,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 122/8500 [13:40<15:31:50,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 123/8500 [13:46<15:29:22,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; supreme court ; temple university beasley school of law ; carl hessler jr medianews group ; appellate court procedural rules committee ; district court ; youth court. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 124/8500 [13:53<15:28:12,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['trader joe. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 125/8500 [14:00<15:30:11,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 126/8500 [14:06<15:28:36,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   1%|▏         | 127/8500 [14:13<15:29:24,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 128/8500 [14:19<15:28:51,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 129/8500 [14:26<15:29:31,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 130/8500 [14:33<15:27:32,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 131/8500 [14:39<15:29:47,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 132/8500 [14:46<15:29:47,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 133/8500 [14:53<15:31:52,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 134/8500 [15:00<15:33:28,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 135/8500 [15:06<15:30:22,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 136/8500 [15:13<15:29:39,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 137/8500 [15:19<15:26:13,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 138/8500 [15:26<15:26:36,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 139/8500 [15:33<15:28:28,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 140/8500 [15:39<15:26:18,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 141/8500 [15:46<15:25:48,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 142/8500 [15:53<15:31:43,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 143/8500 [15:59<15:29:51,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 144/8500 [16:06<15:27:11,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 145/8500 [16:13<15:27:11,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 146/8500 [16:19<15:25:21,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 147/8500 [16:26<15:24:35,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 148/8500 [16:33<15:23:55,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 149/8500 [16:39<15:25:01,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 150/8500 [16:46<15:29:44,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 151/8500 [16:53<15:23:51,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 152/8500 [16:59<15:23:48,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['realistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 153/8500 [17:06<15:21:21,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 154/8500 [17:12<15:21:43,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 155/8500 [17:19<15:24:53,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 156/8500 [17:26<15:24:46,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 157/8500 [17:33<15:29:39,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 158/8500 [17:39<15:26:53,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 159/8500 [17:46<15:25:03,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 160/8500 [17:52<15:21:37,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 161/8500 [17:59<15:23:20,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['kitty oppenheimer emily ; hoyte van hoytema ; kitty oppenheimer ; benny safdie ; downey chaplin ; frank dylan arnold ; christopher nolan ; jean tatlock florence pugh ; tom conti | triumph ; u s atomic energy commission. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 162/8500 [18:06<15:21:32,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 163/8500 [18:12<15:20:33,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 164/8500 [18:19<15:21:00,  6.63s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; central brant women probus club ; fairview community centre charity garage sale ; heritage united church ; day program ; woodman seniors group ; torchmen quartet at burtch baptist church ; beckett recreation centre ; pleasant united church ; river council on ; river women probus club ; sandy heidi macgregor at burtch baptist church ; autism dog services ; cowan community health hub ; burtch baptist church ; paris centre presbyterian church ; adult recreation therapy centre ; watchmen at burtch baptist church ; paris legion branch ; faith lutheran church ; navy club. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 165/8500 [18:26<15:20:04,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 166/8500 [18:32<15:20:12,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 167/8500 [18:39<15:19:58,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 168/8500 [18:45<15:20:24,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 169/8500 [18:52<15:23:04,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 170/8500 [18:59<15:24:17,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 171/8500 [19:05<15:22:36,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 172/8500 [19:12<15:20:47,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 173/8500 [19:19<15:26:15,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['marine audi ; united salad company ; luck services ; portland international raceway ; national auto sport association ; national champion. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 174/8500 [19:25<15:23:44,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 175/8500 [19:32<15:21:52,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 176/8500 [19:39<15:23:08,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 177/8500 [19:45<15:24:45,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 178/8500 [19:52<15:32:20,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 179/8500 [19:59<15:30:45,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 180/8500 [20:06<15:28:09,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 181/8500 [20:12<15:25:45,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 182/8500 [20:19<15:27:35,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 183/8500 [20:26<15:23:59,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 184/8500 [20:32<15:23:17,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 185/8500 [20:39<15:23:37,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 186/8500 [20:45<15:20:52,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 187/8500 [20:52<15:21:12,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 188/8500 [20:59<15:22:12,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 189/8500 [21:06<15:23:54,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 190/8500 [21:12<15:21:40,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 191/8500 [21:19<15:19:51,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 192/8500 [21:25<15:19:37,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 193/8500 [21:32<15:16:24,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 194/8500 [21:39<15:16:37,  6.62s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['science information ; energy networks association. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 195/8500 [21:45<15:15:04,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 196/8500 [21:52<15:12:52,  6.60s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 197/8500 [21:58<15:13:27,  6.60s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['city council ; martin library. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 198/8500 [22:05<15:15:38,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 199/8500 [22:12<15:15:34,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 200/8500 [22:18<15:17:57,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 201/8500 [22:25<15:19:43,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 202/8500 [22:32<15:23:28,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 203/8500 [22:38<15:20:53,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 204/8500 [22:45<15:26:55,  6.70s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['police. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 205/8500 [22:52<15:25:39,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 206/8500 [22:59<15:26:18,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 207/8500 [23:05<15:22:43,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['u s senate ; republican governors association ; white house. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 208/8500 [23:12<15:23:57,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 209/8500 [23:19<15:24:30,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 210/8500 [23:25<15:26:32,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 211/8500 [23:32<15:28:14,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   2%|▏         | 212/8500 [23:39<15:26:16,  6.71s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['wealth management financial services inc ; borden ladner gervais ; company advisory ; manulife financial corp ; arch capital inc ; baker tilly canada ; fortis inc ; silicon valley bank ; royal bank ; national bank financial ; harbourfront wealth management inc ; capital power corp ; dominion bank. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 213/8500 [23:45<15:26:33,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 214/8500 [23:52<15:25:25,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 215/8500 [23:59<15:24:27,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 216/8500 [24:05<15:22:31,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 217/8500 [24:12<15:20:44,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 218/8500 [24:19<15:19:46,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 219/8500 [24:26<15:25:08,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 220/8500 [24:32<15:21:59,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   3%|▎         | 221/8500 [24:39<15:12:38,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 222/8500 [24:45<15:14:19,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 223/8500 [24:52<15:17:45,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['kyoji horiguchi ; chris cariaso ; nordine taleb ; randa markos ; thomas almeida ; fabio maldonado ; chris clements ; bryan barberena ; alexis davis ; patrick cote ; chad laprise | fox sports ; bell centre. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 224/8500 [24:59<15:22:10,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 225/8500 [25:05<15:20:22,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; turkish space agency ; economic partnership agreement ; ministry of investment ; space agency. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 226/8500 [25:12<15:27:36,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 227/8500 [25:19<15:27:28,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 228/8500 [25:26<15:26:34,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 229/8500 [25:32<15:22:11,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 230/8500 [25:39<15:18:42,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 231/8500 [25:46<15:20:59,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 232/8500 [25:52<15:18:14,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 233/8500 [25:59<15:16:51,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 234/8500 [26:06<15:14:40,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 235/8500 [26:12<15:13:03,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 236/8500 [26:19<15:12:31,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 237/8500 [26:25<15:12:24,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 238/8500 [26:32<15:02:07,  6.55s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 239/8500 [26:38<15:06:19,  6.58s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 240/8500 [26:45<15:10:32,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 241/8500 [26:52<15:08:46,  6.60s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 242/8500 [26:58<15:13:59,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 243/8500 [27:05<15:12:56,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 244/8500 [27:12<15:13:19,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 245/8500 [27:18<15:16:37,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 246/8500 [27:25<15:22:16,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 247/8500 [27:32<15:19:46,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 248/8500 [27:38<15:18:06,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 249/8500 [27:45<15:17:18,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['services ; david l moss criminal justice center ; office of fiscal transparency ; oklahoma legislature ; health sciences ; tulsa county district attorney office ; substance abuse services ; department of mental health ; oklahoma state university center. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 250/8500 [27:52<15:18:35,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 251/8500 [27:59<15:18:28,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 252/8500 [28:05<15:20:52,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 253/8500 [28:12<15:19:15,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 254/8500 [28:19<15:22:36,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 255/8500 [28:25<15:23:55,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 256/8500 [28:32<15:21:07,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 257/8500 [28:39<15:20:40,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 258/8500 [28:45<15:19:49,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 259/8500 [28:52<15:18:01,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 260/8500 [28:59<15:17:41,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 261/8500 [29:05<15:16:58,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 262/8500 [29:12<15:26:48,  6.75s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 263/8500 [29:19<15:21:29,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 264/8500 [29:26<15:21:18,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 265/8500 [29:32<15:22:42,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 266/8500 [29:39<15:20:35,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 267/8500 [29:46<15:14:58,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 268/8500 [29:52<15:13:30,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 269/8500 [29:59<15:13:39,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 270/8500 [30:06<15:13:01,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 271/8500 [30:12<15:11:44,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 272/8500 [30:19<15:10:43,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 273/8500 [30:26<15:16:39,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 274/8500 [30:32<15:18:04,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 275/8500 [30:39<15:17:02,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 276/8500 [30:46<15:14:20,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 277/8500 [30:52<15:14:56,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 278/8500 [30:59<15:15:35,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 279/8500 [31:06<15:12:32,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 280/8500 [31:12<15:12:06,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 281/8500 [31:19<15:11:48,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 282/8500 [31:26<15:13:06,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 283/8500 [31:32<15:10:34,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 284/8500 [31:39<15:12:57,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 285/8500 [31:46<15:25:36,  6.76s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 286/8500 [31:53<15:22:43,  6.74s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 287/8500 [31:59<15:18:22,  6.71s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['utin shelter island little league ; national anthem. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 288/8500 [32:06<15:19:14,  6.72s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['rashtra samithi ; majilis - e - itthadul muslimeen ; komatireddy rajagopal reddy ; qasim razvi | bharatiya janata party ; university of hyderabad political science department ; greater hyderabad municipal corporation ; congress party ; telugu desam party ; archaeological survey of india. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 289/8500 [32:13<15:15:58,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 290/8500 [32:20<15:26:36,  6.77s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 291/8500 [32:26<15:22:20,  6.74s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 292/8500 [32:33<15:17:04,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 293/8500 [32:40<15:13:33,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; world health organization ; world food programme ; sea grain initiative. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 294/8500 [32:46<15:12:33,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['omi soga ; james joseph dresnok | u s forces korea ; washington post ; jiji press ; u s army ; new york times ; korean central news agency ; united nations. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 295/8500 [32:53<15:14:19,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 296/8500 [33:00<15:11:27,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   3%|▎         | 297/8500 [33:06<15:10:04,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 298/8500 [33:13<15:09:44,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 299/8500 [33:20<15:10:08,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 300/8500 [33:26<15:12:01,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 301/8500 [33:33<15:09:35,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 302/8500 [33:40<15:10:23,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['limited ; union home ministry department of official languages ; bombay high court on. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 303/8500 [33:46<15:10:55,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 304/8500 [33:53<15:11:07,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 305/8500 [34:00<15:10:58,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['francisco ; twitter ; drug administration ; johnson johnson. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 306/8500 [34:06<15:13:57,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 307/8500 [34:13<15:09:40,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 308/8500 [34:20<15:12:46,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 309/8500 [34:26<15:08:36,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 310/8500 [34:33<15:09:40,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 311/8500 [34:40<15:23:29,  6.77s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 312/8500 [34:46<15:15:01,  6.71s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 313/8500 [34:53<15:13:17,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 314/8500 [35:00<15:12:13,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 315/8500 [35:06<15:09:49,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 316/8500 [35:13<15:09:04,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['bluff ; university of virginia ; police department. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 317/8500 [35:20<15:09:53,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▎         | 318/8500 [35:27<15:12:37,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 319/8500 [35:33<15:09:02,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 320/8500 [35:40<15:07:10,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 321/8500 [35:46<15:05:52,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 322/8500 [35:53<15:05:48,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 323/8500 [36:00<15:06:13,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 324/8500 [36:06<15:09:58,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 325/8500 [36:13<15:06:52,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 326/8500 [36:20<15:05:17,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 327/8500 [36:26<15:04:44,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 328/8500 [36:33<15:07:32,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 329/8500 [36:40<15:05:15,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 330/8500 [36:46<15:05:22,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['blind ; reuters ; energy commerce ; legislation in congress ; traffic safety administration ; consumer technology association. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 331/8500 [36:53<15:04:37,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 332/8500 [37:00<15:04:22,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 333/8500 [37:06<15:05:34,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 334/8500 [37:13<15:08:28,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 335/8500 [37:20<15:11:35,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 336/8500 [37:26<15:10:44,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 337/8500 [37:33<15:06:41,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 338/8500 [37:40<15:06:21,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 339/8500 [37:46<15:07:30,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 340/8500 [37:53<15:03:38,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 341/8500 [38:00<15:02:15,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 342/8500 [38:06<15:02:23,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 343/8500 [38:13<15:02:25,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 344/8500 [38:19<15:04:08,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 345/8500 [38:26<15:03:35,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 346/8500 [38:32<14:52:37,  6.57s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 347/8500 [38:39<14:57:42,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 348/8500 [38:46<15:01:50,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 349/8500 [38:53<15:06:09,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 350/8500 [38:59<15:05:20,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 351/8500 [39:06<15:04:33,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 352/8500 [39:13<15:03:34,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 353/8500 [39:19<15:00:49,  6.63s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['sam morsy ; burgess barbrook ; jacob slater ; ben woodburn ; luke woolfenden ; conor chaplin ; vaclav hladky ; greg leigh ; marcus harness ; kyle edwards ; freddie ladapo ; walton hladky ; ladapo hirst ; kayden jackson |. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 354/8500 [39:26<15:02:02,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['otunba kunle kalejaye ; orowo king ; otunba kalejaye | high court ; national electoral commission ; peoples movement ; action alliance ; peoples democratic party. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 355/8500 [39:33<15:02:16,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 356/8500 [39:39<15:04:00,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 357/8500 [39:46<15:03:43,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 358/8500 [39:53<15:03:32,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 359/8500 [39:59<15:01:01,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 360/8500 [40:06<15:04:54,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 361/8500 [40:13<15:07:34,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 362/8500 [40:19<15:07:02,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 363/8500 [40:26<15:08:53,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 364/8500 [40:33<15:08:41,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 365/8500 [40:39<15:08:27,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 366/8500 [40:46<15:06:58,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 367/8500 [40:53<15:03:07,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 368/8500 [40:59<15:03:13,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 369/8500 [41:06<15:00:56,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 370/8500 [41:13<15:00:31,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 371/8500 [41:19<14:59:29,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 372/8500 [41:26<15:02:10,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 373/8500 [41:33<15:05:54,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 374/8500 [41:39<15:04:45,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 375/8500 [41:46<15:04:48,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 376/8500 [41:53<15:05:12,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['extras inc ; directors guild ; postmedia network inc. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 377/8500 [41:59<15:03:09,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 378/8500 [42:06<15:03:37,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 379/8500 [42:13<14:58:03,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 380/8500 [42:19<14:58:12,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 381/8500 [42:26<14:57:52,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   4%|▍         | 382/8500 [42:32<14:57:47,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ilip saha ; uddhav thackeray | express ; brihanmumbai municipal corporation. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 383/8500 [42:39<14:56:54,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 384/8500 [42:46<14:57:57,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   5%|▍         | 385/8500 [42:52<14:53:11,  6.60s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 386/8500 [42:59<15:01:42,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 387/8500 [43:06<14:59:05,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 388/8500 [43:12<15:01:43,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 389/8500 [43:19<15:05:31,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 390/8500 [43:26<15:02:21,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 391/8500 [43:32<15:02:36,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 392/8500 [43:39<15:03:34,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 393/8500 [43:46<15:04:03,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; cambodian people party ; european court. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 394/8500 [43:53<15:06:28,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 395/8500 [43:59<15:04:02,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 396/8500 [44:06<15:11:24,  6.75s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 397/8500 [44:13<15:07:04,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 398/8500 [44:19<15:01:05,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 399/8500 [44:26<15:01:51,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 400/8500 [44:33<15:03:58,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 401/8500 [44:39<15:02:17,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['samantha mathis ; annasophia robb ; david krumholtz ; jaimie alexander ; america ferrera ; los angeles | york university ; netflix. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 402/8500 [44:46<15:03:24,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 403/8500 [44:53<14:59:59,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 404/8500 [45:00<15:00:54,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 405/8500 [45:06<15:01:51,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 406/8500 [45:13<15:07:07,  6.72s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['university ; mary r koch art center mark arts ; yale university art gallery. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 407/8500 [45:20<15:04:19,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 408/8500 [45:26<15:04:14,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 409/8500 [45:33<15:02:26,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 410/8500 [45:40<15:02:08,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 411/8500 [45:46<15:00:49,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 412/8500 [45:53<15:07:15,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   5%|▍         | 413/8500 [46:00<14:58:54,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['energy storage solutions ; d america inc ; michigan economic development corporation ; detroit regional partnership. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 414/8500 [46:06<14:58:36,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 415/8500 [46:13<14:57:06,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 416/8500 [46:20<14:59:38,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 417/8500 [46:26<15:00:14,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 418/8500 [46:33<15:01:47,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 419/8500 [46:40<14:58:18,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 420/8500 [46:46<14:57:19,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 421/8500 [46:53<14:54:53,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 422/8500 [47:00<14:55:16,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 423/8500 [47:06<14:53:20,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▍         | 424/8500 [47:13<14:54:12,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 425/8500 [47:20<14:58:57,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 426/8500 [47:27<15:01:39,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 427/8500 [47:33<15:03:49,  6.72s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['alamos ; kenneth branagh ; florence pugh ; david krumholtz ; josh hartnett ; dane dehaan ; harry s truman ; jack quaid ; james urbaniak | atomic energy commission ; oval office. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 428/8500 [47:40<15:02:53,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 429/8500 [47:47<14:58:35,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 430/8500 [47:53<14:54:51,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 431/8500 [48:00<14:53:59,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 432/8500 [48:08<15:38:18,  6.98s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 433/8500 [48:14<15:25:42,  6.89s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 434/8500 [48:21<15:16:43,  6.82s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 435/8500 [48:28<15:12:17,  6.79s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 436/8500 [48:34<15:06:55,  6.75s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 437/8500 [48:41<15:11:22,  6.78s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 438/8500 [48:48<15:04:04,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 439/8500 [48:54<14:59:10,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['united states ; national center. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 440/8500 [49:01<14:58:37,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 441/8500 [49:08<14:55:27,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 442/8500 [49:14<14:53:52,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 443/8500 [49:21<14:53:47,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 444/8500 [49:28<14:55:54,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 445/8500 [49:34<14:52:12,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 446/8500 [49:41<14:53:34,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 447/8500 [49:47<14:51:43,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 448/8500 [49:54<14:54:08,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 449/8500 [50:01<14:59:49,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 450/8500 [50:08<14:59:51,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 451/8500 [50:14<14:58:07,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 452/8500 [50:21<14:58:21,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 453/8500 [50:28<14:55:53,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 454/8500 [50:34<14:58:32,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 455/8500 [50:41<14:55:08,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 456/8500 [50:48<14:54:43,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 457/8500 [50:54<14:53:31,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 458/8500 [51:01<14:52:01,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 459/8500 [51:08<15:04:52,  6.75s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 460/8500 [51:15<15:00:18,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 461/8500 [51:21<14:57:16,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 462/8500 [51:28<14:58:26,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 463/8500 [51:35<15:01:01,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 464/8500 [51:42<15:00:35,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 465/8500 [51:48<15:00:24,  6.72s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['a mission ; mission impossible ; peter graves mission impossible ; mob there a mission ; youtube ; a mission. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 466/8500 [51:55<15:01:27,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   5%|▌         | 467/8500 [52:02<14:56:40,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 468/8500 [52:08<14:54:28,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 469/8500 [52:15<14:52:35,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 470/8500 [52:22<14:54:12,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 471/8500 [52:28<14:51:39,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 472/8500 [52:35<14:54:28,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 473/8500 [52:42<14:51:52,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 474/8500 [52:48<14:52:08,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 475/8500 [52:55<14:48:42,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 476/8500 [53:02<14:49:18,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 477/8500 [53:08<14:49:34,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['institute of technology caltech ; u s bureau of labor statistics ; microsoft ; cornell university ; university of michigan umich ; cisco systems ; university of california ; georgia institute of technology ; national academy of engineering fellows ; california institute of technology ; northwestern university ; ivy league ; carnegie mellon university ; georgia institute of technology georgia tech. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 478/8500 [53:15<14:50:10,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['- photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 479/8500 [53:22<14:49:49,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 480/8500 [53:28<14:49:06,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 481/8500 [53:35<14:51:04,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 482/8500 [53:42<14:51:05,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 483/8500 [53:48<14:47:31,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 484/8500 [53:55<14:48:21,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 485/8500 [54:01<14:48:28,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 486/8500 [54:08<14:46:15,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 487/8500 [54:15<14:43:03,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 488/8500 [54:21<14:45:02,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 489/8500 [54:28<14:42:49,  6.61s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['market. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 490/8500 [54:34<14:43:49,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 491/8500 [54:41<14:48:57,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['fall river okd the community preservation. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 492/8500 [54:48<14:48:47,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 493/8500 [54:54<14:35:47,  6.56s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 494/8500 [55:01<14:37:44,  6.58s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 495/8500 [55:07<14:39:43,  6.59s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 496/8500 [55:14<14:38:48,  6.59s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 497/8500 [55:21<14:41:15,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 498/8500 [55:27<14:39:27,  6.59s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 499/8500 [55:34<14:42:12,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 500/8500 [55:41<14:41:46,  6.61s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['zelensky | lesya ukrainka national academic drama theatre. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 501/8500 [55:47<14:45:08,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 502/8500 [55:54<14:47:08,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 503/8500 [56:01<14:47:36,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 504/8500 [56:07<14:46:11,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['realistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   6%|▌         | 505/8500 [56:14<14:40:21,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 506/8500 [56:20<14:44:59,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 507/8500 [56:27<14:45:52,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 508/8500 [56:34<14:44:53,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 509/8500 [56:40<14:45:27,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 510/8500 [56:47<14:43:08,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 511/8500 [56:54<14:45:17,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 512/8500 [57:00<14:46:29,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 513/8500 [57:07<14:44:35,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 514/8500 [57:14<14:41:04,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 515/8500 [57:20<14:44:26,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 516/8500 [57:27<14:44:55,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['skyi ; catherine nicholls ; yulia kesaieva ; volodymyr zelensky ; anna chernova ; allegra goodwin ; yevgeny viktorovich ; cyril ramaphosa ; dmitry peskov ; leonid kasinsky ; kathernina krebs ; paul p murphy ; paul murphy ; hande atay alam ; sergey lavrov | defense department ; cnn ; united states ; national advanced missile systems ; wagner group ; ukrainian agriculture ministry ; russian federation ; prosecutor office ; infrastructure development of ukraine ; kurdistan worker party ; russian foreign ministry ; department of defense ; united nations ; sean grain initiative ; russian defense ministry ; ministry for communities ; international criminal court ; sea initiative ; russian ministry of defense ; ukraine security assistance initiative. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 517/8500 [57:34<14:47:32,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 518/8500 [57:40<14:46:20,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 519/8500 [57:47<14:46:28,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 520/8500 [57:54<14:43:22,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 521/8500 [58:00<14:42:28,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 522/8500 [58:07<14:45:47,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 523/8500 [58:14<14:45:08,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 524/8500 [58:20<14:43:35,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 525/8500 [58:27<14:41:40,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 526/8500 [58:33<14:37:35,  6.60s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 527/8500 [58:40<14:37:31,  6.60s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 528/8500 [58:46<14:38:24,  6.61s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['second republic ; member of parliament ; office of the federal attorney ; human capital development ; hung parliament ; clerk of parliament ; public service architecture ; roman catholic mission ; pcs members of parliament ; members of parliament ; parliament of sierra leone ; edwards secondary school at kingtom ; public relations department ; economic community of west african states ; international law ; commonwealth observer group for the ugandan elections ; standing orders of parliament ; university of cambridge ; second administration of the president. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 529/8500 [58:53<14:40:32,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 530/8500 [59:00<14:43:07,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▌         | 531/8500 [59:07<14:44:27,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 532/8500 [59:13<14:44:27,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 533/8500 [59:20<14:44:47,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 534/8500 [59:27<14:44:28,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 535/8500 [59:33<14:43:11,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 536/8500 [59:40<14:44:33,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['evans ; alberto e rodriquez getty ; josh brolin ; heath ledger ; brie larson | google ; justice league. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 537/8500 [59:46<14:42:41,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 538/8500 [59:53<14:41:13,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 539/8500 [1:00:00<14:40:33,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['giriadis ; danae bezantakou ; panagiotis lymperis ; adonis georgiades ; sulaiman almazroui ; georgios alexandrakis ; ioannis plakiotakis ; george j tsunis ; ioannis drymousis ; costis frangoulis ; isabella schidrich ; dimitri vassilacos | united states ; navios maritime ; navios maritime holdings ; top ships inc ; deputy speaker of the hellenic parliament ; tsakos energy navigation ltd ; danaos corporation ; profit civil company ; diana shipping inc ; eurodry ltd ; united maritime corporation ; bureau veritas ; safe bulkers inc ; union of greek shipowners ; york stock exchange ; tsakos group of companies ; castor maritime inc ; u s embassy ; exchange commission ; hellenic chamber ; pyxis tankers inc ; seanergy maritime corp ; nasdaq ; capital link inc ; global ship lease inc ; star bulk carriers corp ; 

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 540/8500 [1:00:06<14:43:27,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 541/8500 [1:00:13<14:42:13,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 542/8500 [1:00:20<14:44:28,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 543/8500 [1:00:26<14:43:16,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 544/8500 [1:00:33<14:41:46,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 545/8500 [1:00:40<14:41:31,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 546/8500 [1:00:46<14:42:37,  6.66s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['mann ; etienne devillers | suffolk county police on ; rh consultants associates in manhattan. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 547/8500 [1:00:53<14:41:06,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 548/8500 [1:01:00<14:43:43,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 549/8500 [1:01:06<14:48:22,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 550/8500 [1:01:13<14:44:52,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 551/8500 [1:01:20<14:44:36,  6.68s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   6%|▋         | 552/8500 [1:01:26<14:44:31,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 553/8500 [1:01:33<14:42:30,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 554/8500 [1:01:40<14:57:33,  6.78s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 555/8500 [1:01:47<14:49:57,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
Generating full dataset:   7%|▋         | 556/8500 [1:01:53<14:40:06,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 557/8500 [1:02:00<14:39:45,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 558/8500 [1:02:06<14:39:59,  6.65s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 559/8500 [1:02:13<14:39:49,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 560/8500 [1:02:20<14:42:02,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 561/8500 [1:02:26<14:39:17,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 562/8500 [1:02:33<14:41:40,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 563/8500 [1:02:40<14:44:44,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 564/8500 [1:02:47<14:45:14,  6.69s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['of new hampshire ; associated press ; white house ; u s senate. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 565/8500 [1:02:53<14:42:20,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 566/8500 [1:03:00<14:48:45,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 567/8500 [1:03:07<14:45:57,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 568/8500 [1:03:13<14:42:47,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 569/8500 [1:03:20<14:41:09,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['lionel richie ; michael mcdonald | jude children research hospital. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 570/8500 [1:03:27<14:39:43,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 571/8500 [1:03:33<14:38:06,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 572/8500 [1:03:40<14:37:09,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 573/8500 [1:03:46<14:34:32,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 574/8500 [1:03:53<14:33:49,  6.61s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 575/8500 [1:04:00<14:35:36,  6.63s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 576/8500 [1:04:06<14:34:56,  6.62s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 577/8500 [1:04:13<14:36:18,  6.64s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 578/8500 [1:04:20<14:38:12,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 579/8500 [1:04:26<14:38:38,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 580/8500 [1:04:34<15:00:51,  6.82s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 581/8500 [1:04:40<14:56:13,  6.79s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 582/8500 [1:04:47<15:09:40,  6.89s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 583/8500 [1:04:54<14:59:16,  6.82s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 584/8500 [1:05:01<14:51:24,  6.76s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 585/8500 [1:05:07<14:53:25,  6.77s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 586/8500 [1:05:14<14:48:18,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 587/8500 [1:05:21<14:46:35,  6.72s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 588/8500 [1:05:27<14:41:59,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 589/8500 [1:05:34<14:40:47,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 590/8500 [1:05:41<14:38:54,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 591/8500 [1:05:47<14:37:47,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 592/8500 [1:05:54<14:35:44,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 593/8500 [1:06:01<14:35:39,  6.64s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 594/8500 [1:06:07<14:36:32,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 595/8500 [1:06:14<14:38:20,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 596/8500 [1:06:21<14:39:55,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 597/8500 [1:06:28<14:46:08,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 598/8500 [1:06:34<14:41:56,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 599/8500 [1:06:41<14:42:20,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 600/8500 [1:06:48<14:45:50,  6.73s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 601/8500 [1:06:54<14:43:01,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 602/8500 [1:07:01<14:42:36,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 603/8500 [1:07:08<14:41:04,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 604/8500 [1:07:14<14:37:37,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 605/8500 [1:07:21<14:37:52,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 606/8500 [1:07:28<14:35:15,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 607/8500 [1:07:34<14:36:05,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 608/8500 [1:07:41<14:43:38,  6.72s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['collaros |. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 609/8500 [1:07:48<14:56:49,  6.82s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 610/8500 [1:07:55<14:50:03,  6.77s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 611/8500 [1:08:01<14:45:39,  6.74s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; emma mcintyre getty ; matt damon ; ben kaufman ; fran drescher | writers guild ; disney ; united states ; instagram ; netflix ; new york daily news ; screen actors guild. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 612/8500 [1:08:08<14:41:18,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 613/8500 [1:08:15<14:40:20,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 614/8500 [1:08:21<14:36:04,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 615/8500 [1:08:28<14:52:55,  6.79s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 616/8500 [1:08:35<14:44:59,  6.74s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 617/8500 [1:08:42<14:39:53,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 618/8500 [1:08:48<14:35:54,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 619/8500 [1:08:55<14:36:02,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['assistance program ; food bank ; food bank inc ; gleaners food bank ; community harvest food bank. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 620/8500 [1:09:02<14:35:34,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 621/8500 [1:09:08<14:37:42,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 622/8500 [1:09:15<14:35:20,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 623/8500 [1:09:22<14:36:43,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 624/8500 [1:09:28<14:33:04,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 625/8500 [1:09:35<14:35:22,  6.67s/it]The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['cherokee point elementary school ; lincoln high school ; morse high school ; bell middle school ; san diego education association. illustration, non - photorealistic, news thumbnail.']


  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 626/8500 [1:09:42<14:34:23,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 627/8500 [1:09:48<14:34:43,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 628/8500 [1:09:55<14:32:24,  6.65s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 629/8500 [1:10:02<14:34:52,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 630/8500 [1:10:08<14:35:42,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 631/8500 [1:10:15<14:39:32,  6.71s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 632/8500 [1:10:22<14:37:32,  6.69s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 633/8500 [1:10:28<14:37:51,  6.70s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 634/8500 [1:10:35<14:36:00,  6.68s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 635/8500 [1:10:42<14:34:32,  6.67s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

Generating full dataset:   7%|▋         | 636/8500 [1:10:48<14:33:20,  6.66s/it]

  0%|          | 0/30 [00:00<?, ?it/s]

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

# ===============================
# Retrieval Metrics Function
# ===============================
def evaluate_retrieval(df_results, df_queries, top_k=5, return_all=False):
    metrics = {"precision_at_k": [], "recall_at_k": [], "mrr": [], "ndcg": []}

    for qid in df_queries["article_id"]:
        q_results = df_results[df_results["query_article_id"] == qid]
        gt_image = df_queries[df_queries["article_id"] == qid]["image_id"].values[0]

        ranks = q_results[q_results["retrieved_image_id"] == gt_image]["rank"].values

        if len(ranks) > 0:
            rank = ranks[0]
            prec_at_k = np.mean(q_results.head(top_k)["retrieved_image_id"] == gt_image)
            rec_at_k = 1.0 if rank <= top_k else 0.0
            rr = 1.0 / rank
            ndcg = 1.0 / np.log2(rank + 1)
        else:
            prec_at_k, rec_at_k, rr, ndcg = 0, 0, 0, 0

        metrics["precision_at_k"].append(prec_at_k)
        metrics["recall_at_k"].append(rec_at_k)
        metrics["mrr"].append(rr)
        metrics["ndcg"].append(ndcg)

    summary = {m: np.mean(v) for m, v in metrics.items()}
    if return_all:
        return summary, metrics
    return summary


# ===============================
# Evaluate Retrieval
# ===============================
retrieval_metrics_subset, retrieval_all_subset = evaluate_retrieval(df_subset_results, df_subset, return_all=True)
retrieval_metrics_full, retrieval_all_full = evaluate_retrieval(df_full_results, df_full, return_all=True)

print("✅ Retrieval Metrics (Subset):", retrieval_metrics_subset)
print("✅ Retrieval Metrics (Full):", retrieval_metrics_full)


# ===============================
# Load CLIP Model for Generation Evaluation
# ===============================
device = "cuda" if torch.cuda.is_available() else "cpu"
print("⚡ Using device:", device)

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def compute_clip_score(df_generated, return_all=False):
    scores = []
    for _, row in tqdm(df_generated.iterrows(), total=len(df_generated), desc="CLIP scoring"):
        try:
            image = Image.open(row["image_path"]).convert("RGB")
            inputs = clip_processor(
                text=[row["article_title"]],
                images=image,
                return_tensors="pt",
                padding=True
            ).to(device)
            outputs = clip_model(**inputs)
            score = outputs.logits_per_image.item()
            scores.append(score)
        except Exception as e:
            print(f"⚠️ Error scoring {row['article_id']}: {e}")
            scores.append(0.0)

    avg_score = np.mean(scores)
    if return_all:
        return avg_score, scores
    return avg_score


# ===============================
# Compute CLIP Scores
# ===============================
clip_score_subset, clip_scores_subset = compute_clip_score(df_generated, return_all=True)
clip_score_full, clip_scores_full = compute_clip_score(df_generated_full, return_all=True)

print(f"✅ Avg CLIP Score (Subset): {clip_score_subset:.4f}")
print(f"✅ Avg CLIP Score (Full): {clip_score_full:.4f}")


# ===============================
# Visualization
# ===============================
# 1. Retrieval Metrics Comparison
metrics_df = pd.DataFrame([
    {"Dataset": "Subset", **retrieval_metrics_subset},
    {"Dataset": "Full", **retrieval_metrics_full}
])

plt.figure(figsize=(8,6))
metrics_df.set_index("Dataset").plot(kind="bar", rot=0)
plt.title("Retrieval Metrics Comparison")
plt.ylabel("Score")
plt.ylim(0, 1.1)
plt.legend(title="Metric")
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

# 2. CLIP Score Comparison
plt.figure(figsize=(6,4))
plt.bar(["Subset", "Full"], [clip_score_subset, clip_score_full], color=["skyblue", "lightgreen"])
plt.title("Average CLIP Score for Generated Images")
plt.ylabel("CLIP Score")
plt.ylim(0, max(clip_score_subset, clip_score_full) + 1)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()


# ===============================
# 🔹 NEW: Variance & Distribution Analysis
# ===============================
# Retrieval Variance
for dataset_name, metrics_all in [("Subset", retrieval_all_subset), ("Full", retrieval_all_full)]:
    print(f"\n📊 Retrieval Variance ({dataset_name})")
    for m, values in metrics_all.items():
        print(f"{m}: mean={np.mean(values):.4f}, var={np.var(values):.4f}, std={np.std(values):.4f}")

    # Boxplot
    plt.figure(figsize=(7,5))
    plt.boxplot(metrics_all.values(), labels=metrics_all.keys())
    plt.title(f"Retrieval Metric Distributions - {dataset_name}")
    plt.ylabel("Score")
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.show()

# CLIP Score Distributions
for dataset_name, scores in [("Subset", clip_scores_subset), ("Full", clip_scores_full)]:
    print(f"\n📊 CLIP Score Stats ({dataset_name})")
    print(f"mean={np.mean(scores):.4f}, var={np.var(scores):.4f}, std={np.std(scores):.4f}, min={np.min(scores):.4f}, max={np.max(scores):.4f}")

    plt.figure(figsize=(7,5))
    plt.hist(scores, bins=30, color="skyblue", edgecolor="black", alpha=0.7)
    plt.title(f"CLIP Score Distribution - {dataset_name}")
    plt.xlabel("CLIP Score")
    plt.ylabel("Frequency")
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.show()
