In [None]:
import torch
from PIL import Image
import numpy as np
from tqdm import tqdm
import pickle
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from datasets import load_dataset

# Setup

In [None]:
!pip install transformers datasets tqdm pandas colpali-engine multilingual-clip open-clip-torch

In [None]:
# Dataset configuration
# DATASET_NAME is handed to load_bias_benchmark_dataset() by every processing cell.
DATASET_NAME = "Chula-AI/association_bias_benchmark"  # Dataset to use
# Directory that receives one pickle file per model run.
EMBEDDING_SAVE_PATH = "embeddings"  # Path to save the embeddings

# Processing configuration
# START_INDEX: first dataset index visited by the processing loops; it is also
# embedded in the output file name.
START_INDEX = 0
# END_INDEX: exclusive upper bound on the dataset index; the processing cells
# clamp it to the dataset length.
END_INDEX = None  # None means process all

# Embeddings

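Expected schema of each saved entry (one dict per processed dataset row):
- concept_id
- concept_in_native
- image_id
- image_key (copy of image_id, kept for backward compatibility)
- index_in_dataset
- concept
- concept_country
- country
- title
- image_embedding (None when no image embedding was computed)
- text_embeddings
  - concept_embedding (embedding of `concept`)
  - translated_concept_embedding (embedding of `concept_in_native`)
- error (present only when processing the entry raised an exception)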

In [None]:
# Helper function to load dataset
def load_bias_benchmark_dataset(dataset_name, config_name="image_metadata", split="train"):
    """
    Load one configuration/split of the Association Bias Benchmark dataset.

    Args:
        dataset_name: Name of the dataset on HuggingFace
        config_name: Dataset configuration, forwarded to ``load_dataset`` as
            ``name``. Defaults to "image_metadata", the value that was
            previously hard-coded.
        split: Dataset split to load. Defaults to "train", the value that was
            previously hard-coded.

    Returns:
        The object returned by ``load_dataset`` for the requested
        configuration and split.
    """
    print(f"Loading dataset: {dataset_name}")
    dataset = load_dataset(dataset_name, name=config_name, split=split)
    print(f"Dataset loaded with {len(dataset)} entries")
    return dataset

def concatenate_tensors(tensor_list):
    """
    Join tensors into a single tensor with a new leading dimension.

    Every item gets a leading axis of size 1 and the results are concatenated
    along dimension 0. Used for Jina embeddings.
    """
    # A single item only needs the extra leading axis.
    if len(tensor_list) == 1:
        return tensor_list[0].unsqueeze(0)
    expanded = [item.unsqueeze(0) for item in tensor_list]
    return torch.cat(expanded, dim=0)

print("Helper functions loaded.")

## CLIP Models

In [None]:
# CLIP imports
from transformers import AutoModel, AutoProcessor

In [None]:
# Dropdown for picking the CLIP checkpoint: (label shown in the widget, model id)
clip_model_options = [
    ("OpenAI CLIP ViT-Large", "openai/clip-vit-large-patch14"),
    ("Chinese CLIP ViT-Large", "OFA-Sys/chinese-clip-vit-large-patch14"),
]

clip_model_dropdown = widgets.Dropdown(
    options=clip_model_options,
    value="openai/clip-vit-large-patch14",
    description='CLIP Model:',
    style=dict(description_width='initial'),
)


def on_clip_model_change(change):
    """
    Mirror a new dropdown selection into the module-level CLIP name variables.

    Only the names are updated; nothing is loaded here.
    """
    global clip_model_name, clip_model_name_part
    selected = change['new']
    clip_model_name, clip_model_name_part = selected, selected.split("/")[-1]
    print(f"Selected CLIP model: {clip_model_name}")


clip_model_dropdown.observe(on_clip_model_change, names='value')

# Seed the globals from the widget's current value
clip_model_name = clip_model_dropdown.value
clip_model_name_part = clip_model_name.split("/")[-1]

display(clip_model_dropdown)
print(f"Initial CLIP model: {clip_model_name}")

In [None]:
# Load the selected CLIP checkpoint together with its processor
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

clip_model = AutoModel.from_pretrained(clip_model_name)
clip_model = clip_model.to(device)
clip_processor = AutoProcessor.from_pretrained(clip_model_name)

# The text tower's position-embedding count is used as the tokenizer truncation length.
config = clip_model.config
MAX_LENGTH = config.text_config.max_position_embeddings

print(f"CLIP model loaded: {clip_model_name}")
print(f"Maximum text length: {MAX_LENGTH} tokens")

In [None]:
# Load dataset for CLIP processing
dataset = load_bias_benchmark_dataset(DATASET_NAME)

# Exclusive upper index: the whole dataset when END_INDEX is None, otherwise
# END_INDEX clamped to the dataset length.
eval_size = len(dataset) if END_INDEX is None else min(END_INDEX, len(dataset))
eval_dataset = dataset.select(range(eval_size))

embeddings_list = []

# Setup save path
os.makedirs(EMBEDDING_SAVE_PATH, exist_ok=True)
save_path = os.path.join(EMBEDDING_SAVE_PATH, f"image_text_embeddings_{clip_model_name_part}_{START_INDEX}_{eval_size}.pkl")

print(f"Processing entries from index {START_INDEX} to {eval_size}...")
print(f"Total dataset size: {len(eval_dataset)}")


def _clip_text_embedding(text):
    """
    Return the L2-normalised CLIP text embedding of ``text`` as a NumPy array.

    The text is cast to ``str``, tokenised with truncation at MAX_LENGTH and
    run through ``clip_model.get_text_features``. Callers are expected to wrap
    the call in ``torch.no_grad()``.
    """
    text_inputs = clip_processor(
        text=str(text),
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=MAX_LENGTH
    )
    text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
    text_outputs = clip_model.get_text_features(**text_inputs)
    text_features = text_outputs / text_outputs.norm(dim=-1, keepdim=True)
    return text_features.cpu().numpy()


# Process entries
for i in tqdm(range(START_INDEX, eval_size), desc="Processing CLIP embeddings"):
    current_entry = eval_dataset[i]

    # Initialize entry with expected schema
    entry_embeddings = {
        'concept_id': current_entry.get('concept_id', None),
        'concept_in_native': current_entry.get('concept_in_native'),
        'image_id': current_entry.get('image_id', None),
        # image_key mirrors image_id (kept for backward compatibility)
        'image_key': current_entry.get('image_id', None),
        'index_in_dataset': i,
        'concept': current_entry.get('concept', None),
        'concept_country': current_entry.get('concept_country', None),
        'country': current_entry.get('country', None),
        'title': current_entry.get('title', None),
        'image_embedding': None,
        'text_embeddings': {}
    }

    try:
        with torch.no_grad():
            # 1. Get image embedding (L2-normalised)
            if current_entry.get('image') is not None:
                image_inputs = clip_processor(images=current_entry['image'], return_tensors="pt")
                image_inputs = {k: v.to(device) for k, v in image_inputs.items()}
                image_outputs = clip_model.get_image_features(**image_inputs)
                image_features = image_outputs / image_outputs.norm(dim=-1, keepdim=True)
                entry_embeddings['image_embedding'] = image_features.cpu().numpy()
            else:
                print(f"Warning: No image found for entry index {i}")

            # 2. Get text embeddings; empty or missing texts are skipped
            if current_entry.get('concept'):
                entry_embeddings['text_embeddings']['concept_embedding'] = _clip_text_embedding(current_entry['concept'])

            if current_entry.get('concept_in_native'):
                entry_embeddings['text_embeddings']['translated_concept_embedding'] = _clip_text_embedding(current_entry['concept_in_native'])

        embeddings_list.append(entry_embeddings)

    except Exception as e:
        # Keep the partially filled entry and record the failure so that the
        # output still has one record per processed index.
        print(f"Error processing entry {i}: {str(e)}")
        entry_embeddings['error'] = str(e)
        embeddings_list.append(entry_embeddings)

    # Clean up GPU memory periodically
    if i % 100 == 0:
        torch.cuda.empty_cache()

# Final cleanup
torch.cuda.empty_cache()

# Save embeddings
print(f"Saving embeddings to: {save_path}")
with open(save_path, 'wb') as f:
    pickle.dump(embeddings_list, f)

print(f"Processing complete. Embeddings saved to: {save_path}")
print(f"Total entries processed: {len(embeddings_list)}")

# Print summary
successful_image_embeddings = sum(1 for entry in embeddings_list if entry.get('image_embedding') is not None)
successful_text_embeddings = sum(1 for entry in embeddings_list if entry.get('text_embeddings') and len(entry['text_embeddings']) > 0)

print(f"Successful image embeddings: {successful_image_embeddings}/{len(embeddings_list)}")
print(f"Successful text embeddings: {successful_text_embeddings}/{len(embeddings_list)}")

## ColQwen Models

In [None]:
# ColQwen imports
from colpali_engine.models import ColQwen2_5, ColQwen2_5_Processor, ColQwen2, ColQwen2Processor

In [None]:
# Dropdown for picking the ColQwen checkpoint: (label shown in the widget, model id)
colqwen_model_options = [
    ("ColQwen2.5-3b-multilingual", "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"),
    ("ColQwen2.5-7b-multilingual", "Metric-AI/ColQwen2.5-7b-multilingual-v1.0"),
    ("colqwen2.5-v0.2", "vidore/colqwen2.5-v0.2"),
    ("colqwen2-v1.0", "vidore/colqwen2-v1.0"),
]

colqwen_model_dropdown = widgets.Dropdown(
    options=colqwen_model_options,
    value="Metric-AI/ColQwen2.5-3b-multilingual-v1.0",
    description='ColQwen Model:',
    style=dict(description_width='initial'),
)


def on_colqwen_model_change(change):
    """
    Mirror a new dropdown selection into the module-level ColQwen name variables.

    Only the names are updated; nothing is loaded here.
    """
    global colqwen_model_name, colqwen_model_name_part
    selected = change['new']
    colqwen_model_name, colqwen_model_name_part = selected, selected.split("/")[-1]
    print(f"Selected ColQwen model: {colqwen_model_name}")


colqwen_model_dropdown.observe(on_colqwen_model_change, names='value')

# Seed the globals from the widget's current value
colqwen_model_name = colqwen_model_dropdown.value
colqwen_model_name_part = colqwen_model_name.split("/")[-1]

display(colqwen_model_dropdown)
print(f"Initial ColQwen model: {colqwen_model_name}")

In [None]:
# Load ColQwen model
# Checkpoints whose name contains "colqwen2.5" use the ColQwen2_5 classes;
# every other checkpoint uses the ColQwen2 classes.
if "colqwen2.5" in colqwen_model_name.lower():
    _colqwen_processor_cls, _colqwen_model_cls = ColQwen2_5_Processor, ColQwen2_5
else:
    _colqwen_processor_cls, _colqwen_model_cls = ColQwen2Processor, ColQwen2

colqwen_processor = _colqwen_processor_cls.from_pretrained(colqwen_model_name)
colqwen_model = _colqwen_model_cls.from_pretrained(
    colqwen_model_name,
    torch_dtype=torch.bfloat16,
    device_map="cuda:0",
).eval()

print(f"ColQwen model loaded: {colqwen_model_name}")

In [None]:
# Load dataset for ColQwen processing
dataset = load_bias_benchmark_dataset(DATASET_NAME)

# Exclusive upper index: the whole dataset when END_INDEX is None, otherwise
# END_INDEX clamped to the dataset length.
eval_size = len(dataset) if END_INDEX is None else min(END_INDEX, len(dataset))
eval_dataset = dataset.select(range(eval_size))

embeddings_list = []

# Setup save path. The directory is created here so this section also works on
# a fresh kernel where no other model section has been run.
os.makedirs(EMBEDDING_SAVE_PATH, exist_ok=True)
save_path = os.path.join(EMBEDDING_SAVE_PATH, f"image_text_embeddings_{colqwen_model_name_part}_{START_INDEX}_{eval_size}.pkl")

print(f"Processing entries from index {START_INDEX} to {eval_size}...")


def _colqwen_query_embedding(text):
    """
    Embed one query string with ColQwen and return it as a float32 NumPy array.

    Callers are expected to wrap the call in ``torch.no_grad()``.
    """
    batch_queries = colqwen_processor.process_queries([text]).to(colqwen_model.device)
    # .float() converts the model output to float32 before the NumPy conversion.
    return colqwen_model(**batch_queries).cpu().float().numpy()


# Process entries
for i in tqdm(range(START_INDEX, eval_size), desc="Processing ColQwen embeddings"):
    current_entry = eval_dataset[i]

    # Initialize entry with expected schema
    entry_embeddings = {
        'concept_id': current_entry.get('concept_id', None),
        'concept_in_native': current_entry.get('concept_in_native'),
        'image_id': current_entry.get('image_id', None),
        # image_key mirrors image_id (kept for backward compatibility)
        'image_key': current_entry.get('image_id', None),
        'index_in_dataset': i,
        'concept': current_entry.get('concept', None),
        'concept_country': current_entry.get('concept_country', None),
        'country': current_entry.get('country', None),
        'title': current_entry.get('title', None),
        # No image embedding is computed in this section; the field stays None.
        'image_embedding': None,
        'text_embeddings': {}
    }

    try:
        with torch.no_grad():
            # Get text embeddings; empty or missing texts are skipped
            if current_entry.get('concept'):
                entry_embeddings['text_embeddings']['concept_embedding'] = _colqwen_query_embedding(current_entry['concept'])

            if current_entry.get('concept_in_native'):
                entry_embeddings['text_embeddings']['translated_concept_embedding'] = _colqwen_query_embedding(current_entry['concept_in_native'])

        embeddings_list.append(entry_embeddings)

    except Exception as e:
        # Keep the partially filled entry and record the failure so that the
        # output still has one record per processed index.
        print(f"Error processing entry {i}: {str(e)}")
        entry_embeddings['error'] = str(e)
        embeddings_list.append(entry_embeddings)

# Final cleanup
torch.cuda.empty_cache()

# Save embeddings
print(f"Saving embeddings to: {save_path}")
with open(save_path, 'wb') as f:
    pickle.dump(embeddings_list, f)

print(f"Processing complete. Total entries processed: {len(embeddings_list)}")

## GME Models

In [None]:
# GME imports
from transformers import AutoModel

In [None]:
# Dropdown for picking the GME checkpoint: (label shown in the widget, model id)
gme_model_options = [
    ("GME-Qwen2-VL-2B", "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"),
    ("GME-Qwen2-VL-7B", "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct"),
]

gme_model_dropdown = widgets.Dropdown(
    options=gme_model_options,
    value="Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
    description='GME Model:',
    style=dict(description_width='initial'),
)


def on_gme_model_change(change):
    """
    Mirror a new dropdown selection into the module-level GME name variables.

    Only the names are updated; nothing is loaded here.
    """
    global gme_model_name, gme_model_name_part
    selected = change['new']
    gme_model_name, gme_model_name_part = selected, selected.split("/")[-1]
    print(f"Selected GME model: {gme_model_name}")


gme_model_dropdown.observe(on_gme_model_change, names='value')

# Seed the globals from the widget's current value
gme_model_name = gme_model_dropdown.value
gme_model_name_part = gme_model_name.split("/")[-1]

display(gme_model_dropdown)
print(f"Initial GME model: {gme_model_name}")

In [None]:
# Load the selected GME checkpoint in float16 on the CUDA device.
# trust_remote_code=True lets transformers run the model code shipped with the checkpoint.
gme = AutoModel.from_pretrained(
    gme_model_name,
    trust_remote_code=True,
    device_map='cuda',
    torch_dtype="float16",
)

print(f"GME model loaded: {gme_model_name}")

In [None]:
# Load dataset for GME processing
dataset = load_bias_benchmark_dataset(DATASET_NAME)

# Exclusive upper index: the whole dataset when END_INDEX is None, otherwise
# END_INDEX clamped to the dataset length.
eval_size = len(dataset) if END_INDEX is None else min(END_INDEX, len(dataset))
eval_dataset = dataset.select(range(eval_size))

embeddings_list = []

# Setup save path. The directory is created here so this section also works on
# a fresh kernel where no other model section has been run.
os.makedirs(EMBEDDING_SAVE_PATH, exist_ok=True)
save_path = os.path.join(EMBEDDING_SAVE_PATH, f"image_text_embeddings_{gme_model_name_part}_{START_INDEX}_{eval_size}.pkl")

print(f"Processing entries from index {START_INDEX} to {eval_size}...")

# Process entries
for i in tqdm(range(START_INDEX, eval_size), desc="Processing GME embeddings"):
    current_entry = eval_dataset[i]

    # Initialize entry with expected schema
    entry_embeddings = {
        'concept_id': current_entry.get('concept_id', None),
        'concept_in_native': current_entry.get('concept_in_native'),
        'image_id': current_entry.get('image_id', None),
        # image_key mirrors image_id (kept for backward compatibility)
        'image_key': current_entry.get('image_id', None),
        'index_in_dataset': i,
        'concept': current_entry.get('concept', None),
        'concept_country': current_entry.get('concept_country', None),
        'country': current_entry.get('country', None),
        'title': current_entry.get('title', None),
        # No image embedding is computed in this section; the field stays None.
        'image_embedding': None,
        'text_embeddings': {}
    }

    try:
        with torch.no_grad():
            # Get text embeddings; empty or missing texts are skipped
            text_embeddings = entry_embeddings['text_embeddings']
            if current_entry.get('concept'):
                concept_text_embeddings = gme.get_text_embeddings(texts=[current_entry['concept']])
                text_embeddings['concept_embedding'] = concept_text_embeddings.cpu().numpy()

            if current_entry.get('concept_in_native'):
                native_embeddings = gme.get_text_embeddings(texts=[current_entry['concept_in_native']])
                text_embeddings['translated_concept_embedding'] = native_embeddings.cpu().numpy()

        embeddings_list.append(entry_embeddings)

    except Exception as e:
        # Keep the partially filled entry and record the failure so that the
        # output still has one record per processed index.
        print(f"Error processing entry {i}: {str(e)}")
        entry_embeddings['error'] = str(e)
        embeddings_list.append(entry_embeddings)

# Final cleanup
torch.cuda.empty_cache()

# Save embeddings
print(f"Saving embeddings to: {save_path}")
with open(save_path, 'wb') as f:
    pickle.dump(embeddings_list, f)

print(f"Processing complete. Total entries processed: {len(embeddings_list)}")

## Jina Models

In [None]:
# Jina imports
from transformers import AutoModel

In [None]:
# Jina checkpoint to use, plus its short name (the text after the last "/")
# which the processing cell puts into the output file name.
jina_model_name = 'jinaai/jina-embeddings-v4'
jina_model_name_part = jina_model_name.rsplit("/", 1)[-1]

print(f"Using Jina model: {jina_model_name}")

In [None]:
# Load the Jina checkpoint in float16 and move it to the available device
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# trust_remote_code=True lets transformers run the model code shipped with the checkpoint.
jina_model = AutoModel.from_pretrained(
    jina_model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
jina_model.to(device)

print(f"Jina model loaded: {jina_model_name}")

In [None]:
# Load dataset for Jina processing
dataset = load_bias_benchmark_dataset(DATASET_NAME)

# Exclusive upper index: the whole dataset when END_INDEX is None, otherwise
# END_INDEX clamped to the dataset length.
eval_size = len(dataset) if END_INDEX is None else min(END_INDEX, len(dataset))
eval_dataset = dataset.select(range(eval_size))

embeddings_list = []

# Setup save path. The directory is created here so this section also works on
# a fresh kernel where no other model section has been run.
os.makedirs(EMBEDDING_SAVE_PATH, exist_ok=True)
save_path = os.path.join(EMBEDDING_SAVE_PATH, f"image_text_embeddings_{jina_model_name_part}_{START_INDEX}_{eval_size}.pkl")

print(f"Processing entries from index {START_INDEX} to {eval_size}...")

# Process entries
for i in tqdm(range(START_INDEX, eval_size), desc="Processing Jina embeddings"):
    current_entry = eval_dataset[i]

    # Initialize entry with expected schema
    entry_embeddings = {
        'concept_id': current_entry.get('concept_id', None),
        'concept_in_native': current_entry.get('concept_in_native'),
        'image_id': current_entry.get('image_id', None),
        # image_key mirrors image_id (kept for backward compatibility)
        'image_key': current_entry.get('image_id', None),
        'index_in_dataset': i,
        'concept': current_entry.get('concept', None),
        'concept_country': current_entry.get('concept_country', None),
        'country': current_entry.get('country', None),
        'title': current_entry.get('title', None),
        'image_embedding': None,
        'text_embeddings': {}
    }

    try:
        with torch.no_grad():
            # 1. Get image embedding
            if current_entry.get('image') is not None:
                # The image is wrapped in a list, exactly like the text calls
                # below, so that concatenate_tensors() receives a list of
                # tensors as its contract expects.
                # NOTE(review): encode_image appears to return a bare tensor
                # for a non-list input, which concatenate_tensors() would then
                # iterate element by element - confirm against the model card.
                image_embeddings = jina_model.encode_image(
                    images=[current_entry['image']],
                    task="retrieval",
                )
                image_embeddings = concatenate_tensors(image_embeddings)
                entry_embeddings['image_embedding'] = image_embeddings.cpu().numpy()
            else:
                print(f"Warning: No image found for entry index {i}")

            # 2. Get text embeddings; empty or missing texts are skipped
            if current_entry.get('concept'):
                concept_embeddings = jina_model.encode_text(
                    texts=[current_entry['concept']],
                    task="retrieval",
                )
                concept_embeddings = concatenate_tensors(concept_embeddings)
                entry_embeddings['text_embeddings']['concept_embedding'] = concept_embeddings.cpu().numpy()

            if current_entry.get('concept_in_native'):
                native_embeddings = jina_model.encode_text(
                    texts=[current_entry['concept_in_native']],
                    task="retrieval",
                )
                native_embeddings = concatenate_tensors(native_embeddings)
                entry_embeddings['text_embeddings']['translated_concept_embedding'] = native_embeddings.cpu().numpy()

        embeddings_list.append(entry_embeddings)

    except Exception as e:
        # Keep the partially filled entry and record the failure so that the
        # output still has one record per processed index.
        print(f"Error processing entry {i}: {str(e)}")
        entry_embeddings['error'] = str(e)
        embeddings_list.append(entry_embeddings)

# Final cleanup
torch.cuda.empty_cache()

# Save embeddings
print(f"Saving embeddings to: {save_path}")
with open(save_path, 'wb') as f:
    pickle.dump(embeddings_list, f)

print(f"Processing complete. Total entries processed: {len(embeddings_list)}")

# Print summary
successful_image_embeddings = sum(1 for entry in embeddings_list if entry.get('image_embedding') is not None)
successful_text_embeddings = sum(1 for entry in embeddings_list if entry.get('text_embeddings') and len(entry['text_embeddings']) > 0)

print(f"Successful image embeddings: {successful_image_embeddings}/{len(embeddings_list)}")
print(f"Successful text embeddings: {successful_text_embeddings}/{len(embeddings_list)}")

## Multilingual CLIP Models

In [None]:
# Multilingual CLIP imports
from multilingual_clip import pt_multilingual_clip
import transformers
import open_clip

In [None]:
# Dropdown for picking the Multilingual CLIP checkpoint: (label shown in the widget, model id)
mclip_model_options = [
    ("XLM-Roberta-Large-Vit-L-14", "M-CLIP/XLM-Roberta-Large-Vit-L-14"),
    ("XLM-Roberta-Large-Vit-B-16Plus", "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus"),
]

mclip_model_dropdown = widgets.Dropdown(
    options=mclip_model_options,
    value="M-CLIP/XLM-Roberta-Large-Vit-L-14",
    description='M-CLIP Model:',
    style=dict(description_width='initial'),
)


def on_mclip_model_change(change):
    """
    Mirror a new dropdown selection into the module-level M-CLIP name variables.

    Only the names are updated; nothing is loaded here.
    """
    global mclip_model_name, mclip_model_name_part
    selected = change['new']
    mclip_model_name, mclip_model_name_part = selected, selected.split("/")[-1]
    print(f"Selected M-CLIP model: {mclip_model_name}")


mclip_model_dropdown.observe(on_mclip_model_change, names='value')

# Seed the globals from the widget's current value
mclip_model_name = mclip_model_dropdown.value
mclip_model_name_part = mclip_model_name.split("/")[-1]

display(mclip_model_dropdown)
print(f"Initial M-CLIP model: {mclip_model_name}")

In [None]:
# Load Multilingual CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"

# Each M-CLIP text encoder is paired with one specific CLIP vision tower; image
# and text embeddings only share a space when the matching tower is used.
# Maps M-CLIP checkpoint -> (open_clip architecture, open_clip pretrained tag).
# NOTE(review): pairings taken from the M-CLIP README - confirm when adding entries.
mclip_vision_backbones = {
    "M-CLIP/XLM-Roberta-Large-Vit-L-14": ("ViT-L-14", "openai"),
    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus": ("ViT-B-16-plus-240", "laion400m_e32"),
}
if mclip_model_name not in mclip_vision_backbones:
    # Fail loudly rather than silently pairing the text encoder with a
    # mismatched vision tower.
    raise ValueError(
        f"No vision backbone registered for M-CLIP model: {mclip_model_name}"
    )
vision_arch, vision_pretrained = mclip_vision_backbones[mclip_model_name]

# Load text model (it is not moved to `device` in this cell)
mclip_model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(mclip_model_name)
mclip_tokenizer = transformers.AutoTokenizer.from_pretrained(mclip_model_name)

# Load the vision model that matches the selected text encoder
vision_model, _, vision_preprocess = open_clip.create_model_and_transforms(vision_arch, pretrained=vision_pretrained)
vision_model.to(device)
# Switch to inference mode; the open_clip README notes models start in train mode.
vision_model.eval()

print(f"Multilingual CLIP model loaded: {mclip_model_name}")
print(f"Vision backbone: {vision_arch} ({vision_pretrained})")

In [None]:
# Load dataset for Multilingual CLIP processing
dataset = load_bias_benchmark_dataset(DATASET_NAME)

# Exclusive upper index: the whole dataset when END_INDEX is None, otherwise
# END_INDEX clamped to the dataset length.
eval_size = len(dataset) if END_INDEX is None else min(END_INDEX, len(dataset))
eval_dataset = dataset.select(range(eval_size))

embeddings_list = []

# Setup save path. The directory is created here so this section also works on
# a fresh kernel where no other model section has been run.
os.makedirs(EMBEDDING_SAVE_PATH, exist_ok=True)
save_path = os.path.join(EMBEDDING_SAVE_PATH, f"image_text_embeddings_{mclip_model_name_part}_{START_INDEX}_{eval_size}.pkl")

print(f"Processing entries from index {START_INDEX} to {eval_size}...")

# Process entries
for i in tqdm(range(START_INDEX, eval_size), desc="Processing M-CLIP embeddings"):
    current_entry = eval_dataset[i]

    # Initialize entry with expected schema
    entry_embeddings = {
        'concept_id': current_entry.get('concept_id', None),
        'concept_in_native': current_entry.get('concept_in_native'),
        'image_id': current_entry.get('image_id', None),
        # image_key mirrors image_id (kept for backward compatibility)
        'image_key': current_entry.get('image_id', None),
        'index_in_dataset': i,
        'concept': current_entry.get('concept', None),
        'concept_country': current_entry.get('concept_country', None),
        'country': current_entry.get('country', None),
        'title': current_entry.get('title', None),
        'image_embedding': None,
        'text_embeddings': {}
    }

    try:
        with torch.no_grad():
            # 1. Get image embedding (stored as returned by the vision model,
            # without normalisation)
            if current_entry.get('image') is not None:
                # unsqueeze(0) adds the batch axis before the tensor is moved to `device`
                image = vision_preprocess(current_entry['image']).unsqueeze(0).to(device)
                image_embeddings = vision_model.encode_image(image)
                entry_embeddings['image_embedding'] = image_embeddings.cpu().numpy()
            else:
                print(f"Warning: No image found for entry index {i}")

            # 2. Get text embeddings; empty or missing texts are skipped
            text_embeddings = entry_embeddings['text_embeddings']
            if current_entry.get('concept'):
                concept_text_embeddings = mclip_model.forward([current_entry['concept']], mclip_tokenizer)
                text_embeddings['concept_embedding'] = concept_text_embeddings.cpu().numpy()

            if current_entry.get('concept_in_native'):
                native_embeddings = mclip_model.forward([current_entry['concept_in_native']], mclip_tokenizer)
                text_embeddings['translated_concept_embedding'] = native_embeddings.cpu().numpy()

        embeddings_list.append(entry_embeddings)

    except Exception as e:
        # Keep the partially filled entry and record the failure so that the
        # output still has one record per processed index.
        print(f"Error processing entry {i}: {str(e)}")
        entry_embeddings['error'] = str(e)
        embeddings_list.append(entry_embeddings)

# Final cleanup
torch.cuda.empty_cache()

# Save embeddings
print(f"Saving embeddings to: {save_path}")
with open(save_path, 'wb') as f:
    pickle.dump(embeddings_list, f)

print(f"Processing complete. Total entries processed: {len(embeddings_list)}")

# Print summary
successful_image_embeddings = sum(1 for entry in embeddings_list if entry.get('image_embedding') is not None)
successful_text_embeddings = sum(1 for entry in embeddings_list if entry.get('text_embeddings') and len(entry['text_embeddings']) > 0)

print(f"Successful image embeddings: {successful_image_embeddings}/{len(embeddings_list)}")
print(f"Successful text embeddings: {successful_text_embeddings}/{len(embeddings_list)}")