In [2]:
import torch
import numpy as np
from PIL import Image
import os
import sys
from tqdm.notebook import tqdm # Or from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine as cosine_distance
import lpips # Still needed for visual distance

from transformers import (
    CLIPProcessor, CLIPModel,
    SiglipProcessor, SiglipModel,
    BlipProcessor, BlipModel, BlipVisionModel, # Add BLIP classes
    AutoProcessor, AutoModel # Generic loaders can sometimes work but explicit is safer
)
# --- End Import ---

import collections
import re
import yaml
import argparse

In [76]:
CLIP_MODEL_ID = "openai/clip-vit-base-patch16"
# Siglip_MODEL_ID = "google/siglip-base-patch16-512"
BLIP_MODEL_ID = "Salesforce/blip-image-captioning-base" 
ViT_MODEL_ID = "google/owlvit-base-patch32" # For ViT
LPIPS_NET_TYPE = "vgg"

In [81]:
def get_image_embedding(image_path, model, processor, model_type, device, VISION_MODEL_ID):
    """
    Loads image and extracts image embedding using the provided model and processor.
    Handles differences between CLIP, SigLIP, and BLIP.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        # --- Preprocessing ---
        # Processors might handle inputs slightly differently.
        # BlipProcessor might require separate image/text handling if used for multimodal tasks,
        # but for image-only, passing `images=` should work.
        # Use pixel_values common key if possible
        inputs = processor(images=image, return_tensors="pt", padding=True).to(device)

        with torch.no_grad():
            if model_type == "CLIP" or model_type == "SigLIP":
                image_features = model.get_image_features(**inputs)
            elif model_type == "BLIP":
                image_features = model.get_image_features(**inputs)
            elif model_type == "google/vit-base-patch16-224" or model_type == "ViT":
                # Example of a specific model type that might need different handling
                # Assuming this is a ViT model, we can use the processor directly
                image_features = model(**inputs).last_hidden_state[:, 0, :] # CLS token
            else:
                raise ValueError(f"Unknown model_type '{model_type}' for feature extraction")

            # --- Normalization ---
            # Apply L2 normalization for consistency when using cosine distance
            if image_features is not None and isinstance(image_features, torch.Tensor):
                image_features = image_features / (image_features.norm(p=2, dim=-1, keepdim=True) + 1e-6) # Add epsilon for stability
            else:
                # Handle case where feature extraction failed
                print(f"Warning: image_features are None or not a Tensor for {image_path}. Skipping normalization.")
                return None # Return None if features couldn't be extracted

        return image_features.cpu().numpy().squeeze()

    except Exception as e:
        # Include model type in error for easier debugging
        print(f"Error processing {image_path} with {model_type} model ({VISION_MODEL_ID}): {e}")
        # Optionally raise e # Uncomment to stop execution on first error
        return None

In [78]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [79]:
clip_model = AutoModel.from_pretrained(CLIP_MODEL_ID).to(device)
clip_processor = AutoProcessor.from_pretrained(CLIP_MODEL_ID)

# siglip_model = AutoModel.from_pretrained(Siglip_MODEL_ID).to(device)
# siglip_processor = AutoProcessor.from_pretrained(Siglip_MODEL_ID)

blip_model = AutoModel.from_pretrained(BLIP_MODEL_ID).to(device)
blip_processor = AutoProcessor.from_pretrained(BLIP_MODEL_ID)

ViT_model = AutoModel.from_pretrained(ViT_MODEL_ID).to(device)
processor = AutoProcessor.from_pretrained(ViT_MODEL_ID)

`BlipModel` is going to be deprecated in future release, please use `BlipForConditionalGeneration`, `BlipForQuestionAnswering` or `BlipForImageTextRetrieval` depending on your usecase.
Some weights of BlipModel were not initialized from the model checkpoint at Salesforce/blip-image-captioning-base and are newly initialized: ['logit_scale', 'text_model.embeddings.LayerNorm.bias', 'text_model.embeddings.LayerNorm.weight', 'text_model.embeddings.position_embeddings.weight', 'text_model.embeddings.word_embeddings.weight', 'text_model.encoder.layer.0.attention.output.LayerNorm.bias', 'text_model.encoder.layer.0.attention.output.LayerNorm.weight', 'text_model.encoder.layer.0.attention.output.dense.bias', 'text_model.encoder.layer.0.attention.output.dense.weight', 'text_model.encoder.layer.0.attention.self.key.bias', 'text_model.encoder.layer.0.attention.self.key.weight', 'text_model.encoder.layer.0.attention.self.query.bias', 'text_model.encoder.layer.0.attention.self.query.weight', 'text_mo

config.json:   0%|          | 0.00/4.42k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/613M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/392 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/775 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

In [82]:
# test function
image_path = "test_images/3D_shape_sweep_only/size_0.10/config_S0.10__circle_circle_square.png"
clip_embedding = get_image_embedding(image_path, clip_model, clip_processor, "CLIP", device, CLIP_MODEL_ID)
# siglip_embedding = get_image_embedding(image_path, siglip_model, siglip_processor, "SigLIP", device, Siglip_MODEL_ID)
blip_embedding = get_image_embedding(image_path, blip_model, blip_processor, "BLIP", device, BLIP_MODEL_ID)
ViT_embedding = get_image_embedding(image_path, ViT_model, processor, "ViT", device, ViT_MODEL_ID)
print("CLIP embedding shape:", clip_embedding.shape)
# print("SigLIP embedding shape:", siglip_embedding.shape)
print("BLIP embedding shape:", blip_embedding.shape)
print("ViT embedding shape:", ViT_embedding.shape)


Error processing test_images/3D_shape_sweep_only/size_0.10/config_S0.10__circle_circle_square.png with ViT model (google/owlvit-base-patch32): 'NoneType' object has no attribute 'size'
CLIP embedding shape: (512,)
BLIP embedding shape: (512,)


AttributeError: 'NoneType' object has no attribute 'shape'

In [85]:
image = Image.open(image_path).convert("RGB")

inputs = processor(images=image, return_tensors="pt", padding=True).to(device)

with torch.no_grad():    
    image_features = ViT_model(**inputs)

AttributeError: 'NoneType' object has no attribute 'size'

In [40]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

def get_cnn_embedding_vgg16(image_path, feature_extractor, pooling_layer, transform, device):
    """
    Calculates a 1D embedding vector for an image using a pre-trained VGG16
    feature extractor and Global Average Pooling.

    Args:
        image_path (str or Path): Path to the image file.
        feature_extractor (torch.nn.Module): The VGG16 features module.
        pooling_layer (torch.nn.Module): The Global Average Pooling layer.
        transform (torchvision.transforms.Compose): Preprocessing transforms.
        device (torch.device): CPU or CUDA device.

    Returns:
        np.ndarray: A 1D NumPy array representing the image embedding (shape (512,)),
                    or None if an error occurs.
    """
    try:
        img = Image.open(image_path).convert('RGB')
        # Apply transformations (resize, crop, normalize, etc.)
        img_t = transform(img)
        # Add batch dimension [C, H, W] -> [1, C, H, W]
        batch_t = torch.unsqueeze(img_t, 0).to(device)

        # --- Inference ---
        # Set model to evaluation mode and disable gradients
        feature_extractor.eval()
        pooling_layer.eval()
        with torch.no_grad():
            # 1. Extract features from convolutional layers
            # Output shape: [1, 512, H/32, W/32] (e.g., [1, 512, 7, 7] for 224x224 input)
            features = feature_extractor(batch_t)

            # 2. Apply Global Average Pooling
            # Output shape: [1, 512, 1, 1]
            pooled_features = pooling_layer(features)

        # 3. Flatten the pooled features to get the embedding vector
        # Reshape from [1, 512, 1, 1] to [1, 512] then flatten to (512,)
        # Using .squeeze() removes dimensions of size 1
        embedding = pooled_features.squeeze() # Shape: [512]

        # Detach from GPU, move to CPU, convert to NumPy
        embedding_np = embedding.detach().cpu().numpy()

        # Optional: L2 Normalize (consistent with previous steps, though maybe less critical
        # for some CNN embeddings depending on downstream use)
        # norm = np.linalg.norm(embedding_np)
        # if norm > 0:
        #     embedding_np = embedding_np / norm

        return embedding_np

    except Exception as e:
        print(f"Error getting VGG16 embedding for {image_path}: {e}")
        return None

# --- Setup and Example Usage ---

# Choose device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 1. Load pre-trained VGG16 model and weights
print("Loading VGG16 model...")
weights = models.VGG16_Weights.IMAGENET1K_V1 # Use recommended weights enum
vgg_model = models.vgg16(weights=weights).to(device)
vgg_model.eval() # Set to evaluation mode

# 2. Isolate the feature extractor part (convolutional layers)
# The output of this has 512 channels for VGG16
feature_extractor = vgg_model.features

# 3. Define the Global Average Pooling layer
# Output size (1, 1) means pool each channel down to a single value
pooling_layer = torch.nn.AdaptiveAvgPool2d((1, 1)).to(device)

# 4. Get the correct preprocessing transforms for these weights
preprocess_transform = weights.transforms()
print("VGG16 Preprocessing Transforms:")
print(preprocess_transform)

# --- Test with an example image path ---
# Replace with a path to one of your PNG images
# example_image_path = "test_images/3D_shape_sweep_only/size_0.10/your_image.png"
example_image_path = image_path # Using the uploaded image for demo

if Path(example_image_path).exists():
    print(f"\nGetting embedding for: {example_image_path}")
    vgg_embedding_vector = get_cnn_embedding_vgg16(
        example_image_path,
        feature_extractor,
        pooling_layer,
        preprocess_transform,
        device
    )

    if vgg_embedding_vector is not None:
        print(f"Successfully generated VGG16 Embedding.")
        print(f"Output Embedding shape: {vgg_embedding_vector.shape}") # Should be (512,)
        print(f"First 5 values: {vgg_embedding_vector[:5]}")

        # Now you can use 'vgg_embedding_vector' in your visualization script
        # You would add "VGG16" to your MODEL_INFO, MARKERS, etc.
        # Remember its dimension is 512, so you'll still need the
        # separate t-SNE approach if combining with 768d models.
else:
    print(f"Example image path not found: {example_image_path}")

Using device: cuda
Loading VGG16 model...
VGG16 Preprocessing Transforms:
ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

Getting embedding for: test_images/3D_shape_sweep_only/size_0.10/config_S0.10__circle_circle_square.png
Successfully generated VGG16 Embedding.
Output Embedding shape: (512,)
First 5 values: [0.08766108 0.00056495 0.01977451 0.00235377 0.        ]


In [None]:
# import os
# import numpy as np
# from pathlib import Path
# from sklearn.manifold import TSNE # Or PCA, UMAP
# import matplotlib.pyplot as plt
# import torch
# # Assuming PIL (Pillow) is used within get_image_embedding to load images

# # --- Configuration ---
# BASE_IMAGE_DIR = Path("test_images/3D_shape_sweep_only")
# OUTPUT_PLOT_DIR = Path("embedding_visualizations")
# OUTPUT_PLOT_DIR.mkdir(parents=True, exist_ok=True) # Create output dir if needed

# # --- Assumed Variables (Make sure these are loaded/defined) ---
# # Replace with your actual loaded models, processors, and IDs
# # CLIP_MODEL_ID = "openai/clip-vit-large-patch14"
# # Siglip_MODEL_ID = "google/siglip-so400m-patch14-384" # Example matching 1152 dim
# # BLIP_MODEL_ID = "Salesforce/blip-itm-large-coco" # Example matching 768 dim

# # clip_model = ...
# # clip_processor = ...
# # siglip_model = ...
# # siglip_processor = ...
# # blip_model = ...
# # blip_processor = ...
# DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MODEL_INFO = {
#     "CLIP": {"model": clip_model, "processor": clip_processor, "id": CLIP_MODEL_ID},
#     "SigLIP": {"model": siglip_model, "processor": siglip_processor, "id": Siglip_MODEL_ID},
#     "BLIP": {"model": blip_model, "processor": blip_processor, "id": BLIP_MODEL_ID},
# }

# # Plotting config
# COLORS = {"CLIP": "red", "SigLIP": "blue", "BLIP": "green"}
# MARKERS = {"CLIP": "o", "SigLIP": "o", "BLIP": "o"} # Optional: different markers
# TSNE_PERPLEXITY = 15 # Adjust based on number of points per plot (usually 5-50)
# TSNE_N_ITER = 300  # Number of iterations for t-SNE

# # --- Helper Function (Assuming you have this) ---
# # Define or import your get_image_embedding function here.
# # It should return a numpy array or torch tensor.
# # Example structure:
# # def get_image_embedding(image_path, model, processor, model_name, device, model_id):
# #     # ... (load image with PIL)
# #     # ... (process image with processor)
# #     # ... (run model inference)
# #     # ... (extract embedding, e.g., model.get_image_features(**inputs))
# #     # Return embedding as numpy array or tensor
# #     # Example: return features.detach().cpu().numpy()
# #     pass # Replace with your actual function


# # --- Main Visualization Loop ---
# print(f"Starting visualization process. Plots will be saved to: {OUTPUT_PLOT_DIR}")

# # Iterate through each size_* directory
# for size_dir in sorted(BASE_IMAGE_DIR.glob("size_*")):
#     if not size_dir.is_dir():
#         continue

#     print(f"\nProcessing directory: {size_dir.name}")
#     image_paths = list(size_dir.glob("*.png"))

#     if not image_paths:
#         print(f"  No PNG images found in {size_dir.name}. Skipping.")
#         continue

#     print(f"  Found {len(image_paths)} PNG images.")

#     # Store embeddings separately for each model
#     model_embeddings_dict = {model_name: [] for model_name in MODEL_INFO}

#     # Generate embeddings for all images in this directory for all models
#     for i, img_path in enumerate(image_paths):
#         for model_name, info in MODEL_INFO.items():
#             try:
#                 embedding = get_image_embedding(
#                     img_path, info["model"], info["processor"], model_name, DEVICE, info["id"]
#                 )

#                 # Ensure embedding is a flat numpy array
#                 if isinstance(embedding, torch.Tensor):
#                     embedding = embedding.detach().cpu().numpy()
#                 embedding = embedding.flatten()

#                 # L2 Normalize embeddings
#                 norm = np.linalg.norm(embedding)
#                 if norm > 0:
#                     normalized_embedding = embedding / norm
#                 else:
#                     normalized_embedding = embedding # Avoid division by zero

#                 # Append to the correct model's list
#                 model_embeddings_dict[model_name].append(normalized_embedding)

#             except Exception as e:
#                 print(f"  Error processing {img_path.name} with {model_name}: {e}")


#     # --- Dimensionality Reduction (Per Model) ---
#     combined_2d_embeddings = []
#     combined_labels = []

#     for model_name, embeddings_list in model_embeddings_dict.items():
#         if not embeddings_list:
#             print(f"  No embeddings generated for {model_name} in {size_dir.name}. Skipping.")
#             continue

#         embeddings_array = np.array(embeddings_list)
#         n_samples = embeddings_array.shape[0]
#         print(f"  Generated {n_samples} embeddings for {model_name} (dim={embeddings_array.shape[1]}).")

#         # Check conditions for t-SNE
#         effective_perplexity = min(TSNE_PERPLEXITY, max(1, n_samples - 1))
#         if n_samples <= 1:
#             print(f"  Only {n_samples} {model_name} embedding(s). Cannot run t-SNE. Skipping.")
#             continue
#         if n_samples <= effective_perplexity:
#             print(f"  Adjusting perplexity for {model_name} from {TSNE_PERPLEXITY} to {max(1, n_samples - 1)}.")
#             effective_perplexity = max(1, n_samples - 1)

#         print(f"  Running t-SNE for {model_name} (perplexity={effective_perplexity}, n_iter={TSNE_N_ITER})...")
#         tsne = TSNE(
#             n_components=2,
#             random_state=42, # for reproducibility
#             perplexity=effective_perplexity,
#             n_iter=TSNE_N_ITER,
#             init='pca', # PCA initialization is often more stable
#             learning_rate='auto' # Recommended setting
#         )
#         try:
#             # Run t-SNE on this model's embeddings only
#             embeddings_2d = tsne.fit_transform(embeddings_array)

#             # Append the 2D results and corresponding labels to combined lists
#             combined_2d_embeddings.extend(embeddings_2d.tolist())
#             combined_labels.extend([model_name] * n_samples)

#         except Exception as e:
#             print(f"  Error during t-SNE for {model_name} in {size_dir.name}: {e}. Skipping.")


#     # --- Plotting (Combined 2D Embeddings) ---
#     if not combined_2d_embeddings:
#         print(f"  No 2D embeddings to plot for {size_dir.name}. Skipping plot.")
#         continue

#     combined_2d_embeddings_array = np.array(combined_2d_embeddings)
#     print(f"  Generating combined plot for {len(combined_labels)} points...")

#     plt.figure(figsize=(12, 10))
#     for model_name in MODEL_INFO.keys():
#         # Find indices corresponding to the current model in the combined lists
#         indices = [i for i, label in enumerate(combined_labels) if label == model_name]
#         if indices: # Only plot if there are points for this model
#             plt.scatter(
#                 combined_2d_embeddings_array[indices, 0],
#                 combined_2d_embeddings_array[indices, 1],
#                 c=COLORS[model_name],
#                 label=model_name,
#                 marker=MARKERS[model_name], # Use different markers
#                 alpha=0.7, # Adjust transparency
#                 s=50 # Adjust marker size
#             )

#     plt.title(f"t-SNE Visualization of Image Embeddings ({size_dir.name})", fontsize=16)
#     plt.xlabel("t-SNE Component 1", fontsize=12)
#     plt.ylabel("t-SNE Component 2", fontsize=12)
#     plt.legend(title="Model", fontsize=10)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.tight_layout() # Adjust layout

#     # Save the plot
#     plot_filename = OUTPUT_PLOT_DIR / f"{size_dir.name}_embedding_tsne.png"
#     plt.savefig(plot_filename, dpi=150) # Save with higher resolution
#     print(f"  Plot saved to {plot_filename}")
#     plt.close() # Close the figure to release memory

# print("\nVisualization process complete.")


In [None]:
import os
import numpy as np
from pathlib import Path
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.cm as cm # For color mapping for sizes
import torch
# Assuming PIL is used within get_image_embedding

# --- Configuration ---
BASE_IMAGE_DIR = Path("test_images/3D_shape_sweep_only")
OUTPUT_PLOT_DIR = Path("embedding_visualizations/Total_Combined")
OUTPUT_PLOT_DIR.mkdir(parents=True, exist_ok=True)

# Embedding dimensions (as provided by user)
MODEL_DIMS = {
    "CLIP": 768,
    "SigLIP": 768,
    "BLIP": 512,
    "ViT": 768, 
}

# --- Assumed Variables (Make sure these are loaded/defined) ---
# CLIP_MODEL_ID = ... Siglip_MODEL_ID = ... BLIP_MODEL_ID = ...
# clip_model = ... clip_processor = ...
# siglip_model = ... siglip_processor = ...
# blip_model = ... blip_processor = ...
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_INFO = {
    "CLIP": {"model": clip_model, "processor": clip_processor, "id": CLIP_MODEL_ID, "dim": MODEL_DIMS["CLIP"]},
    "SigLIP": {"model": siglip_model, "processor": siglip_processor, "id": Siglip_MODEL_ID, "dim": MODEL_DIMS["SigLIP"]},
    #"BLIP": {"model": blip_model, "processor": blip_processor, "id": BLIP_MODEL_ID, "dim": MODEL_DIMS["BLIP"]},
    "ViT": {"model": ViT_model, "processor": processor, "id": ViT_MODEL_ID, "dim": MODEL_DIMS["ViT"]},
}

# Define markers for models
MARKERS = {"CLIP": "o", "SigLIP": "s",  "ViT": "D"} #"BLIP": "^",

# Output filename
OUTPUT_PLOT_FILE = OUTPUT_PLOT_DIR / "all_models_by_size_color_marker_tsne.png"

# t-SNE config
TSNE_PERPLEXITY = 30
TSNE_N_ITER = 350

# --- Helper Function (Assuming you have this) ---
# Define or import your get_image_embedding function here.
# def get_image_embedding(image_path, model, processor, model_name, device, model_id):
#     # ... (load image, process, inference) ...
#     # Return embedding as numpy array or tensor
#     pass # Replace with your actual function


# --- Data Collection (Grouped by Model, Across All Sizes) ---
print("Collecting embeddings for ALL models across all sizes...")
model_embeddings_dict = {model_name: [] for model_name in MODEL_INFO}
# Store corresponding size labels for each embedding within each model's list
model_size_labels_dict = {model_name: [] for model_name in MODEL_INFO}
unique_size_labels = sorted([d.name for d in BASE_IMAGE_DIR.glob("size_*") if d.is_dir()])
if not unique_size_labels:
    print("Error: No 'size_*' directories found in BASE_IMAGE_DIR.")
    exit()

for size_label in unique_size_labels:
    size_dir = BASE_IMAGE_DIR / size_label
    print(f"  Processing directory: {size_label}")
    image_paths = list(size_dir.glob("*.png"))

    if not image_paths:
        print(f"    No PNG images found. Skipping.")
        continue

    for img_path in image_paths:
        for model_name, info in MODEL_INFO.items():
            try:
                embedding = get_image_embedding(
                    img_path, info["model"], info["processor"], model_name, DEVICE, info["id"]
                )

                if isinstance(embedding, torch.Tensor):
                    embedding = embedding.detach().cpu().numpy()
                embedding = embedding.flatten()

                # Validate dimension
                expected_dim = info['dim']
                if embedding.shape[0] != expected_dim:
                    print(f"    Warning: Embedding dim mismatch for {model_name} ({img_path.name}). Expected {expected_dim}, got {embedding.shape[0]}. Skipping this embedding.")
                    continue

                model_embeddings_dict[model_name].append(embedding)
                model_size_labels_dict[model_name].append(size_label)

            except Exception as e:
                print(f"    Error processing {img_path.name} with {model_name}: {e}")







Collecting embeddings for ALL models across all sizes...
  Processing directory: size_0.10
  Processing directory: size_0.15
  Processing directory: size_0.20
  Processing directory: size_0.25
  Processing directory: size_0.30
  Processing directory: size_0.35
  Processing directory: size_0.40
  Processing directory: size_0.45
  Processing directory: size_0.50
  Processing directory: size_0.55
  Processing directory: size_0.60
  Processing directory: size_0.65
  Processing directory: size_0.70
  Processing directory: size_0.75
  Processing directory: size_0.80
  Processing directory: size_0.85
  Processing directory: size_0.90
  Processing directory: size_0.95
  Processing directory: size_1.00
  Processing directory: size_1.05
  Processing directory: size_1.10
  Processing directory: size_1.15
  Processing directory: size_1.20


In [None]:
# --- Dimensionality Reduction (Run t-SNE Separately for Each Model) ---
combined_2d_embeddings = []
combined_model_labels_for_marker = [] # For marker shape
combined_size_labels_for_color = []   # For color

print("\nRunning t-SNE separately for each model's collected embeddings...")
for model_name, embeddings_list in model_embeddings_dict.items():
    if not embeddings_list:
        print(f"  No embeddings collected for {model_name}. Skipping t-SNE.")
        continue

    embeddings_array = np.array(embeddings_list)
    n_samples, emb_dim = embeddings_array.shape
    print(f"  Processing {model_name}: {n_samples} samples, Dim={emb_dim}")

    # Check conditions for t-SNE
    effective_perplexity = min(TSNE_PERPLEXITY, max(1, n_samples - 1))
    if n_samples <= 1:
        print(f"    Only {n_samples} embedding(s). Cannot run t-SNE. Skipping.")
        continue
    if n_samples <= effective_perplexity:
        print(f"    Adjusting perplexity for {model_name} from {TSNE_PERPLEXITY} to {max(1, n_samples - 1)}.")
        effective_perplexity = max(1, n_samples - 1)

    print(f"    Running t-SNE for {model_name} (perplexity={effective_perplexity}, n_iter={TSNE_N_ITER})...")
    tsne = TSNE(
        n_components=2, random_state=42, perplexity=effective_perplexity,
        n_iter=TSNE_N_ITER, init='pca', learning_rate='auto', n_jobs=-1
    )
    try:
        embeddyings_2d = tsne.fit_transform(embeddings_array)
        combined_2d_embeddings.extend(embeddings_2d.tolist())
        combined_model_labels_for_marker.extend([model_name] * n_samples)
        # Get the corresponding size labels that were stored earlier
        combined_size_labels_for_color.extend(model_size_labels_dict[model_name])
        print(f"    t-SNE complete for {model_name}.")

    except Exception as e:
        print(f"    Error during t-SNE for {model_name}: {e}. Skipping.")


# --- Plotting (Single Plot, Marker=Model, Color=Size) ---
if not combined_2d_embeddings:
    print("\nNo 2D embeddings were generated from any model. Cannot create plot.")
    exit()

combined_2d_embeddings_array = np.array(combined_2d_embeddings)


Running t-SNE separately for each model's collected embeddings...
  Processing CLIP: 552 samples, Dim=768
    Running t-SNE for CLIP (perplexity=30, n_iter=350)...


[WinError 2] The system cannot find the file specified
  File "d:\anaconda\envs\ve\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "d:\anaconda\envs\ve\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\anaconda\envs\ve\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "d:\anaconda\envs\ve\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


    t-SNE complete for CLIP.
  Processing SigLIP: 552 samples, Dim=768
    Running t-SNE for SigLIP (perplexity=30, n_iter=350)...




    t-SNE complete for SigLIP.
  Processing BLIP: 552 samples, Dim=512
    Running t-SNE for BLIP (perplexity=30, n_iter=350)...




    t-SNE complete for BLIP.
  Processing ViT: 552 samples, Dim=768
    Running t-SNE for ViT (perplexity=30, n_iter=350)...




    t-SNE complete for ViT.


In [24]:
print(f"\nGenerating combined plot for {len(combined_model_labels_for_marker)} points...")

plt.figure(figsize=(18, 15)) # Large figure

# Create color map for SIZES
num_sizes = len(unique_size_labels)
# Use a visually distinct colormap like tab20 if num_sizes <= 20, otherwise viridis/plasma
color_map = cm.tab20(np.linspace(0, 1, num_sizes)) if num_sizes <= 20 else cm.viridis(np.linspace(0, 1, num_sizes))
size_to_color = {size_label: color_map[i] for i, size_label in enumerate(unique_size_labels)}

# Plot all points, assigning marker and color individually
# This is simpler than grouping if colors/markers are assigned per point anyway
point_colors = [size_to_color[sz_label] for sz_label in combined_size_labels_for_color]
point_markers = [MARKERS.get(m_label, 'x') for m_label in combined_model_labels_for_marker] # List of markers for each point

# Need to plot each marker type separately to build the legend correctly
legend_handles_models = []
unique_models_plotted = sorted(list(set(combined_model_labels_for_marker)))

for model_name in unique_models_plotted:
    model_indices = [i for i, m_label in enumerate(combined_model_labels_for_marker) if m_label == model_name]
    model_marker = MARKERS.get(model_name, 'x')

    # Extract coordinates and colors for points belonging to this model
    model_coords = combined_2d_embeddings_array[model_indices]
    model_point_colors = [point_colors[i] for i in model_indices]

    plt.scatter(
        model_coords[:, 0],
        model_coords[:, 1],
        c=model_point_colors, # Color by size
        marker=model_marker,  # Marker by model
        label=model_name,     # Label only used for legend handle creation below
        alpha=0.6,
        s=40
    )
    # Create a representative handle for the model legend (marker type)
    legend_handles_models.append(plt.Line2D([0], [0], marker=model_marker, color='grey', label=model_name, linestyle='', markersize=8))


plt.title(f"t-SNE Visualization (Marker=Model, Color=Size)", fontsize=18)
plt.xlabel("t-SNE Component 1", fontsize=14)
plt.ylabel("t-SNE Component 2", fontsize=14)

# Create legends - one for markers (models), one for colors (sizes)
# Legend 1: Models (Markers)
legend1 = plt.legend(handles=legend_handles_models, title="Model", loc='upper left', bbox_to_anchor=(1.04, 1), fontsize=10)
plt.gca().add_artist(legend1) # Add the first legend manually

# Legend 2: Sizes (Colors) - Create dummy scatter points for the legend
size_legend_handles = [plt.scatter([],[], color=size_to_color[sz_label], label=sz_label) for sz_label in unique_size_labels]
plt.legend(handles=size_legend_handles, title="Size", loc='center left', bbox_to_anchor=(1.04, 0.5), fontsize=9)


plt.grid(True, linestyle='--', alpha=0.4)
# Adjust layout AFTER legends are created
plt.tight_layout(rect=[0, 0, 0.85, 1]) # Adjust layout for external legends

# Save the plot
plt.savefig(OUTPUT_PLOT_FILE, dpi=200, bbox_inches='tight') # Use bbox_inches='tight' to include external legend
print(f"\nPlot saved to {OUTPUT_PLOT_FILE}")
plt.close() # Close the figure to release memory

print("\nVisualization process complete.")


Generating combined plot for 2208 points...

Plot saved to embedding_visualizations\Total_Combined\all_models_by_size_color_marker_tsne.png

Visualization process complete.


In [39]:
# All embedding
import os
import numpy as np
from pathlib import Path
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import torch
# Assuming PIL is used within get_image_embedding

# --- Configuration ---
BASE_IMAGE_DIR = Path("test_images/3D_shape_sweep_only")
OUTPUT_PLOT_DIR = Path("embedding_visualizations")
OUTPUT_PLOT_DIR.mkdir(parents=True, exist_ok=True)

# Models to include (REMOVE BLIP)
MODELS_TO_INCLUDE = ["CLIP", "SigLIP"]
MODEL_DIMS = { "CLIP": 768, "SigLIP": 768, "ViT": 768 } # BLIP removed

# --- Assumed Variables (Make sure these are loaded/defined) ---
# CLIP_MODEL_ID = ... Siglip_MODEL_ID = ...
# clip_model = ... clip_processor = ...
# siglip_model = ... siglip_processor = ...
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Filter MODEL_INFO based on MODELS_TO_INCLUDE
MODEL_INFO_FILTERED = {
    "CLIP": {"model": clip_model, "processor": clip_processor, "id": CLIP_MODEL_ID, "dim": MODEL_DIMS["CLIP"]},
    "SigLIP": {"model": siglip_model, "processor": siglip_processor, "id": Siglip_MODEL_ID, "dim": MODEL_DIMS["SigLIP"]},
    "ViT": {"model": ViT_model, "processor": processor, "id": ViT_MODEL_ID, "dim": MODEL_DIMS["ViT"]},
    # BLIP entry is removed
}

# Define markers for models
MARKERS = {"CLIP": "o", "SigLIP": "s", "ViT": "D"} # BLIP removed

# Output filename
OUTPUT_PLOT_FILE = OUTPUT_PLOT_DIR / "CLIP_SigLIP_combined_tsne_marker_model_color_size.png"

# t-SNE config
TSNE_PERPLEXITY = 30
TSNE_N_ITER = 350

# --- Helper Function (Assuming you have this) ---
# Define or import your get_image_embedding function here.
# def get_image_embedding(image_path, model, processor, model_name, device, model_id):
#     # ... (load image, process, inference) ...
#     # Return embedding as numpy array or tensor
#     pass # Replace with your actual function


# --- Data Collection (CLIP & SigLIP only) ---
print(f"Collecting embeddings for {', '.join(MODELS_TO_INCLUDE)} across all sizes...")
all_embeddings_list = []
all_model_labels = [] # To determine marker shape
all_size_labels = []  # To determine color
unique_size_labels = sorted([d.name for d in BASE_IMAGE_DIR.glob("size_*") if d.is_dir()])
if not unique_size_labels:
    print("Error: No 'size_*' directories found.")
    exit()

for size_label in unique_size_labels:
    size_dir = BASE_IMAGE_DIR / size_label
    print(f"  Processing directory: {size_label}")
    image_paths = list(size_dir.glob("*.png"))

    if not image_paths:
        print(f"    No PNG images found. Skipping.")
        continue

    for img_path in image_paths:
        # Loop only through the models we want to include
        for model_name, info in MODEL_INFO_FILTERED.items():
            try:
                embedding = get_image_embedding(
                    img_path, info["model"], info["processor"], model_name, DEVICE, info["id"]
                )

                if isinstance(embedding, torch.Tensor):
                    embedding = embedding.detach().cpu().numpy()
                embedding = embedding.flatten()

                # Validate dimension
                expected_dim = info['dim']
                if embedding.shape[0] != expected_dim:
                    print(f"    Warning: Embedding dim mismatch for {model_name} ({img_path.name}). Expected {expected_dim}, got {embedding.shape[0]}. Skipping.")
                    continue

                all_embeddings_list.append(embedding)
                all_model_labels.append(model_name) # Store model label
                all_size_labels.append(size_label)   # Store size label

            except Exception as e:
                print(f"    Error processing {img_path.name} with {model_name}: {e}")


# --- Single Dimensionality Reduction (on Combined 768d Data) ---
if not all_embeddings_list:
    print("\nNo embeddings were collected. Cannot proceed.")
    exit()

embeddings_array = np.array(all_embeddings_list)
n_samples, emb_dim = embeddings_array.shape
# Should always be 768 now if validation worked
print(f"\nCollected {n_samples} total embeddings (dimension: {emb_dim}).")

print(f"Running t-SNE on the combined dataset (perplexity={TSNE_PERPLEXITY}, n_iter={TSNE_N_ITER})...")
# Adjust perplexity if number of samples is too small relative to default
effective_perplexity = min(TSNE_PERPLEXITY, max(1, n_samples - 1))
if n_samples <= effective_perplexity:
    print(f"  Warning: Number of samples ({n_samples}) is less than or equal to perplexity ({effective_perplexity}). Adjusting perplexity.")
    effective_perplexity = max(1, n_samples - 1)

tsne = TSNE(
    n_components=2, random_state=42, perplexity=effective_perplexity,
    n_iter=TSNE_N_ITER, init='pca', learning_rate='auto', n_jobs=-1
)
try:
    embeddings_2d = tsne.fit_transform(embeddings_array)
    print("  t-SNE calculation complete.")
except Exception as e:
    print(f"Error during t-SNE: {e}. Exiting.")
    exit()


# --- Plotting (Single Plot, Marker=Model, Color=Size) ---
print("Generating plot...")
plt.figure(figsize=(18, 15))

# Create color map for SIZES
num_sizes = len(unique_size_labels)
color_map = cm.tab20(np.linspace(0, 1, num_sizes)) if num_sizes <= 20 else cm.viridis(np.linspace(0, 1, num_sizes))
size_to_color = {size_label: color_map[i] for i, size_label in enumerate(unique_size_labels)}

# --- Plotting Logic ---
# We need to plot each marker type separately to build the legend correctly
legend_handles_models = []
unique_models_plotted = sorted(list(set(all_model_labels))) # Will be ['CLIP', 'SigLIP']

for model_name in unique_models_plotted:
    # Find indices for points belonging to this model
    model_indices = [i for i, m_label in enumerate(all_model_labels) if m_label == model_name]
    model_marker = MARKERS.get(model_name, 'x') # Get marker for this model

    # Extract coordinates and the corresponding size labels for these points
    model_coords = embeddings_2d[model_indices]
    model_point_size_labels = [all_size_labels[i] for i in model_indices]

    # Determine the color for each point based on its size label
    model_point_colors = [size_to_color[sz_label] for sz_label in model_point_size_labels]

    # Scatter plot for this model's points
    plt.scatter(
        model_coords[:, 0],
        model_coords[:, 1],
        c=model_point_colors, # Color determined by size
        marker=model_marker,  # Marker determined by model
        label=model_name,     # Label only used for handle creation below
        alpha=0.6,
        s=40                  # Adjust marker size if needed
    )
    # Create a representative handle for the model legend (marker type)
    legend_handles_models.append(plt.Line2D([0], [0], marker=model_marker, color='grey', label=model_name, linestyle='', markersize=8))

# --- Add Labels and Legends ---
plt.title(f"t-SNE on Combined CLIP & SigLIP Embeddings (Marker=Model, Color=Size)", fontsize=18)
plt.xlabel("t-SNE Component 1", fontsize=14)
plt.ylabel("t-SNE Component 2", fontsize=14)

# Legend 1: Models (Markers)
legend1 = plt.legend(handles=legend_handles_models, title="Model", loc='upper left', bbox_to_anchor=(1.04, 1), fontsize=10)
plt.gca().add_artist(legend1)

# Legend 2: Sizes (Colors)
size_legend_handles = [plt.scatter([],[], color=size_to_color[sz_label], label=sz_label) for sz_label in unique_size_labels]
plt.legend(handles=size_legend_handles, title="Size", loc='center left', bbox_to_anchor=(1.04, 0.5), fontsize=9)

plt.grid(True, linestyle='--', alpha=0.4)
plt.tight_layout(rect=[0, 0, 0.85, 1]) # Adjust layout for external legends

# --- Save Plot ---
plt.savefig(OUTPUT_PLOT_FILE, dpi=200, bbox_inches='tight')
print(f"\nPlot saved to {OUTPUT_PLOT_FILE}")
plt.close()

print("\nVisualization process complete.")

Collecting embeddings for CLIP, SigLIP across all sizes...
  Processing directory: size_0.10
  Processing directory: size_0.15
  Processing directory: size_0.20
  Processing directory: size_0.25
  Processing directory: size_0.30
  Processing directory: size_0.35
  Processing directory: size_0.40
  Processing directory: size_0.45
  Processing directory: size_0.50
  Processing directory: size_0.55
  Processing directory: size_0.60
  Processing directory: size_0.65
  Processing directory: size_0.70
  Processing directory: size_0.75
  Processing directory: size_0.80
  Processing directory: size_0.85
  Processing directory: size_0.90
  Processing directory: size_0.95
  Processing directory: size_1.00
  Processing directory: size_1.05
  Processing directory: size_1.10
  Processing directory: size_1.15
  Processing directory: size_1.20

Collected 1656 total embeddings (dimension: 768).
Running t-SNE on the combined dataset (perplexity=30, n_iter=350)...




  t-SNE calculation complete.
Generating plot...

Plot saved to embedding_visualizations\CLIP_SigLIP_combined_tsne_marker_model_color_size.png

Visualization process complete.
