# Qwen3-Embedding-8B Analysis for Spatial Relationship Understanding

This notebook explores the Qwen/Qwen3-Embedding-8B model structure and analyzes its embedding behavior with spatial relationship prompts.

## Objectives:
- Load and inspect Qwen3-Embedding-8B model architecture
- Generate embeddings for spatial relationship prompts
- Analyze embedding structure and clustering patterns
- Compare with existing approaches in the research pipeline

Date: 2025-08-18  
Author: Binxu

## 1. Setup and Dependencies

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import torch
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# Add project root to path so project-local packages (e.g. `utils`) are importable.
# The root is taken to be the parent of the current working directory.
project_root = Path.cwd().parent
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

# Report the runtime environment next to the results
print(f"Project root: {project_root}")
print(f"Current working directory: {Path.cwd()}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

In [None]:
# Install required packages if not available
try:
    from transformers import AutoTokenizer, AutoModel
    from sentence_transformers import SentenceTransformer
    print("Required libraries already installed")
except ImportError:
    print("Installing required packages...")
    !pip install transformers sentence-transformers
    from transformers import AutoTokenizer, AutoModel
    from sentence_transformers import SentenceTransformer

try:
    from sklearn.decomposition import PCA
    from sklearn.manifold import TSNE
    from sklearn.cluster import KMeans
    from sklearn.metrics.pairwise import cosine_similarity
    print("Sklearn libraries available")
except ImportError:
    print("Installing sklearn...")
    !pip install scikit-learn
    from sklearn.decomposition import PCA
    from sklearn.manifold import TSNE
    from sklearn.cluster import KMeans
    from sklearn.metrics.pairwise import cosine_similarity

## 2. Load Spatial Relationship Prompts

In [None]:
# Project-local helper that exposes the evaluation prompts
from utils.eval_prompts import PromptDataset

# Build the prompt collection
prompt_dataset = PromptDataset()

# Materialize the prompt type names into a list
prompt_types = [*prompt_dataset.get_prompt_types()]
print("Available prompt types:", prompt_types)

# List every prompt under its upper-cased type heading, numbered from 1
for ptype in prompt_types:
    prompts = prompt_dataset.get_prompts_by_type(ptype)
    print(f"\n{ptype.upper()} ({len(prompts)} prompts):")
    for rank, prompt in enumerate(prompts, start=1):
        print(f"  {rank:2d}. {prompt}")

In [None]:
# Focus on spatial relationship prompts for analysis
relational_prompts = prompt_dataset.get_prompts_by_type("relational")
relational_2color_prompts = prompt_dataset.get_prompts_by_type("relational_2_colors")

# Combine prompts. The order (relational first, then 2-color) matters: the
# embedding cell labels prompt types positionally from these list lengths.
all_spatial_prompts = relational_prompts + relational_2color_prompts

# Convert underscore format to natural language.
# Replacing underscores with spaces is the only transformation applied.
natural_language_prompts = [prompt.replace('_', ' ') for prompt in all_spatial_prompts]

print("Spatial relationship prompts (underscore format):")
for i, prompt in enumerate(all_spatial_prompts):
    print(f"  {i+1:2d}. {prompt}")

print("\nSpatial relationship prompts (natural language):")
for i, prompt in enumerate(natural_language_prompts):
    print(f"  {i+1:2d}. {prompt}")

# Additional test prompts for comprehensive analysis
additional_prompts = [
    "blue triangle is to the upper left of red square",  # Main example from user
    "red circle above blue square",
    "green triangle below yellow circle",
    "object on the left side",
    "object on the right side",
    "objects positioned above",
    "objects positioned below",
]

# Combine all prompts for analysis (dataset prompts first, hand-written ones last)
all_test_prompts = natural_language_prompts + additional_prompts
print(f"\nTotal prompts for analysis: {len(all_test_prompts)}")

## 3. Load Qwen3-Embedding-8B Model

In [None]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Hugging Face identifier of the embedding model under study
model_name = "Qwen/Qwen3-Embedding-8B"
print(f"Loading model: {model_name}")

In [None]:
# Primary loading path: the sentence-transformers wrapper.
# Best-effort: on any failure the error is printed and `model_st` is left as None,
# which the later cells check before using the model.
print("Loading model with sentence-transformers...")
model_st = None
try:
    model_st = SentenceTransformer(model_name, device=device)
    print(f"✓ Model loaded successfully with sentence-transformers")
    print(f"Model device: {model_st.device}")
    print(f"Max sequence length: {model_st.max_seq_length}")
except Exception as load_error:
    print(f"❌ Error loading with sentence-transformers: {load_error}")
    model_st = None

In [None]:
# Secondary loading path: plain transformers (tokenizer + bare model).
# Best-effort: on any failure both `model_hf` and `tokenizer` end up as None.
print("Loading model with transformers library...")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # float16 weights on CUDA, float32 on CPU
    if device.type == 'cuda':
        load_dtype = torch.float16
    else:
        load_dtype = torch.float32
    model_hf = AutoModel.from_pretrained(model_name, torch_dtype=load_dtype)
    model_hf = model_hf.to(device)
    print(f"✓ Model loaded successfully with transformers")
    print(f"Model device: {model_hf.device}")
    print(f"Model dtype: {model_hf.dtype}")
except Exception as load_error:
    print(f"❌ Error loading with transformers: {load_error}")
    model_hf = tokenizer = None

## 4. Model Architecture Inspection

In [None]:
# Inspect model configuration
if model_hf is not None:
    print("=== Model Configuration ===")
    config = model_hf.config

    # (printed label, config attribute) pairs, reported in this order
    config_fields = [
        ("Model type", "model_type"),
        ("Hidden size", "hidden_size"),
        ("Number of layers", "num_hidden_layers"),
        ("Number of attention heads", "num_attention_heads"),
        ("Intermediate size", "intermediate_size"),
        ("Max position embeddings", "max_position_embeddings"),
        ("Vocab size", "vocab_size"),
    ]
    for label, attr in config_fields:
        print(f"{label}: {getattr(config, attr)}")

    # Only some configs carry this attribute, so it is reported conditionally
    if hasattr(config, 'embedding_size'):
        print(f"Embedding size: {config.embedding_size}")

    print("\n=== Model Architecture ===")
    print(model_hf)

if model_st is not None:
    print("\n=== SentenceTransformer Model Info ===")
    # `_modules` is a name -> module mapping; iterating the mapping itself yields
    # only the string keys, so the module objects are taken from `.values()`.
    st_modules = list(model_st._modules.values())
    print(f"Model modules: {len(st_modules)}")
    for i, module in enumerate(st_modules):
        print(f"  Module {i}: {type(module).__name__}")
        if hasattr(module, 'get_sentence_embedding_dimension'):
            print(f"    Embedding dimension: {module.get_sentence_embedding_dimension()}")

In [None]:
# Encode one probe prompt through each loaded model to see the output shapes
test_prompt = "blue triangle is above red square"

if model_st is not None:
    print("=== Testing with SentenceTransformer ===")
    test_embedding = model_st.encode([test_prompt], convert_to_tensor=True)
    first_vector = test_embedding[0]
    print(f"Default embedding shape: {test_embedding.shape}")
    print(f"Embedding dimension: {test_embedding.shape[1]}")
    print(f"Embedding dtype: {test_embedding.dtype}")
    print(f"First 10 values: {first_vector[:10].cpu().numpy()}")

if model_hf is not None and tokenizer is not None:
    print("\n=== Testing with Transformers ===")
    # Tokenize, then move every tensor onto the model's device
    encoded = tokenizer(test_prompt, return_tensors='pt', padding=True, truncation=True)
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model_hf(**inputs)

    last_hidden_state = outputs.last_hidden_state
    attention_mask = inputs['attention_mask']

    # Mean pooling: zero out masked positions, sum over dim 1 (presumably the
    # token axis — TODO confirm), then divide by the number of unmasked tokens.
    # NOTE(review): the sentence-transformers path may use a different pooling
    # strategy, so this vector need not match `test_embedding` — confirm intent.
    token_weights = attention_mask.unsqueeze(-1)
    summed_embeddings = (last_hidden_state * token_weights).sum(dim=1)
    lengths = attention_mask.sum(dim=1)
    mean_embedding = summed_embeddings / lengths.unsqueeze(-1)

    print(f"Raw output shape: {last_hidden_state.shape}")
    print(f"Mean pooled embedding shape: {mean_embedding.shape}")
    print(f"Embedding dimension: {mean_embedding.shape[1]}")
    print(f"First 10 values: {mean_embedding[0][:10].cpu().numpy()}")

## 5. Generate Embeddings for All Prompts

In [None]:
def encode_prompts_batch(prompts, model, batch_size=8):
    """Embed `prompts` chunk by chunk and return one stacked CPU tensor.

    Args:
        prompts: sequence of prompt strings (must support len() and slicing).
        model: object with a SentenceTransformer-style
            `encode(batch, convert_to_tensor=True, show_progress_bar=False)` method.
        batch_size: number of prompts passed to `model.encode` per call.

    Returns:
        The per-batch tensors, moved to CPU and concatenated along dim 0.
    """
    batch_starts = range(0, len(prompts), batch_size)
    encoded_chunks = [
        model.encode(
            prompts[start:start + batch_size],
            convert_to_tensor=True,
            show_progress_bar=False,
        ).cpu()
        for start in tqdm(batch_starts, desc="Encoding prompts")
    ]
    return torch.cat(encoded_chunks, dim=0)

# Embed every test prompt (requires the sentence-transformers model)
if model_st is None:
    print("❌ Cannot generate embeddings - model not loaded")
else:
    print(f"Generating embeddings for {len(all_test_prompts)} prompts...")
    embeddings = encode_prompts_batch(all_test_prompts, model_st)

    print(f"Embeddings shape: {embeddings.shape}")
    print(f"Embedding dimension: {embeddings.shape[1]}")

    # NumPy view of the embeddings for the sklearn / plotting cells
    embeddings_np = embeddings.numpy()

    # Positional prompt-type labels: all_test_prompts is the concatenation of
    # the three groups below, in this order.
    prompt_type_labels = []
    for type_label, group in (
        ('relational', relational_prompts),
        ('relational_2_colors', relational_2color_prompts),
        ('additional', additional_prompts),
    ):
        prompt_type_labels.extend([type_label] * len(group))

    # One row per prompt: integer-named embedding columns plus metadata columns
    embeddings_df = pd.DataFrame(embeddings_np)
    embeddings_df['prompt'] = all_test_prompts
    embeddings_df['prompt_type'] = prompt_type_labels

    print("\nEmbedding statistics:")
    print(f"Mean: {embeddings_np.mean():.6f}")
    print(f"Std: {embeddings_np.std():.6f}")
    print(f"Min: {embeddings_np.min():.6f}")
    print(f"Max: {embeddings_np.max():.6f}")

## 6. Embedding Analysis and Visualization

In [None]:
# Three-panel overview of the raw embedding values
if 'embeddings_np' in locals():
    fig, (ax_values, ax_norms, ax_variance) = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: histogram over every scalar entry of the embedding matrix
    ax_values.hist(embeddings_np.flatten(), bins=50, alpha=0.7, edgecolor='black')
    ax_values.set_title('Distribution of Embedding Values')
    ax_values.set_xlabel('Embedding Value')
    ax_values.set_ylabel('Frequency')
    ax_values.grid(True, alpha=0.3)

    # Panel 2: histogram of per-prompt L2 norms
    norms = np.linalg.norm(embeddings_np, axis=1)
    ax_norms.hist(norms, bins=20, alpha=0.7, edgecolor='black')
    ax_norms.set_title('Distribution of Embedding Norms')
    ax_norms.set_xlabel('L2 Norm')
    ax_norms.set_ylabel('Frequency')
    ax_norms.grid(True, alpha=0.3)

    # Panel 3: variance of each embedding dimension across prompts
    dim_variance = np.var(embeddings_np, axis=0)
    ax_variance.plot(dim_variance)
    ax_variance.set_title('Variance Across Embedding Dimensions')
    ax_variance.set_xlabel('Dimension')
    ax_variance.set_ylabel('Variance')
    ax_variance.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # argsort is ascending, so take the last ten indices and reverse them
    top_variable_dims = np.argsort(dim_variance)[-10:][::-1]
    print(f"Embedding norms - Mean: {norms.mean():.4f}, Std: {norms.std():.4f}")
    print(f"Top 10 most variable dimensions: {top_variable_dims}")

In [None]:
# Compute cosine similarity matrix

def find_extreme_pairs(sim_matrix):
    """Locate the most and least similar pair of distinct items.

    Only entries strictly above the diagonal are searched. Searching a
    zero-filled `np.triu(...)` copy instead would let the zeroed diagonal or
    lower triangle win the arg-min whenever all similarities are positive.

    Args:
        sim_matrix: square 2-D NumPy array of pairwise similarities.

    Returns:
        `(max_pair, min_pair)`, each an `(i, j)` tuple of plain ints with i < j.

    Raises:
        ValueError: if the matrix has fewer than two rows (no pair exists).
    """
    rows, cols = np.triu_indices_from(sim_matrix, k=1)
    if rows.size == 0:
        raise ValueError("need at least two items to form a pair")
    pair_values = sim_matrix[rows, cols]
    hi = int(np.argmax(pair_values))
    lo = int(np.argmin(pair_values))
    return (int(rows[hi]), int(cols[hi])), (int(rows[lo]), int(cols[lo]))


if 'embeddings_np' in locals():
    print("Computing cosine similarity matrix...")
    similarity_matrix = cosine_similarity(embeddings_np)

    # Visualize similarity matrix; the upper triangle is hidden because the
    # matrix is symmetric
    plt.figure(figsize=(12, 10))
    mask = np.triu(np.ones_like(similarity_matrix, dtype=bool), k=1)

    # Create heatmap
    sns.heatmap(similarity_matrix,
                mask=mask,
                annot=False,
                cmap='coolwarm',
                center=0,
                square=True,
                xticklabels=[f"{i:2d}" for i in range(len(all_test_prompts))],
                yticklabels=[f"{i:2d}" for i in range(len(all_test_prompts))],
                cbar_kws={"shrink": .8})

    plt.title('Cosine Similarity Matrix of Prompt Embeddings')
    plt.xlabel('Prompt Index')
    plt.ylabel('Prompt Index')
    plt.tight_layout()
    plt.show()

    # Find most similar and dissimilar pairs (needs at least two prompts)
    if similarity_matrix.shape[0] >= 2:
        max_idx, min_idx = find_extreme_pairs(similarity_matrix)

        print(f"\nMost similar prompts (similarity: {similarity_matrix[max_idx]:.4f}):")
        print(f"  {max_idx[0]:2d}. {all_test_prompts[max_idx[0]]}")
        print(f"  {max_idx[1]:2d}. {all_test_prompts[max_idx[1]]}")

        print(f"\nMost dissimilar prompts (similarity: {similarity_matrix[min_idx]:.4f}):")
        print(f"  {min_idx[0]:2d}. {all_test_prompts[min_idx[0]]}")
        print(f"  {min_idx[1]:2d}. {all_test_prompts[min_idx[1]]}")

## 7. Dimensionality Reduction and Clustering

In [None]:
# PCA analysis

# Exact prompt-type -> colour lookup. A substring test such as
# `'relational' in ptype` also matches 'relational_2_colors', which would paint
# both groups red and leave the blue legend entry unused.
PROMPT_TYPE_COLORS = {
    'relational': 'red',
    'relational_2_colors': 'blue',
    'additional': 'green',
}


def prompt_type_color(ptype):
    """Return the scatter colour for a prompt type ('green' for unknown types)."""
    return PROMPT_TYPE_COLORS.get(ptype, 'green')


if 'embeddings_np' in locals():
    print("Performing PCA analysis...")

    # Fit PCA; the component count is capped at 50 and at (number of prompts - 1)
    pca = PCA(n_components=min(50, embeddings_np.shape[0]-1))
    pca_embeddings = pca.fit_transform(embeddings_np)

    # Plot explained variance
    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    plt.plot(pca.explained_variance_ratio_[:20], 'bo-')
    plt.title('PCA Explained Variance Ratio')
    plt.xlabel('Principal Component')
    plt.ylabel('Explained Variance Ratio')
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 3, 2)
    cumvar = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(cumvar[:20], 'ro-')
    plt.title('Cumulative Explained Variance')
    plt.xlabel('Principal Component')
    plt.ylabel('Cumulative Variance')
    plt.grid(True, alpha=0.3)

    # 2D visualization, one colour per prompt type
    plt.subplot(1, 3, 3)
    colors = [prompt_type_color(ptype) for ptype in embeddings_df['prompt_type']]

    plt.scatter(pca_embeddings[:, 0], pca_embeddings[:, 1], c=colors, alpha=0.7)
    plt.title('PCA Visualization (PC1 vs PC2)')
    plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.3f})')
    plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.3f})')
    plt.grid(True, alpha=0.3)

    # Add legend (colours match PROMPT_TYPE_COLORS)
    from matplotlib.patches import Patch
    legend_elements = [Patch(facecolor='red', label='Relational'),
                      Patch(facecolor='blue', label='Relational 2-Colors'),
                      Patch(facecolor='green', label='Additional')]
    plt.legend(handles=legend_elements)

    plt.tight_layout()
    plt.show()

    # Report cumulative variance only for cut-offs that actually exist; with
    # few prompts PCA may have produced fewer than 5 or 10 components.
    for top_k in (5, 10):
        if len(cumvar) >= top_k:
            print(f"First {top_k} PCs explain {cumvar[top_k - 1]:.3f} of total variance")

In [None]:
# t-SNE visualization
if 'embeddings_np' in locals() and len(embeddings_np) > 5:
    print("Performing t-SNE analysis...")

    # Use PCA preprocessing for t-SNE
    pca_50 = PCA(n_components=min(50, embeddings_np.shape[0]-1))
    pca_reduced = pca_50.fit_transform(embeddings_np)

    # t-SNE with different perplexities (a single small one for small prompt sets)
    perplexities = [5, 10, 20] if len(embeddings_np) > 20 else [min(5, len(embeddings_np)-1)]

    fig, axes = plt.subplots(1, len(perplexities), figsize=(5*len(perplexities), 5))
    if len(perplexities) == 1:
        axes = [axes]  # plt.subplots returns a bare Axes for a 1x1 grid

    # Color by prompt type; identical for every panel, so computed once
    colors = ['red' if 'relational' in ptype and '2_colors' not in ptype else
              'blue' if 'relational_2' in ptype else 'green'
              for ptype in embeddings_df['prompt_type']]

    for i, perp in enumerate(perplexities):
        if perp < len(embeddings_np):
            # The iteration count is left at the library default (1000) rather
            # than passed as `n_iter`: scikit-learn renamed that keyword to
            # `max_iter`, and relying on the default works with either name.
            tsne = TSNE(n_components=2, perplexity=perp, random_state=42)
            tsne_embeddings = tsne.fit_transform(pca_reduced)

            axes[i].scatter(tsne_embeddings[:, 0], tsne_embeddings[:, 1], c=colors, alpha=0.7)
            axes[i].set_title(f't-SNE (perplexity={perp})')
            axes[i].set_xlabel('t-SNE 1')
            axes[i].set_ylabel('t-SNE 2')
            axes[i].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print("Skipping t-SNE - insufficient data points")

## 8. Spatial Relationship Pattern Analysis

In [None]:
# Analyze spatial relationship patterns

# Keyword table used to bucket prompts by spatial relation.
# Order matters: the first category with a matching keyword wins. 'diagonal'
# comes first because its phrases ("upper left", ...) contain the single-word
# keywords of the other categories and would otherwise never be reached.
spatial_keywords = {
    'diagonal': ['upper left', 'upper right', 'lower left', 'lower right'],
    'above': ['above', 'upper'],
    'below': ['below', 'lower'],
    'left': ['left'],
    'right': ['right'],
}


def categorize_prompt(prompt, keywords=None):
    """Return the spatial category of `prompt`.

    Args:
        prompt: prompt text; matching is case-insensitive substring search.
        keywords: optional mapping of category -> list of keyword phrases,
            checked in insertion order. Defaults to `spatial_keywords`.

    Returns:
        The first category with a keyword contained in the prompt, or 'other'
        when nothing matches.
    """
    table = spatial_keywords if keywords is None else keywords
    text = prompt.lower()
    for spatial_type, phrases in table.items():
        if any(phrase in text for phrase in phrases):
            return spatial_type
    return 'other'


if 'embeddings_np' in locals():
    # Categorize prompts by spatial relationship
    prompt_categories = [categorize_prompt(prompt) for prompt in all_test_prompts]

    # Add to dataframe
    embeddings_df['spatial_category'] = prompt_categories

    print("Spatial category distribution:")
    category_counts = pd.Series(prompt_categories).value_counts()
    print(category_counts)

    # Visualize spatial categories in embedding space
    if 'pca_embeddings' in locals():
        plt.figure(figsize=(10, 8))

        # Sorted so the category -> colour assignment and the legend order are
        # the same on every run (plain set iteration order is not stable
        # across interpreter sessions).
        unique_categories = sorted(set(prompt_categories))
        colors = plt.cm.Set1(np.linspace(0, 1, len(unique_categories)))
        category_colors = dict(zip(unique_categories, colors))

        for category in unique_categories:
            mask = np.array(prompt_categories) == category
            if np.any(mask):
                plt.scatter(pca_embeddings[mask, 0], pca_embeddings[mask, 1],
                           c=[category_colors[category]], label=category, alpha=0.7, s=60)

        plt.title('PCA Visualization by Spatial Category')
        plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.3f})')
        plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.3f})')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.show()

In [None]:
# Compute average embeddings for each spatial category

def compute_category_centroids(embeddings, labels):
    """Average the embedding rows belonging to each label.

    Args:
        embeddings: 2-D NumPy array with one row per item.
        labels: sequence of category labels, one per row of `embeddings`.

    Returns:
        dict mapping each distinct label to the mean of its rows. Keys are
        inserted in sorted order so downstream plots and printouts come out
        the same on every run.
    """
    label_array = np.asarray(labels)
    return {
        category: embeddings[label_array == category].mean(axis=0)
        for category in sorted(set(labels))
    }


# Both the embeddings and the category labels from the previous cell are needed
if 'embeddings_np' in locals() and 'prompt_categories' in locals():
    spatial_centroids = compute_category_centroids(embeddings_np, prompt_categories)

    # Compute similarities between spatial category centroids
    if len(spatial_centroids) > 1:
        categories = list(spatial_centroids.keys())
        centroid_matrix = np.array([spatial_centroids[cat] for cat in categories])
        centroid_similarity = cosine_similarity(centroid_matrix)

        # Visualize centroid similarities
        plt.figure(figsize=(8, 6))
        sns.heatmap(centroid_similarity,
                    annot=True,
                    cmap='coolwarm',
                    center=0,
                    xticklabels=categories,
                    yticklabels=categories,
                    square=True)

        plt.title('Cosine Similarity Between Spatial Category Centroids')
        plt.tight_layout()
        plt.show()

        # Each unordered pair is printed once (i < j)
        print("\nSpatial category centroid similarities:")
        for i, cat1 in enumerate(categories):
            for j, cat2 in enumerate(categories):
                if i < j:
                    print(f"{cat1} vs {cat2}: {centroid_similarity[i,j]:.4f}")

## 9. Detailed Prompt Analysis

In [None]:
# Per-prompt analysis table: metadata, embedding statistics and nearest neighbour
if 'embeddings_df' in locals():
    analysis_df = embeddings_df[['prompt', 'prompt_type', 'spatial_category']].copy()

    # The embedding components live in the integer-named columns
    embedding_cols = [col for col in embeddings_df.columns if isinstance(col, int)]
    embedding_data = embeddings_df[embedding_cols].values

    analysis_df['embedding_norm'] = np.linalg.norm(embedding_data, axis=1)
    analysis_df['embedding_mean'] = embedding_data.mean(axis=1)
    analysis_df['embedding_std'] = embedding_data.std(axis=1)

    # Nearest neighbour of every prompt according to the cosine similarity matrix
    most_similar_indices, most_similar_scores = [], []
    for row_idx, similarity_row in enumerate(similarity_matrix):
        candidates = similarity_row.copy()
        candidates[row_idx] = -1  # a prompt must not be its own neighbour
        nearest = np.argmax(candidates)
        most_similar_indices.append(nearest)
        most_similar_scores.append(candidates[nearest])

    analysis_df['most_similar_idx'] = most_similar_indices
    analysis_df['most_similar_score'] = most_similar_scores
    analysis_df['most_similar_prompt'] = [all_test_prompts[idx] for idx in most_similar_indices]

    # Full table as plain text
    print("=== Detailed Prompt Analysis ===")
    print(analysis_df.to_string(index=True, max_colwidth=50))

    # Mean and std of each statistic within every spatial category
    print("\n=== Summary by Spatial Category ===")
    summarized_columns = ('embedding_norm', 'embedding_mean', 'most_similar_score')
    aggregation_spec = {column: ['mean', 'std'] for column in summarized_columns}
    summary = analysis_df.groupby('spatial_category').agg(aggregation_spec).round(4)

    print(summary)

## 10. Comparison with Existing Approaches

In [None]:
# Compare with simple bag-of-words baseline
from sklearn.feature_extraction.text import TfidfVectorizer

print("=== Comparison with TF-IDF Baseline ===")

# Guarded like the other analysis cells: without the Qwen3 embeddings and their
# similarity matrix there is nothing to compare against.
if 'embeddings_np' in locals() and 'similarity_matrix' in locals():
    # Create TF-IDF embeddings (unigrams + bigrams, at most 1000 features)
    tfidf_vectorizer = TfidfVectorizer(max_features=1000, ngram_range=(1, 2))
    tfidf_embeddings = tfidf_vectorizer.fit_transform(all_test_prompts).toarray()

    print(f"TF-IDF embedding dimension: {tfidf_embeddings.shape[1]}")
    print(f"Qwen3 embedding dimension: {embeddings_np.shape[1]}")

    # Compute TF-IDF similarity matrix
    tfidf_similarity = cosine_similarity(tfidf_embeddings)

    # Compare similarity matrices
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    # Qwen3 similarities
    im1 = axes[0].imshow(similarity_matrix, cmap='coolwarm', vmin=-1, vmax=1)
    axes[0].set_title('Qwen3-Embedding-8B Similarities')
    axes[0].set_xlabel('Prompt Index')
    axes[0].set_ylabel('Prompt Index')

    # TF-IDF similarities
    im2 = axes[1].imshow(tfidf_similarity, cmap='coolwarm', vmin=-1, vmax=1)
    axes[1].set_title('TF-IDF Similarities')
    axes[1].set_xlabel('Prompt Index')
    axes[1].set_ylabel('Prompt Index')

    # Difference
    diff_matrix = similarity_matrix - tfidf_similarity
    im3 = axes[2].imshow(diff_matrix, cmap='RdBu', vmin=-1, vmax=1)
    axes[2].set_title('Difference (Qwen3 - TF-IDF)')
    axes[2].set_xlabel('Prompt Index')
    axes[2].set_ylabel('Prompt Index')

    # Add colorbars
    plt.colorbar(im1, ax=axes[0], fraction=0.046)
    plt.colorbar(im2, ax=axes[1], fraction=0.046)
    plt.colorbar(im3, ax=axes[2], fraction=0.046)

    plt.tight_layout()
    plt.show()

    # Correlation between similarity matrices, using each unordered pair once
    # (strictly upper triangle; the diagonal of self-similarities is excluded)
    mask = np.triu(np.ones_like(similarity_matrix, dtype=bool), k=1)
    qwen_sim_triu = similarity_matrix[mask]
    tfidf_sim_triu = tfidf_similarity[mask]

    correlation = np.corrcoef(qwen_sim_triu, tfidf_sim_triu)[0, 1]
    print(f"\nCorrelation between Qwen3 and TF-IDF similarities: {correlation:.4f}")

    # Scatter plot comparison; the dashed line is y = x
    plt.figure(figsize=(8, 6))
    plt.scatter(tfidf_sim_triu, qwen_sim_triu, alpha=0.6)
    plt.plot([0, 1], [0, 1], 'r--', alpha=0.8, label='Perfect correlation')
    plt.xlabel('TF-IDF Cosine Similarity')
    plt.ylabel('Qwen3 Cosine Similarity')
    plt.title(f'Similarity Comparison (correlation: {correlation:.4f})')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()
else:
    print("❌ Skipping TF-IDF comparison - Qwen3 embeddings / similarity matrix not available")

## 11. Summary and Conclusions

In [None]:
# Generate comprehensive summary
print("=== QWEN3-EMBEDDING-8B ANALYSIS SUMMARY ===")
print("="*60)

# Remembered so the closing line only claims success when there were results
analysis_available = 'embeddings_np' in locals()

if analysis_available:
    print(f"\n📊 MODEL SPECIFICATIONS:")
    print(f"   • Model: Qwen/Qwen3-Embedding-8B")
    print(f"   • Embedding dimension: {embeddings_np.shape[1]}")
    print(f"   • Number of test prompts: {len(all_test_prompts)}")
    print(f"   • Device used: {device}")

    print(f"\n🎯 EMBEDDING CHARACTERISTICS:")
    print(f"   • Mean embedding norm: {np.linalg.norm(embeddings_np, axis=1).mean():.4f}")
    print(f"   • Embedding value range: [{embeddings_np.min():.4f}, {embeddings_np.max():.4f}]")
    print(f"   • Standard deviation: {embeddings_np.std():.4f}")

    if 'pca' in locals():
        cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
        print(f"\n🔍 DIMENSIONALITY ANALYSIS:")
        # PCA may have fewer than 5 or 10 components when there are few prompts
        for top_k in (5, 10):
            if len(cumulative_variance) >= top_k:
                print(f"   • Top {top_k} PCs explain: {cumulative_variance[top_k - 1]:.3f} of variance")
        print(f"   • Effective dimensionality: {np.sum(pca.explained_variance_ratio_ > 0.01)} dimensions")

    # The sections below depend on variables from earlier cells, so each is
    # skipped when its cell was not run.
    if 'prompt_categories' in locals():
        print(f"\n🎨 SPATIAL RELATIONSHIP PATTERNS:")
        category_counts = pd.Series(prompt_categories).value_counts()
        for category, count in category_counts.items():
            print(f"   • {category}: {count} prompts")

    if 'similarity_matrix' in locals():
        print(f"\n🔗 SIMILARITY INSIGHTS:")
        # Strictly upper triangle: each unordered pair once, no self-similarity
        sim_stats = similarity_matrix[np.triu_indices_from(similarity_matrix, k=1)]
        print(f"   • Mean pairwise similarity: {sim_stats.mean():.4f}")
        print(f"   • Similarity standard deviation: {sim_stats.std():.4f}")
        print(f"   • Most similar pair: {sim_stats.max():.4f}")
        print(f"   • Least similar pair: {sim_stats.min():.4f}")

    if 'correlation' in locals():
        print(f"\n📈 COMPARISON WITH BASELINES:")
        print(f"   • Correlation with TF-IDF: {correlation:.4f}")
        if correlation > 0.7:
            print(f"   • High correlation suggests semantic consistency")
        elif correlation > 0.3:
            print(f"   • Moderate correlation with lexical features")
        else:
            print(f"   • Low correlation indicates novel semantic representations")

    # NOTE(review): the findings and recommendations below are fixed text; they
    # are not derived from any of the metrics computed in this notebook.
    print(f"\n🚀 KEY FINDINGS:")
    print(f"   • Model successfully generates high-dimensional embeddings")
    print(f"   • Spatial relationships show distinct clustering patterns")
    print(f"   • Embeddings capture both lexical and semantic similarities")
    print(f"   • Model demonstrates strong capability for spatial reasoning tasks")

    print(f"\n💡 RECOMMENDATIONS FOR RESEARCH:")
    print(f"   • Consider using Qwen3 embeddings as text encoder in diffusion models")
    print(f"   • Explore attention patterns in spatial relationship processing")
    print(f"   • Compare with existing T5 and random embedding approaches")
    print(f"   • Investigate compositional understanding of color+shape+spatial relations")

else:
    print("❌ Analysis incomplete - model loading failed")
    print("Please check CUDA availability and model accessibility")

print("\n" + "="*60)
# The success line would contradict the failure message printed above
if analysis_available:
    print("Analysis completed successfully! 🎉")

In [None]:
# Save results for future analysis
if 'embeddings_df' in locals():
    output_dir = project_root / "results" / "qwen3_analysis"
    output_dir.mkdir(parents=True, exist_ok=True)

    # (file name, description) of everything actually written, so the report
    # at the end never lists a file that was skipped
    saved_files = []

    # Save embeddings and analysis
    embeddings_df.to_csv(output_dir / "qwen3_embeddings_analysis.csv", index=False)
    saved_files.append(("qwen3_embeddings_analysis.csv", "Full analysis DataFrame"))

    np.save(output_dir / "qwen3_embeddings.npy", embeddings_np)
    saved_files.append(("qwen3_embeddings.npy", "Raw embeddings"))

    # Optional artefacts: only present when the corresponding cell was run
    if 'similarity_matrix' in locals():
        np.save(output_dir / "similarity_matrix.npy", similarity_matrix)
        saved_files.append(("similarity_matrix.npy", "Cosine similarity matrix"))

    if 'pca_embeddings' in locals():
        np.save(output_dir / "pca_embeddings.npy", pca_embeddings)
        saved_files.append(("pca_embeddings.npy", "PCA-reduced embeddings"))

    # Save prompt list; explicit encoding keeps the file independent of the
    # platform's default text encoding
    with open(output_dir / "test_prompts.txt", 'w', encoding='utf-8') as f:
        for i, prompt in enumerate(all_test_prompts):
            f.write(f"{i:2d}. {prompt}\n")
    saved_files.append(("test_prompts.txt", "List of test prompts"))

    print(f"Results saved to: {output_dir}")
    print("Files saved:")
    for file_name, description in saved_files:
        print(f"  • {file_name} - {description}")
else:
    print("No results to save - analysis incomplete")