# t-SNE Visualization of Model Embeddings

This notebook loads a model, extracts embeddings, and creates a t-SNE visualization.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import torch
import seaborn as sns

# Notebook-wide plotting defaults: seaborn's white-grid theme and a
# 12x8 inch default figure size for every matplotlib figure.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

## 1. Load the Model

In [None]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

# Template -- replace with your own model loading code, for example:
# from models import YourModel
# model = YourModel.from_pretrained('path/to/checkpoint')
# model = model.to(device)
# model.eval()

# Checkpoint location; update this to point at your own model file.
model_path = 'path/to/your/model.pth'
# Template -- load the checkpoint stored at model_path:
# model = torch.load(model_path)
# model = model.to(device)
# model.eval()

## 2. Extract Embeddings

In [None]:
def extract_embeddings(model, dataloader, device):
    """
    Extract embeddings from the model for all data in the dataloader.

    The model is put into eval mode and every batch is run through
    ``model(data)`` with gradient tracking disabled. The per-batch outputs
    and targets are moved to the CPU, converted to numpy and stacked in
    iteration order.

    Args:
        model: The neural network model; ``model(data)`` must return the
            embedding tensor for a batch (adjust the call inside the loop
            if your model exposes a dedicated embedding method)
        dataloader: Iterable yielding ``(data, target)`` tensor pairs. It
            does not need to define ``__len__`` (e.g. a loader over an
            iterable-style dataset); the progress message then omits the
            total batch count
        device: Device to run inference on

    Returns:
        embeddings: numpy array of embeddings (batches stacked row-wise)
        labels: numpy array of corresponding labels

    Raises:
        ValueError: if the dataloader yields no batches
    """
    # Some loaders (iterable-style datasets, generators) have no length;
    # query it once up front instead of failing inside the progress print.
    try:
        total_batches = len(dataloader)
    except TypeError:
        total_batches = None

    embeddings = []
    labels = []

    model.eval()
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(dataloader):
            data = data.to(device)

            # Get embeddings from your model
            # Adjust this based on your model architecture
            # Example: embedding = model.encode(data)  # or model.get_embeddings(data)
            embedding = model(data)  # Replace with actual embedding extraction

            embeddings.append(embedding.cpu().numpy())
            labels.append(target.cpu().numpy())

            if batch_idx % 10 == 0:
                if total_batches is None:
                    print(f'Processed batch {batch_idx}')
                else:
                    print(f'Processed batch {batch_idx}/{total_batches}')

    # np.vstack / np.concatenate reject an empty list with an unhelpful
    # message, so report the real cause explicitly.
    if not embeddings:
        raise ValueError('dataloader yielded no batches; cannot extract embeddings')

    embeddings = np.vstack(embeddings)
    labels = np.concatenate(labels)

    return embeddings, labels

In [None]:
# Prepare your dataset and dataloader
# Example:
# from torch.utils.data import Dataset, DataLoader
# dataset = YourDataset()
# dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

# Extract embeddings
# embeddings, labels = extract_embeddings(model, dataloader, device)
# print(f'Extracted embeddings shape: {embeddings.shape}')
# print(f'Labels shape: {labels.shape}')

## 3. Create t-SNE Visualization

In [None]:
def create_tsne(embeddings, labels, n_components=2, perplexity=30, n_iter=1000, random_state=42):
    """
    Create t-SNE visualization of embeddings.

    Args:
        embeddings: numpy array of embeddings
        labels: numpy array of labels (not used by the projection itself;
            accepted so calls mirror plot_tsne)
        n_components: number of dimensions for t-SNE (2 or 3)
        perplexity: t-SNE perplexity parameter
        n_iter: number of iterations
        random_state: random seed for reproducibility

    Returns:
        tsne_results: t-SNE transformed embeddings
    """
    import inspect  # local import: only needed for the version check below

    print(f'Running t-SNE with perplexity={perplexity}, n_iter={n_iter}...')

    # scikit-learn renamed TSNE's `n_iter` argument to `max_iter`
    # (deprecated in 1.5 and scheduled for removal). Pass the iteration
    # count under whichever name the installed version accepts, preferring
    # the new one so no deprecation warning is triggered.
    accepted_params = inspect.signature(TSNE).parameters
    iter_kwarg = 'max_iter' if 'max_iter' in accepted_params else 'n_iter'

    tsne = TSNE(
        n_components=n_components,
        perplexity=perplexity,
        random_state=random_state,
        verbose=1,
        **{iter_kwarg: n_iter},
    )

    tsne_results = tsne.fit_transform(embeddings)
    print('t-SNE completed!')

    return tsne_results

In [None]:
# Run t-SNE
# tsne_results = create_tsne(embeddings, labels, perplexity=30, n_iter=1000)

## 4. Visualize Results

In [None]:
def plot_tsne(tsne_results, labels, title='t-SNE Visualization', figsize=(12, 8), save_path=None):
    """
    Plot t-SNE results.

    Draws one scatter series per distinct label on a new figure, using the
    first two columns of ``tsne_results`` as x/y coordinates, then shows
    the figure.

    Args:
        tsne_results: t-SNE transformed embeddings
        labels: corresponding labels
        title: plot title
        figsize: figure size
        save_path: optional file path; when given, the figure is written
            there (dpi=300, tight bounding box) before it is shown
    """
    # Normalise to an ndarray so the per-class comparison below always
    # yields an element-wise boolean mask.
    labels = np.asarray(labels)

    plt.figure(figsize=figsize)

    # Create scatter plot with different colors for each class
    unique_labels = np.unique(labels)
    colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))

    for label, color in zip(unique_labels, colors):
        mask = labels == label
        plt.scatter(
            tsne_results[mask, 0],
            tsne_results[mask, 1],
            c=[color],  # wrapped in a list so the RGBA row is read as one color
            label=f'Class {label}',
            alpha=0.6,
            s=30
        )

    plt.xlabel('t-SNE Component 1', fontsize=12)
    plt.ylabel('t-SNE Component 2', fontsize=12)
    plt.title(title, fontsize=14, fontweight='bold')
    # Legend sits outside the axes, to the right of the plot area
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)
    plt.tight_layout()

    # Save before showing: once plt.show() returns, the current figure may
    # already be released, so a later savefig can write an empty image.
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    plt.show()

In [None]:
# Plot the results
# plot_tsne(tsne_results, labels, title='t-SNE Visualization of Model Embeddings')

## 5. Optional: Save Results

In [None]:
# Save t-SNE results
# np.save('tsne_results.npy', tsne_results)
# np.save('labels.npy', labels)

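# Save the plot
# NOTE: plot_tsne() creates its own figure and ends with plt.show(). A
# plt.savefig() call placed after it can therefore write a blank image; the
# figure must be saved before plt.show() is called for the file to contain
# the plot.
# plt.figure(figsize=(12, 8))
# plot_tsne(tsne_results, labels, title='t-SNE Visualization of Model Embeddings')
# plt.savefig('tsne_visualization.png', dpi=300, bbox_inches='tight')
# print('Saved visualization to tsne_visualization.png')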

## 6. Optional: Interactive 3D Visualization

In [None]:
# For 3D visualization
# tsne_3d = create_tsne(embeddings, labels, n_components=3, perplexity=30)

# from mpl_toolkits.mplot3d import Axes3D

# fig = plt.figure(figsize=(12, 8))
# ax = fig.add_subplot(111, projection='3d')

# unique_labels = np.unique(labels)
# colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))

# for label, color in zip(unique_labels, colors):
#     mask = labels == label
#     ax.scatter(
#         tsne_3d[mask, 0],
#         tsne_3d[mask, 1],
#         tsne_3d[mask, 2],
#         c=[color],
#         label=f'Class {label}',
#         alpha=0.6,
#         s=30
#     )

# ax.set_xlabel('t-SNE Component 1')
# ax.set_ylabel('t-SNE Component 2')
# ax.set_zlabel('t-SNE Component 3')
# ax.set_title('3D t-SNE Visualization', fontweight='bold')
# plt.legend()
# plt.show()