# NEOS Foundation Model Explorer

Interactive exploration of objects that are similar to one another in the foundation model's learned embedding space.

This notebook provides:
- Interactive similarity search using learned 512D embeddings
- UMAP visualization of embedding space
- Quality filtering by SNR and magnitude
- Real/Fake object classification visualization


In [10]:
# Import required libraries
import numpy as np
import torch
import warnings
from pathlib import Path
import importlib
import sys

# Clear any cached modules to ensure we get the latest code
modules_to_reload = ['models.foundation_model', 'data.dataset', 'utils.similarity', 'utils.visualization']
for module in modules_to_reload:
    if module in sys.modules:
        importlib.reload(sys.modules[module])

# Import NEOS modules
from models.foundation_model import NEOSFoundationModel
from data.dataset import NEOSDataset, load_tabular_metadata, extract_labels
from utils.similarity import generate_embeddings, create_umap_visualization
from utils.visualization import NEOSExplorer

warnings.filterwarnings('ignore')

print("NEOS Foundation Model Explorer - Ready!")
print("Libraries imported successfully (with module reload).")


NEOS Foundation Model Explorer - Ready!
Libraries imported successfully (with module reload).


In [11]:
# Configuration
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MODEL_PATH = 'models/best_model.pt'
EMBEDDINGS_CACHE = 'model_embeddings.npz'

# Data paths
VISIT_PATHS = [
    "/neosp/tops/prod/s0012/t1/q3/l0/v00",
    "/neosp/tops/prod/s0012/t1/q3/l0/v01"
]

print(f"Device: {DEVICE}")
print(f"Visit paths: {len(VISIT_PATHS)} visits configured")
print(f"Model path: {MODEL_PATH}")


Device: cuda
Visit paths: 2 visits configured
Model path: models/best_model.pt


In [12]:
# Load or generate embeddings
embeddings_file = Path(EMBEDDINGS_CACHE)

if embeddings_file.exists():
    print("Loading pre-computed embeddings...")
    data = np.load(embeddings_file)
    embeddings = data['embeddings']
    labels = data['labels'] 
    source_ids = data['source_ids']
    embeddings_2d = data['embeddings_2d']
    
    print(f"Loaded {len(embeddings):,} pre-computed 512D embeddings!")
    
else:
    print("Generating embeddings from foundation model...")
    
    # Load the trained model
    checkpoint = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=False)
    model = NEOSFoundationModel(tabular_dim=50, embed_dim=512)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(DEVICE)
    model.eval()
    
    print(f"Model loaded (training loss: {checkpoint.get('loss', 'N/A'):.4f})")
    
    # Load dataset
    dataset = NEOSDataset(
        visit_paths=VISIT_PATHS,
        max_samples_per_visit=20000  # 40K total samples
    )
    
    print(f"Dataset loaded: {len(dataset)} samples")
    
    # Load tabular metadata for labels
    tabular_dict = load_tabular_metadata(VISIT_PATHS)
    
    # Generate embeddings
    embeddings, source_ids = generate_embeddings(model, dataset, DEVICE)
    
    # Extract labels
    labels = extract_labels(source_ids, tabular_dict)
    
    # Create UMAP visualization
    print("Creating UMAP 2D visualization...")
    embeddings_2d = create_umap_visualization(embeddings)
    
    # Save for next time
    np.savez(embeddings_file, 
             embeddings=embeddings, 
             source_ids=source_ids, 
             labels=labels,
             embeddings_2d=embeddings_2d)
    print("Saved embeddings for faster future loading!")

# Load tabular metadata
tabular_dict = load_tabular_metadata(VISIT_PATHS)

print(f"\nFoundation Model Explorer Ready!")
print(f"   Total samples: {len(embeddings):,}")
print(f"   Real objects: {(labels == 'Real').sum():,}")
print(f"   Fake objects: {(labels == 'Fake').sum():,}")
print(f"   Embedding dimension: {embeddings.shape[1]}D")
print(f"   UMAP 2D visualization: Ready") 


Loading pre-computed embeddings...
Loaded 40,000 pre-computed 512D embeddings!

Foundation Model Explorer Ready!
   Total samples: 40,000
   Real objects: 4,580
   Fake objects: 35,420
   Embedding dimension: 512D
   UMAP 2D visualization: Ready


In [13]:
# Create the interactive explorer
explorer = NEOSExplorer(
    embeddings=embeddings,
    source_ids=source_ids,
    labels=labels,
    embeddings_2d=embeddings_2d,
    tabular_dict=tabular_dict,
    visit_paths=VISIT_PATHS
)

# Display the main interface
explorer.display_interface()


VBox(children=(HTML(value='<h2>NEOS Foundation Model - Interactive Similarity Explorer</h2>'), HTML(value='<h3…

Output()