In [None]:
import os
# Set CUDA_MODULE_LOADING=LAZY in this first cell, ahead of the `import torch`
# that happens in the next cell.
# NOTE(review): the original note here said this suppresses NVML warnings in
# Jupyter. The variable itself selects CUDA's lazy module loading; the NVML
# UserWarnings are filtered separately via `warnings.filterwarnings` in the
# next cell — confirm whether this assignment is still needed.
os.environ['CUDA_MODULE_LOADING'] = 'LAZY'

# Load the autoreload extension in mode 1: only modules registered with
# `%aimport` are reloaded before each cell execution.
%load_ext autoreload
%autoreload 1


In [None]:
import sys
from pathlib import Path
import warnings

# Suppress specific warnings
warnings.filterwarnings('ignore', category=UserWarning, message='.*NVML.*')
warnings.filterwarnings('ignore', category=UserWarning, message=".*Can't initialize NVML.*")

# Add project root and vggt paths
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))
sys.path.insert(0, str(project_root / "src" / "qwenvl" / "external"))

# Import torch first
import torch

# Import from spatial_mllm
from src.qwenvl.model.spatial_mllm import SpatialMLLMConfig, SpatialMLLMForConditionalGeneration

# Register for autoreload
%aimport src.qwenvl.model.spatial_mllm
%aimport src.qwenvl.model.spatial_encoder
%aimport src.qwenvl.model.connector

print(f"✓ Imports successful")
print(f"✓ CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"✓ GPU device: {torch.cuda.get_device_name(0)}")

In [None]:
# Smoke test: build the model from its default config and run one text-only
# forward pass on dummy inputs, then report the logits shape.

# Seed before anything random happens so that both the weights created by the
# constructor (no checkpoint is loaded here) and the dummy token ids below are
# reproducible across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Initialize config and model
config = SpatialMLLMConfig()
model = SpatialMLLMForConditionalGeneration(config)

# Move to CUDA if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Create dummy inputs with correct shapes
batch_size = 1
seq_length = 10

# Random token ids drawn from [0, vocab_size)
input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_length), device=device)
# All positions are attended to (no padding)
attention_mask = torch.ones(batch_size, seq_length, device=device, dtype=torch.long)
# Positions 0..seq_length-1, repeated for every batch row
position_ids = torch.arange(seq_length, device=device).unsqueeze(0).expand(batch_size, -1)

# Forward pass (gradients disabled: this is inference only)
with torch.no_grad():
    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
    )

print(f"Output logits shape: {outputs.logits.shape}")
print(f"Config vocab_size: {config.vocab_size}")
print(f"Device: {device}")