# Feature Detector V5 - Inference Demo

This notebook demonstrates the V5 model's performance on real test samples.

**Model Performance:**
- Count Accuracy (Binary): 86.36%
- Count Accuracy (Avg %): 98.33%
- Type Accuracy: 87.10%
- Mean Absolute Error: 0.15 features

In [1]:
import torch
import torch.nn.functional as F
import numpy as np
from pathlib import Path
import pandas as pd
from torch_geometric.data import Data

from feature_detector_v5 import FeatureDetectorV5

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


## 1. Load Model and Dataset

In [2]:
# Load checkpoint
checkpoint_path = 'models/feature_detector_v5_100k/best_model.pt'
checkpoint = torch.load(checkpoint_path, weights_only=False)

# Load dataset
dataset_path = 'data/feature_detection_100k_v4/dataset.pt'
dataset_dict = torch.load(dataset_path, weights_only=False)

data_list = dataset_dict['data']
normalize_stats = {k: v.to(device) for k, v in dataset_dict['normalize_stats'].items()}
feature_types = dataset_dict['feature_types']
feature_type_to_idx = dataset_dict['feature_type_to_idx']
idx_to_feature_type = {v: k for k, v in feature_type_to_idx.items()}

print(f"Loaded {len(data_list)} samples")
print(f"Feature types: {feature_types}")

Loaded 96204 samples
Feature types: ['hole', 'pocket', 'step', 'weld', 'chamfer', 'fillet', 'unknown', 'additive', 'no_feature']


In [3]:
# Get test set indices (same split as training)
n = len(data_list)
train_size = int(0.7 * n)
val_size = int(0.15 * n)

np.random.seed(42)
indices = np.random.permutation(n)
test_indices = indices[train_size + val_size:]

print(f"Test set size: {len(test_indices)}")
print(f"First 10 test indices: {test_indices[:10]}")

Test set size: 14432
First 10 test indices: [51630 54583 59967 26485 34297  1286 48398 15059 84984 84247]


In [4]:
# Create model
node_feature_dim = normalize_stats['node_mean'].size(0)
edge_feature_dim = normalize_stats['edge_mean'].size(0)

model = FeatureDetectorV5(
    node_feature_dim=node_feature_dim,
    edge_feature_dim=edge_feature_dim,
    hidden_dim=384,
    num_gnn_layers=6,
    num_transformer_layers=4,
    num_feature_types=9,
    num_params=10,
    max_count=30,
    nhead=8
).to(device)

model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

print(f"Model loaded with {sum(p.numel() for p in model.parameters()):,} parameters")

Model loaded with 16,703,283 parameters


## 2. Select 5 Test Samples

We'll select samples with different feature counts for diversity.

In [5]:
# Get feature counts for test samples
test_counts = [(idx, len(data_list[idx]['features']) - 1) for idx in test_indices]

# Select 5 samples with different counts
selected_samples = []
target_counts = [3, 5, 7, 10, 15]  # Different feature counts for variety

for target in target_counts:
    # Find sample closest to target count
    candidates = [(idx, cnt) for idx, cnt in test_counts if abs(cnt - target) <= 2]
    if candidates:
        selected_samples.append(candidates[0][0])
    
# If we don't have 5, just take first 5 test samples
if len(selected_samples) < 5:
    selected_samples = test_indices[:5].tolist()

print(f"Selected test sample indices: {selected_samples}")
print(f"\nFeature counts:")
for idx in selected_samples:
    count = len(data_list[idx]['features']) - 1
    sample_id = data_list[idx]['sample_id']
    print(f"  Sample {sample_id}: {count} features")

Selected test sample indices: [np.int32(84247), np.int32(51630), np.int32(51630), np.int32(59967), np.int32(54583)]

Feature counts:
  Sample 00088413: 4 features
  Sample 00054192: 6 features
  Sample 00054192: 6 features
  Sample 00062926: 9 features
  Sample 00057294: 15 features


## 3. Inference Function

In [6]:
@torch.no_grad()
def run_inference(sample_idx):
    """Run inference on a single sample and return results."""
    data = data_list[sample_idx]
    sample_id = data['sample_id']
    
    # Normalize graph data
    x = (data['x'] - normalize_stats['node_mean'].cpu()) / normalize_stats['node_std'].cpu()
    x = torch.clamp(x, -10.0, 10.0)
    
    if data['edge_attr'].size(0) > 0:
        edge_attr = (data['edge_attr'] - normalize_stats['edge_mean'].cpu()) / normalize_stats['edge_std'].cpu()
        edge_attr = torch.clamp(edge_attr, -10.0, 10.0)
    else:
        edge_attr = data['edge_attr']
    
    graph = Data(
        x=x,
        edge_index=data['edge_index'],
        edge_attr=edge_attr,
        batch=torch.zeros(x.size(0), dtype=torch.long)  # Add batch attribute for single graph
    ).to(device)
    
    # Get ground truth features (exclude EOS token)
    gt_features = data['features'][:-1]
    gt_count = len(gt_features)
    
    # Prepare ground truth feature vectors for model input
    gt_feature_vecs = []
    for feat in gt_features:
        feature_vec = torch.zeros(19)
        feature_vec[feat['type']] = 1.0
        params = torch.from_numpy(feat['params']).float()
        params_norm = (params - normalize_stats['param_mean'].cpu()) / normalize_stats['param_std'].cpu()
        params_norm = torch.clamp(params_norm, -10.0, 10.0)
        feature_vec[9:] = params_norm
        gt_feature_vecs.append(feature_vec)
    
    if gt_feature_vecs:
        gt_feature_tensor = torch.stack(gt_feature_vecs).unsqueeze(0).to(device)
    else:
        gt_feature_tensor = torch.zeros(1, 1, 19).to(device)
    
    # Run inference with ground truth count for fair comparison
    gt_count_tensor = torch.tensor([gt_count], dtype=torch.long).to(device)
    count_logits, type_logits, param_preds = model(graph, gt_feature_tensor, gt_count_tensor)
    
    # Get predictions
    pred_count = torch.argmax(count_logits, dim=-1).item()
    pred_types = torch.argmax(type_logits, dim=-1)[0].cpu().numpy()  # [max_features]
    pred_params = param_preds[0].cpu().numpy()  # [max_features, 10]
    
    # Denormalize parameters
    param_mean = normalize_stats['param_mean'].cpu().numpy()
    param_std = normalize_stats['param_std'].cpu().numpy()
    pred_params_denorm = pred_params * param_std + param_mean
    
    return {
        'sample_id': sample_id,
        'gt_count': gt_count,
        'pred_count': pred_count,
        'gt_features': gt_features,
        'pred_types': pred_types[:gt_count],  # Only take predicted count
        'pred_params': pred_params_denorm[:gt_count],
        'count_correct': pred_count == gt_count
    }

print("Inference function ready")

Inference function ready


## 4. Run Inference on Selected Samples

In [7]:
# Run inference on all selected samples
results = []
for idx in selected_samples:
    result = run_inference(idx)
    results.append(result)
    print(f"‚úì Processed sample {result['sample_id']}")

print(f"\nCompleted inference on {len(results)} samples")

‚úì Processed sample 00088413
‚úì Processed sample 00054192
‚úì Processed sample 00054192
‚úì Processed sample 00062926
‚úì Processed sample 00057294

Completed inference on 5 samples


## 5. Display Results

In [8]:
PARAM_NAMES = ['diameter', 'depth', 'width', 'length', 'height', 'bbox_x', 'bbox_y', 'bbox_z', 'volume', 'confidence']

def display_sample_results(result, sample_num, show_params=True):
    """Display detailed results for a single sample."""
    print("=" * 80)
    print(f"SAMPLE {sample_num}: {result['sample_id']}")
    print("=" * 80)
    
    # Count prediction
    print(f"\nüìä COUNT PREDICTION:")
    print(f"  Ground Truth: {result['gt_count']} features")
    print(f"  Predicted:    {result['pred_count']} features")
    if result['count_correct']:
        print(f"  ‚úì CORRECT")
    else:
        diff = result['pred_count'] - result['gt_count']
        print(f"  ‚úó INCORRECT (off by {diff:+d})")
    
    # Feature-by-feature comparison
    print(f"\nüîç FEATURE-BY-FEATURE COMPARISON:")
    print(f"  {'#':<3} {'Ground Truth':<25} {'Predicted':<25} {'Status':<10}")
    print(f"  {'-'*3} {'-'*25} {'-'*25} {'-'*10}")
    
    num_features = min(result['gt_count'], len(result['pred_types']))
    type_correct = 0
    
    for i in range(num_features):
        gt_feat = result['gt_features'][i]
        gt_type_idx = gt_feat['type']
        gt_type_name = idx_to_feature_type.get(gt_type_idx, f"Unknown({gt_type_idx})")
        
        pred_type_idx = result['pred_types'][i]
        pred_type_name = idx_to_feature_type.get(pred_type_idx, f"Unknown({pred_type_idx})")
        
        correct = gt_type_idx == pred_type_idx
        if correct:
            type_correct += 1
        
        status = "‚úì" if correct else "‚úó"
        print(f"  {i+1:<3} {gt_type_name:<25} {pred_type_name:<25} {status:<10}")
    
    # Type accuracy for this sample
    type_acc = (type_correct / num_features * 100) if num_features > 0 else 0
    print(f"\n  Type Accuracy: {type_correct}/{num_features} = {type_acc:.1f}%")
    
    # Show parameter predictions if requested
    if show_params and num_features > 0:
        print(f"\nüìê PARAMETER PREDICTIONS (showing key dimensions):")
        print(f"  Feature | Parameter | Ground Truth | Predicted | Error")
        print(f"  {'-'*7} | {'-'*9} | {'-'*12} | {'-'*9} | {'-'*10}")
        
        for i in range(min(num_features, 3)):  # Show first 3 features to avoid clutter
            gt_feat = result['gt_features'][i]
            gt_params = gt_feat['params']
            pred_params = result['pred_params'][i]
            gt_type_name = idx_to_feature_type.get(gt_feat['type'], "unknown")
            
            # Show relevant parameters based on feature type
            if gt_type_name == 'hole':
                relevant_params = [0, 1]  # diameter, depth
            elif gt_type_name in ['pocket', 'step']:
                relevant_params = [1, 2, 3]  # depth, width, length
            else:
                relevant_params = [0, 1, 2]  # diameter, depth, width
            
            for param_idx in relevant_params:
                gt_val = gt_params[param_idx]
                pred_val = pred_params[param_idx]
                error = abs(pred_val - gt_val)
                pct_error = (error / gt_val * 100) if gt_val > 0.1 else 0
                
                print(f"  {i+1} {gt_type_name[:5]:<5} | {PARAM_NAMES[param_idx]:<9} | {gt_val:>10.2f}mm | {pred_val:>8.2f}mm | {error:>6.2f}mm ({pct_error:>4.1f}%)")
    
    print()

# Display all results
for i, result in enumerate(results, 1):
    display_sample_results(result, i)

SAMPLE 1: 00088413

üìä COUNT PREDICTION:
  Ground Truth: 4 features
  Predicted:    4 features
  ‚úì CORRECT

üîç FEATURE-BY-FEATURE COMPARISON:
  #   Ground Truth              Predicted                 Status    
  --- ------------------------- ------------------------- ----------
  1   step                      step                      ‚úì         
  2   step                      step                      ‚úì         
  3   unknown                   unknown                   ‚úì         
  4   unknown                   unknown                   ‚úì         

  Type Accuracy: 4/4 = 100.0%

üìê PARAMETER PREDICTIONS (showing key dimensions):
  Feature | Parameter | Ground Truth | Predicted | Error
  ------- | --------- | ------------ | --------- | ----------
  1 step  | depth     |       0.00mm |    -0.29mm |   0.29mm ( 0.0%)
  1 step  | width     |       0.00mm |    -0.12mm |   0.12mm ( 0.0%)
  1 step  | length    |       0.00mm |     0.97mm |   0.97mm ( 0.0%)
  2 step  | depth  

## 6. Summary Statistics

In [9]:
# Calculate summary metrics
count_correct = sum(1 for r in results if r['count_correct'])
count_errors = [r['pred_count'] - r['gt_count'] for r in results]
mae = np.mean(np.abs(count_errors))

# Type accuracy
total_features = 0
correct_types = 0

for result in results:
    num_features = min(result['gt_count'], len(result['pred_types']))
    total_features += num_features
    
    for i in range(num_features):
        gt_type = result['gt_features'][i]['type']
        pred_type = result['pred_types'][i]
        if gt_type == pred_type:
            correct_types += 1

type_accuracy = (correct_types / total_features * 100) if total_features > 0 else 0

print("=" * 80)
print("SUMMARY STATISTICS (5 SAMPLES)")
print("=" * 80)
print(f"\nCount Predictions:")
print(f"  Exact matches: {count_correct}/5 ({count_correct/5*100:.1f}%)")
print(f"  Mean Absolute Error: {mae:.2f} features")
print(f"  Errors: {count_errors}")

print(f"\nType Classification:")
print(f"  Correct: {correct_types}/{total_features} ({type_accuracy:.1f}%)")

print(f"\nExpected Performance (full test set):")
print(f"  Count Accuracy: 86.36%")
print(f"  Type Accuracy: 87.10%")
print(f"  MAE: 0.15 features")

SUMMARY STATISTICS (5 SAMPLES)

Count Predictions:
  Exact matches: 5/5 (100.0%)
  Mean Absolute Error: 0.00 features
  Errors: [0, 0, 0, 0, 0]

Type Classification:
  Correct: 34/40 (85.0%)

Expected Performance (full test set):
  Count Accuracy: 86.36%
  Type Accuracy: 87.10%
  MAE: 0.15 features


## 7. Detailed Metrics Table

In [10]:
# Create summary DataFrame
summary_data = []
for i, result in enumerate(results, 1):
    num_features = min(result['gt_count'], len(result['pred_types']))
    correct_types = sum(
        1 for j in range(num_features)
        if result['gt_features'][j]['type'] == result['pred_types'][j]
    )
    type_acc = (correct_types / num_features * 100) if num_features > 0 else 0
    
    summary_data.append({
        'Sample': f"Sample {i}",
        'Sample ID': result['sample_id'],
        'GT Count': result['gt_count'],
        'Pred Count': result['pred_count'],
        'Count Error': result['pred_count'] - result['gt_count'],
        'Count Correct': '‚úì' if result['count_correct'] else '‚úó',
        'Type Accuracy': f"{type_acc:.1f}%"
    })

df = pd.DataFrame(summary_data)
print("\n" + "=" * 80)
print("DETAILED METRICS TABLE")
print("=" * 80)
print(df.to_string(index=False))
print("=" * 80)


DETAILED METRICS TABLE
  Sample Sample ID  GT Count  Pred Count  Count Error Count Correct Type Accuracy
Sample 1  00088413         4           4            0             ‚úì        100.0%
Sample 2  00054192         6           6            0             ‚úì         83.3%
Sample 3  00054192         6           6            0             ‚úì         83.3%
Sample 4  00062926         9           9            0             ‚úì         66.7%
Sample 5  00057294        15          15            0             ‚úì         93.3%


## Conclusion

This notebook demonstrates the Feature Detector V5 model's performance on real test samples. The model predicts:

1. **Feature Count** - How many machining features exist in the part
2. **Feature Types** - Classification (hole, pocket, step, chamfer, fillet, etc.)
3. **Feature Parameters** - 10 dimensions per feature:
   - `diameter` - For holes
   - `depth` - How deep the feature goes
   - `width`, `length`, `height` - Feature dimensions
   - `bbox_x`, `bbox_y`, `bbox_z` - Bounding box
   - `volume` - Material removed
   - `confidence` - Prediction confidence

**Performance Metrics:**
- **Count Accuracy**: 86.36% (exact match on 8.6 out of 10 parts)
- **Type Accuracy**: 87.10% (correctly classifies 87% of individual features)
- **MAE**: 0.15 features (predictions typically within 1 feature of ground truth)

This enables automated CAD-to-CAM workflows with lightweight human verification.