# 03: Cross-Domain Agent Detection

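Core experiment: train anomaly detectors on one agent-trace dataset (TRACE) and evaluate them on another (TRAIL), using a model trained on TRAIL itself as the within-domain baseline.
This tests whether anomaly patterns transfer across agent trace datasets.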

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

from src.data.trail_loader import load_trail_dataset, get_trail_labels
from src.data.trace_loader import load_trace_dataset, trace_to_otel_format
from src.features.agent_extractor import AgentTraceFeatureExtractor
from src.features.ubfs_schema import UBFSNormalizer
from src.models.isolation_forest import IsolationForestDetector
from src.models.deep_clustering import DeepClusteringDetector
from src.evaluation.metrics import compute_metrics

## Load Both Datasets

In [None]:
# --- TRAIL: load traces, extract features, normalize with UBFSNormalizer('zscore') ---
trail = load_trail_dataset()
# Coerce labels to an ndarray: if the loader hands back a plain list,
# `y_trail == 0` would be a single bool instead of an element-wise mask and
# the "normal" subset below would silently come out empty.
y_trail = np.asarray(get_trail_labels(trail['annotations']))
ext = AgentTraceFeatureExtractor()
X_trail, _, _ = ext.extract_batch(trail['traces'])
# NOTE(review): the normalizer sees every TRAIL row (normal and anomalous),
# not just the normal rows used for training — confirm this is intended.
X_trail = UBFSNormalizer('zscore').fit_transform(X_trail)

# --- TRACE: convert trajectories to OTel format first, then same pipeline ---
trace_data = load_trace_dataset()
y_trace = np.asarray(trace_data['labels'])  # same list-vs-array guard as above
otel = [trace_to_otel_format(t) for t in trace_data['trajectories']]
ext2 = AgentTraceFeatureExtractor()  # separate extractor instance per dataset
X_trace, _, _ = ext2.extract_batch(otel)
# NOTE(review): TRACE gets its own normalizer instance, so each dataset is
# scaled independently of the other — confirm this is the desired protocol
# for the cross-domain comparison.
X_trace = UBFSNormalizer('zscore').fit_transform(X_trace)

# Rows labelled 0 are treated as "normal"; the detectors are fit on these only.
trail_normal = X_trail[y_trail == 0]
trace_normal = X_trace[y_trace == 0]
print(f'TRAIL: {len(trail_normal)} normal, TRACE: {len(trace_normal)} normal')

## Transfer Experiment: TRACE -> TRAIL

In [None]:
# Detectors to compare: (panel title, detector class, constructor kwargs).
detector_specs = [
    ('Isolation Forest', IsolationForestDetector, {'n_estimators': 200, 'seed': 42}),
    ('Deep Clustering', DeepClusteringDetector, {'pretrain_epochs': 30, 'batch_size': 16, 'seed': 42}),
]

# One ROC panel per detector, side by side.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

for ax, (name, cls, kwargs) in zip(axes, detector_specs):
    # Baseline: fit on TRAIL normal rows, score every TRAIL row.
    # NOTE(review): the scored set contains the rows the model was fit on —
    # confirm that is acceptable for the within-domain reference curve.
    model = cls(**kwargs)
    model.fit(trail_normal)
    scores_within = model.score(X_trail)
    m_within = compute_metrics(y_trail, scores_within)
    fpr, tpr, _ = roc_curve(y_trail, scores_within)
    within_label = f'Within TRAIL (AUC={m_within.auc_roc:.3f})'
    ax.plot(fpr, tpr, color='#2196F3', label=within_label)

    # Transfer: a fresh detector fit on TRACE normal rows, scored on the same TRAIL rows.
    model2 = cls(**kwargs)
    model2.fit(trace_normal)
    scores_cross = model2.score(X_trail)
    m_cross = compute_metrics(y_trail, scores_cross)
    fpr2, tpr2, _ = roc_curve(y_trail, scores_cross)
    cross_label = f'TRACE->TRAIL (AUC={m_cross.auc_roc:.3f})'
    ax.plot(fpr2, tpr2, color='#FF9800', label=cross_label)

    # Dashed diagonal as a visual reference line.
    ax.plot((0, 1), (0, 1), 'k--', alpha=0.3)
    ax.set(title=name, xlabel='FPR', ylabel='TPR')
    ax.legend(loc='lower right')

plt.tight_layout()
plt.show()