# 02: UBFS Feature Transfer Demonstration

This notebook shows how the Unified Behavioural Feature Schema (UBFS) maps both insider-threat data and agent-trace data into the same 20-dimensional feature space.

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data.trail_loader import load_trail_dataset, get_trail_labels
from src.data.trace_loader import load_trace_dataset, trace_to_otel_format
from src.features.agent_extractor import AgentTraceFeatureExtractor
from src.features.ubfs_schema import (
    UBFSNormalizer, UBFSConfig, FeatureCategory,
    ubfs_feature_names, ubfs_cert_mapping, ubfs_agent_mapping,
)

## UBFS Schema Overview

In [None]:
# Pull the schema configuration, the ordered feature names, and the two
# per-domain lookup tables (CERT side and agent side) keyed by feature name.
config = UBFSConfig()
names = ubfs_feature_names()
cert_map = ubfs_cert_mapping()
agent_map = ubfs_agent_mapping()

# Summary header; the trailing blank line separates it from the table below.
print(f'Total UBFS dimensions: {config.total_dim}')
print(f'Categories: {list(config.category_dims.keys())}', end='\n\n')

# One row per UBFS feature: the feature name, then what it maps to in each
# domain. Features missing from a mapping are shown as "N/A".
for name in names:
    cert_source = cert_map.get(name, "N/A")
    agent_source = agent_map.get(name, "N/A")
    # NOTE(review): the ':30s' format spec assumes the CERT mapping values
    # are strings — confirm against ubfs_cert_mapping().
    print(f'{name:30s}  CERT: {cert_source:30s}  Agent: {agent_source}')

## Extract Features from Both Domains

In [None]:
# Build one normalised feature matrix per dataset using the same extractor
# class for both.
#
# NOTE(review): each dataset gets its own freshly fitted
# UBFSNormalizer('zscore'), so the normalisation statistics are computed
# per dataset rather than shared — confirm this is intended for the
# cross-dataset comparisons in the cells below.

# --- TRAIL: traces are passed to the extractor as loaded ---
trail = load_trail_dataset()
ext_trail = AgentTraceFeatureExtractor()
raw_trail, _, _ = ext_trail.extract_batch(trail['traces'])
X_trail = UBFSNormalizer('zscore').fit_transform(raw_trail)

# --- TRACE: trajectories are first converted with trace_to_otel_format ---
trace = load_trace_dataset()
otel = list(map(trace_to_otel_format, trace['trajectories']))
ext_trace = AgentTraceFeatureExtractor()
raw_trace, _, _ = ext_trace.extract_batch(otel)
X_trace = UBFSNormalizer('zscore').fit_transform(raw_trace)

# Sanity check: report the shape of each feature matrix.
print(f'TRAIL: {X_trail.shape}, TRACE: {X_trace.shape}')

## Feature Correlation Comparison

In [None]:
# Side-by-side correlation heatmaps, one panel per dataset.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Break feature names at underscores so the tick labels wrap onto
# several short lines.
short = ['\n'.join(n.split('_')) for n in names]

# Transpose before np.corrcoef so that each feature column becomes one
# variable (np.corrcoef treats rows as variables by default).
corr_trail = np.corrcoef(X_trail.T)
corr_trace = np.corrcoef(X_trace.T)

# Both panels share a diverging colour map centred on zero and pinned to
# [-1, 1], so colours are directly comparable between the two datasets.
heatmap_style = dict(
    cmap='RdBu_r', center=0, vmin=-1, vmax=1,
    xticklabels=short, yticklabels=short,
)
panels = (
    (axes[0], corr_trail, 'TRAIL Feature Correlations'),
    (axes[1], corr_trace, 'TRACE Feature Correlations'),
)
for panel_ax, corr, title in panels:
    sns.heatmap(corr, ax=panel_ax, **heatmap_style)
    panel_ax.set_title(title)

plt.tight_layout()
plt.show()

## Feature Distribution Comparison

In [None]:
# A 4 x 5 grid gives 20 panels, one per feature; zip() stops at whichever
# of the axes / feature names runs out first.
fig, axes = plt.subplots(4, 5, figsize=(16, 12))

# Settings common to every histogram; density=True puts both datasets on
# the same vertical scale regardless of how many samples each has.
hist_style = dict(bins=20, alpha=0.5, density=True)
series = (
    ('TRAIL', X_trail, '#2196F3'),
    ('TRACE', X_trace, '#FF9800'),
)

for i, (ax, name) in enumerate(zip(axes.flat, names)):
    # Overlay column i of each dataset on the same axes.
    for label, features, color in series:
        ax.hist(features[:, i], label=label, color=color, **hist_style)
    ax.set_title(name, fontsize=8)
    # A single legend on the first panel is enough; the colours repeat.
    if i == 0:
        ax.legend(fontsize=7)

plt.tight_layout()
plt.show()