# Zero Trust Architecture (ZTA) Experiment Analysis

This notebook analyzes the results of ZTA experiments, comparing baseline and ZTA-enabled scenarios.


In [None]:
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# Set up plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8'
# and later dropped the old 'seaborn' name, so prefer the new name whenever
# the installed Matplotlib provides it and fall back to the old one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('husl')


## Load Experiment Data

First, we load each scenario's event log (`events.jsonl`) from an experiment directory under `../data/experiments`.


In [None]:
def load_experiment(experiment_dir: Path) -> dict:
    """Load every scenario's event log found under an experiment directory.

    Each immediate child of ``experiment_dir`` that contains an
    ``events.jsonl`` file is treated as one scenario. Children without that
    file (including plain files) are skipped.

    Args:
        experiment_dir: Directory whose children are scenario directories.

    Returns:
        Dict mapping scenario directory name to a DataFrame of its events,
        in sorted scenario-name order. The ``timestamp`` column is parsed
        as dates.
    """
    results = {}

    # glob() yields entries in filesystem-dependent order; sorting keeps the
    # scenario order (and therefore every downstream table/plot) stable.
    for scenario_dir in sorted(experiment_dir.glob('*')):
        events_file = scenario_dir / 'events.jsonl'
        # is_file() is False both for non-directories and for scenario
        # directories that have no event log.
        if not events_file.is_file():
            continue
        results[scenario_dir.name] = pd.read_json(
            events_file,
            lines=True,
            convert_dates=['timestamp'],
        )

    return results

# Load experiment data
# Use the first experiment directory in sorted order: glob() order is
# filesystem-dependent, so taking its first item could pick a different
# experiment (or a stray file) from run to run.
experiments_root = Path('../data/experiments')
experiment_dirs = sorted(p for p in experiments_root.glob('*') if p.is_dir())
if not experiment_dirs:
    # Fail with an actionable message instead of a bare StopIteration.
    raise FileNotFoundError(
        f"No experiment directories found under {experiments_root.resolve()}"
    )
experiment_dir = experiment_dirs[0]
scenario_data = load_experiment(experiment_dir)

print(f"Loaded scenarios: {list(scenario_data.keys())}")


## Security Metrics Analysis

Let's analyze key security metrics across scenarios.


In [None]:
def calculate_security_metrics(df: pd.DataFrame) -> dict:
    """Calculate security metrics for one scenario's events.

    Args:
        df: Event records with at least ``success`` and ``event`` columns.
            An ``attack_type`` column is optional.

    Returns:
        Dict with ``total_events``, ``success_rate``, ``auth_failures``
        (failed ``login`` events) and ``blocked_access`` (failed ``access``
        events). When at least one row has a non-null ``attack_type``, the
        dict also contains ``attack_events`` and ``attack_success_rate``.
    """
    # Cast before negating so that 0/1-encoded success flags are inverted
    # logically rather than bitwise.
    failed = ~df['success'].astype(bool)

    metrics = {
        'total_events': len(df),
        'success_rate': df['success'].mean(),
        'auth_failures': int((failed & (df['event'] == 'login')).sum()),
        'blocked_access': int((failed & (df['event'] == 'access')).sum()),
    }

    # Attack metrics if present: scenarios without any attack traffic may
    # not have the column at all, so check before indexing.
    if 'attack_type' in df.columns:
        attack_events = df[df['attack_type'].notna()]
        if len(attack_events) > 0:
            metrics.update({
                'attack_events': len(attack_events),
                'attack_success_rate': attack_events['success'].mean(),
            })

    return metrics

# Calculate metrics for each scenario
security_metrics = {}
for scenario, df in scenario_data.items():
    security_metrics[scenario] = calculate_security_metrics(df)

# Create comparison plot (one row per scenario, one column per metric)
metrics_df = pd.DataFrame(security_metrics).T

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Security Metrics Comparison')

# (metric column, panel title, y-axis label), laid out row by row.
security_panels = [
    ('success_rate', 'Success Rate', 'Rate'),
    ('auth_failures', 'Authentication Failures', 'Count'),
    ('blocked_access', 'Blocked Access Attempts', 'Count'),
    ('attack_success_rate', 'Attack Success Rate', 'Rate'),
]

for ax, (column, title, ylabel) in zip(axes.flat, security_panels):
    if column in metrics_df.columns:
        metrics_df[column].plot(kind='bar', ax=ax, title=title)
        ax.set_ylabel(ylabel)
    else:
        # A column is missing when no scenario produced that metric (e.g. no
        # attack events, or no scenarios loaded). Label the panel instead of
        # raising KeyError or leaving unexplained blank axes.
        ax.set_title(title)
        ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                transform=ax.transAxes)
        ax.set_xticks([])
        ax.set_yticks([])

plt.tight_layout()
plt.show()


## Statistical Analysis

We use chi-squared tests of independence to check whether success rates differ between the baseline and full-ZTA scenarios (significance level 0.05).


In [None]:
def _attack_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows with a non-null ``attack_type`` (empty if no such column)."""
    if 'attack_type' not in df.columns:
        return df.iloc[0:0]
    return df[df['attack_type'].notna()]


def _chi2_success_test(first: pd.Series, second: pd.Series, alpha: float) -> dict:
    """Run a chi-squared independence test on two success/failure series."""
    table = []
    for outcomes in (first, second):
        successes = int(outcomes.sum())
        table.append([successes, len(outcomes) - successes])

    # chi2_contingency raises ValueError when any expected frequency is zero,
    # which happens exactly when a row or column of the table sums to zero
    # (e.g. every event succeeded in both scenarios). The test is undefined
    # there, so report NaN rather than aborting the whole comparison.
    row_totals = [sum(row) for row in table]
    col_totals = [sum(col) for col in zip(*table)]
    if 0 in row_totals or 0 in col_totals:
        stat = pval = float('nan')
    else:
        stat, pval = stats.chi2_contingency(table)[:2]

    return {
        'test': 'chi2',
        'statistic': float(stat),
        'p_value': float(pval),
        # NaN compares False, so an undefined test is never "significant".
        'significant': bool(pval < alpha),
    }


def compare_scenarios(baseline_df: pd.DataFrame, zta_df: pd.DataFrame,
                      alpha: float = 0.05) -> dict:
    """Perform statistical comparison between two scenarios.

    Args:
        baseline_df: Baseline events; needs a ``success`` column and may
            have an ``attack_type`` column.
        zta_df: ZTA events with the same columns.
        alpha: Significance threshold applied to each p-value.

    Returns:
        Dict with a ``success_rate_comparison`` entry and, when both
        scenarios contain attack events, an ``attack_success_comparison``
        entry. Each entry holds ``test``, ``statistic``, ``p_value`` and
        ``significant``; ``statistic`` and ``p_value`` are NaN when the
        contingency table is degenerate.
    """
    results = {
        'success_rate_comparison': _chi2_success_test(
            baseline_df['success'], zta_df['success'], alpha
        ),
    }

    # Compare attack success if present in both scenarios
    baseline_attacks = _attack_rows(baseline_df)
    zta_attacks = _attack_rows(zta_df)

    if len(baseline_attacks) > 0 and len(zta_attacks) > 0:
        results['attack_success_comparison'] = _chi2_success_test(
            baseline_attacks['success'], zta_attacks['success'], alpha
        )

    return results

# Run the statistical comparison only when both the baseline and the full-ZTA
# scenarios were loaded, then print a short report for each test performed.
required_scenarios = ('baseline', 'zta_full')
if all(name in scenario_data for name in required_scenarios):
    baseline_events, zta_events = (scenario_data[name] for name in required_scenarios)
    comparison = compare_scenarios(baseline_events, zta_events)

    # Collect the report lines first and emit them in a single print call.
    report_lines = ["Statistical Comparison Results:"]
    for metric_name, outcome in comparison.items():
        report_lines.extend([
            f"\n{metric_name}:",
            f"  Test: {outcome['test']}",
            f"  p-value: {outcome['p_value']:.4f}",
            f"  Significant: {outcome['significant']}",
        ])
    print("\n".join(report_lines))


## Usability Analysis

Now let's analyze the usability metrics: average task duration, friction events, and satisfaction score.


In [None]:
def analyze_usability(df: pd.DataFrame) -> dict:
    """Summarize usability measurements for one scenario.

    Args:
        df: Event records; the ``duration``, ``friction_events`` and
            ``satisfaction_score`` columns are each optional.

    Returns:
        Dict with ``avg_task_duration``, ``friction_events`` and
        ``satisfaction``. Each value is the column mean (for
        ``friction_events``, the mean per-row length), or ``None`` when the
        source column is absent.
    """
    # Start with every metric unset, then fill in those whose column exists.
    summary = dict.fromkeys(('avg_task_duration', 'friction_events', 'satisfaction'))
    available = df.columns

    if 'duration' in available:
        summary['avg_task_duration'] = df['duration'].mean()

    if 'friction_events' in available:
        # Each cell holds a collection of friction events; average its size.
        events_per_row = df['friction_events'].str.len()
        summary['friction_events'] = events_per_row.mean()

    if 'satisfaction_score' in available:
        summary['satisfaction'] = df['satisfaction_score'].mean()

    return summary

# Calculate usability metrics
usability_metrics = {}
for scenario, df in scenario_data.items():
    usability_metrics[scenario] = analyze_usability(df)

# Plot usability comparison
# analyze_usability() returns None for metrics whose source column is absent,
# which makes these columns object-dtype; pandas then refuses to plot them
# ("no numeric data to plot"). Coerce to numeric so None becomes NaN.
usability_df = pd.DataFrame(usability_metrics).T.apply(pd.to_numeric, errors='coerce')

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('Usability Metrics Comparison')

metrics = ['avg_task_duration', 'friction_events', 'satisfaction']
titles = ['Average Task Duration', 'Friction Events', 'Satisfaction Score']

for ax, metric, title in zip(axes, metrics, titles):
    if metric in usability_df.columns and usability_df[metric].notna().any():
        usability_df[metric].plot(kind='bar', ax=ax, title=title)
        ax.set_ylabel('Value')
    else:
        # No scenario recorded this metric: label the panel rather than
        # failing or leaving unexplained blank axes.
        ax.set_title(title)
        ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                transform=ax.transAxes)
        ax.set_xticks([])
        ax.set_yticks([])

plt.tight_layout()
plt.show()
