# 🛡️ EXPERIMENTO 3: SpectralGuard (Defense)

## Objetivo
**Detectar e bloquear ataques HiSPA (Hidden State Poisoning Attacks) em tempo real.**

## Hipótese
- Ataques adversariais causam anomalias estatísticas na dinâmica espectral.
- **SpectralGuard**: Um detector leve que monitora o raio espectral ρ(t).
- **Esperado**: Detecção de >95% dos ataques com <1% de falsos positivos.

---
**Runtime: GPU recomendado, ~3 min**

In [None]:
#@title 1️⃣ Setup e Imports
%%capture
!pip install torch numpy scipy matplotlib seaborn pandas scikit-learn

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, precision_recall_fscore_support, confusion_matrix
import os
import json
import shutil
from google.colab import files

# Output directory for every figure/JSON artifact written by later cells.
os.makedirs('exp3_results', exist_ok=True)
sns.set_theme(style="whitegrid")

# Report the accelerator when one is visible to torch, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Device: {DEVICE}")

In [None]:
#@title 2️⃣ Simulação do Ambiente (Normal vs Ataque)

def generate_trajectory(n_steps, mode='normal'):
    """
    Generate a synthetic spectral-radius trajectory and its ground-truth label.

    Modes:
    - 'normal': Gaussian noise (sigma 0.005) around 0.99, clipped to
      [0.95, 1.0].
    - 'attack_collapse': a normal-looking prefix followed by an exponential
      decay from 0.99 towards 0; the whole trajectory is clipped to [0.0, 1.0].
    - 'attack_explode': a normal-looking prefix followed by a linear ramp that
      starts at 1.0 and grows by 0.05 per step; left unclipped.

    Args:
        n_steps: Number of samples in the trajectory. The attack modes need
            n_steps > 20 because the attack onset is drawn from
            [10, n_steps - 10).
        mode: One of 'normal', 'attack_collapse', 'attack_explode'.

    Returns:
        (trajectory, label): a 1-D NumPy array with n_steps samples and an
        int label, 0 for safe and 1 for attack.

    Raises:
        ValueError: If mode is not recognised, or n_steps is too small for an
            attack mode.
    """
    if mode == 'normal':
        noise = np.random.normal(0, 0.005, n_steps)
        # Stable regime: keep the samples inside a narrow band below 1.0.
        return np.clip(0.99 + noise, 0.95, 1.0), 0

    if mode not in ('attack_collapse', 'attack_explode'):
        # Previously an unknown mode fell through to the return statement and
        # failed with UnboundLocalError; fail with a clear message instead.
        raise ValueError(
            f"unknown mode {mode!r}; expected 'normal', 'attack_collapse' "
            "or 'attack_explode'"
        )
    if n_steps <= 20:
        raise ValueError(
            f"attack modes need n_steps > 20 to place the onset, got {n_steps}"
        )

    # Both attacks start out looking normal and switch at a random onset.
    split = np.random.randint(10, n_steps - 10)
    part1 = 0.99 + np.random.normal(0, 0.005, split)
    elapsed = np.arange(n_steps - split)

    if mode == 'attack_collapse':
        # Type 1: forced rapid forgetting, the radius decays exponentially.
        decay = np.exp(-0.2 * elapsed)
        part2 = 0.99 * decay + np.random.normal(0, 0.01, len(decay))
        trajectory = np.clip(np.concatenate([part1, part2]), 0.0, 1.0)
    else:
        # Type 2: numerical instability, the radius ramps above 1.
        explode = 1.0 + 0.05 * elapsed
        part2 = explode + np.random.normal(0, 0.01, len(explode))
        trajectory = np.concatenate([part1, part2])

    return trajectory, 1

# Preview one sample trajectory per mode so the three signatures can be
# compared by eye against the rho = 1.0 reference line.
steps = 50
traj_norm = generate_trajectory(steps, 'normal')[0]
traj_col = generate_trajectory(steps, 'attack_collapse')[0]
traj_exp = generate_trajectory(steps, 'attack_explode')[0]

plt.figure(figsize=(12, 4))
for series, fmt, caption in (
    (traj_norm, 'g-', 'Normal (Safe)'),
    (traj_col, 'r-', 'Attack (Collapse)'),
    (traj_exp, 'orange', 'Attack (Explode)'),
):
    plt.plot(series, fmt, label=caption)
plt.axhline(1.0, color='k', linestyle=':')
plt.legend()
plt.title('Assinaturas Espectrais: Normal vs Ataques')
plt.show()

In [None]:
#@title 3️⃣ Implementação do SpectralGuard

class SpectralGuard:
    """
    Rule-based detector over a spectral-radius trajectory.

    Two checks are applied, in order:
    1. Absolute bound: any sample above ``threshold_max`` (or any NaN).
    2. Sudden drop: within any run of ``window_size`` consecutive samples,
       the first sample exceeds the last by more than ``threshold_drop``.
    """

    def __init__(self, window_size=5, threshold_drop=0.1, threshold_max=1.01):
        """
        Args:
            window_size: Number of consecutive samples per drop window (>= 1).
            threshold_drop: Largest first-minus-last drop tolerated in a window.
            threshold_max: Largest sample value tolerated anywhere.

        Raises:
            ValueError: If window_size is smaller than 1.
        """
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")
        self.window_size = window_size
        self.threshold_drop = threshold_drop  # tolerated drop across one window
        self.threshold_max = threshold_max    # tolerated maximum value

    def scan(self, trajectory):
        """
        Classify a trajectory.

        Args:
            trajectory: 1-D sequence (list or NumPy array) of spectral-radius
                samples.

        Returns:
            (is_attack, reason): ``is_attack`` is True when an anomaly was
            found; ``reason`` is "Instability Detected",
            "Spectral Collapse Detected" or "Safe".
        """
        values = np.asarray(trajectory, dtype=float)
        n = len(values)
        if n == 0:
            # Nothing observed yet: np.max would raise on an empty array.
            return False, "Safe"

        # Check 1: absolute stability bound. NaN compares False against every
        # threshold, so without the explicit test it would pass as "Safe".
        if np.isnan(values).any() or values.max() > self.threshold_max:
            return True, "Instability Detected"

        # Check 2: sudden collapse. A window of `window_size` samples spans
        # `window_size - 1` steps; compare every sample with the one that many
        # steps later. This includes the window ending on the final sample,
        # which the previous range(len - window_size) loop skipped.
        lag = self.window_size - 1
        if n > lag:
            drops = values[:n - lag] - values[lag:]
            if np.any(drops > self.threshold_drop):
                return True, "Spectral Collapse Detected"

        return False, "Safe"

# Instantiate the detector shared by the evaluation and plotting cells.
guard = SpectralGuard(
    threshold_max=1.02,
    threshold_drop=0.15,
    window_size=5,
)
print("üõ°Ô∏è SpectralGuard Ativado!")

In [None]:
#@title 4️⃣ Avaliação em Massa (Dataset Teste)

n_samples = 1000

# Cumulative sampling bounds for one uniform draw in [0, 1):
# ~70% normal, ~15% collapse, ~15% explode. Note the set is class-imbalanced
# (about 70% safe / 30% attack), not balanced.
scenario_bounds = (
    (0.7, 'normal', 'normal'),
    (0.85, 'attack_collapse', 'collapse'),
    (float('inf'), 'attack_explode', 'explode'),
)

results = []
for _ in range(n_samples):
    rand = np.random.random()
    mode, type_ = next(
        (gen_mode, tag) for upper, gen_mode, tag in scenario_bounds if rand < upper
    )
    traj, label = generate_trajectory(50, mode)

    # Run the detector and record prediction next to the ground truth.
    is_attack, reason = guard.scan(traj)
    results.append({
        'type': type_,
        'label': label,
        'pred': 1 if is_attack else 0,
        'reason': reason,
    })

# Flat label/prediction lists consumed by the metrics cell.
y_true = [row['label'] for row in results]
y_pred = [row['pred'] for row in results]

df = pd.DataFrame(results)
print(f"Avaliado em {n_samples} amostras.")
print("Distribui√ß√£o:")
print(df['type'].value_counts())

In [None]:
#@title 5️⃣ Métricas de Performance

# Precision/recall/F1 for the positive (attack = 1) class.
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')

# Pin the label order so the matrix is always 2x2 (rows: actual safe/attack,
# columns: predicted safe/attack). Without `labels`, a run in which only one
# class appears yields a 1x1 matrix and the indexing below raises IndexError.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

print("="*40)
print("üìä RELAT√ìRIO DO SPECTRALGUARD")
print("="*40)
print(f"Precision: {precision:.2%}")
print(f"Recall:    {recall:.2%}")
print(f"F1-Score:  {f1:.2%}")
print("-"*40)
print("Matriz de Confus√£o:")
print(f"Safe CORRETO:      {tn}")
print(f"Alarme FALSO:      {fp}  (Erro Tipo I)")
print(f"Ataque PERDIDO:    {fn}  (Erro Tipo II)")
print(f"Ataque BLOQUEADO:  {tp}")

# Confusion-matrix heatmap, saved alongside the other experiment artifacts.
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Predicted Safe', 'Predicted Attack'],
            yticklabels=['Actual Safe', 'Actual Attack'])
plt.title('Matriz de Confus√£o do SpectralGuard')
plt.savefig('exp3_results/confusion_matrix.png', dpi=150)
plt.show()

In [None]:
#@title 6️⃣ Visualização: Radar Espectral

# Draw one safe and one collapse trajectory against the guard's own limits.
traj_safe, _ = generate_trajectory(50, 'normal')
traj_attack, _ = generate_trajectory(50, 'attack_collapse')
n_tokens = len(traj_attack)

fig, ax = plt.subplots(figsize=(10, 6))

# Zones are taken from the guard's configured bound instead of a hard-coded
# copy, so the picture stays correct if the guard is re-tuned.
ax.fill_between(range(n_tokens), 0, guard.threshold_max, color='green', alpha=0.05, label='Zona Segura')
ax.fill_between(range(n_tokens), guard.threshold_max, 1.2, color='red', alpha=0.1, label='Zona Inst√°vel')

# Trajectories
ax.plot(traj_safe, 'g-', linewidth=2, label='Prompt Seguro')
ax.plot(traj_attack, 'r--', linewidth=2, label='Ataque HiSPA Detectado')

# Mark the block point only when the guard actually flags the trajectory.
is_attack, _ = guard.scan(traj_attack)
if is_attack:
    # Re-derive the trigger with the same rule the guard uses: a window of
    # `window_size` samples compares values `window_size - 1` steps apart.
    # The previous hard-coded lag of 5 / threshold 0.15 could disagree with
    # the detector.
    lag = guard.window_size - 1
    if 0 <= lag < n_tokens:
        drops = traj_attack[:n_tokens - lag] - traj_attack[lag:]
        hits = np.flatnonzero(drops > guard.threshold_drop)
        if hits.size:
            trigger = int(hits[0]) + lag  # last sample of the first offending window
            ax.plot(trigger, traj_attack[trigger], 'rx', markersize=15, markeredgewidth=3, label='BLOQUEIO (Trigger)')

ax.set_title('Opera√ß√£o do SpectralGuard em Tempo Real', fontsize=14)
ax.set_xlabel('Tokens Processados')
ax.set_ylabel('Raio Espectral œÅ')
ax.set_ylim(0, 1.1)
ax.legend(loc='lower left')
ax.grid(True, alpha=0.3)

fig.savefig('exp3_results/spectral_radar.png', dpi=150)
plt.show()

In [None]:
#@title 7️⃣ Conclusão Final

# Hypothesis 3 passes when the guard's F1 exceeds 0.9; evaluate once and
# reuse the outcome for both the printed verdict and the saved status.
defense_validated = f1 > 0.9
verdict = '‚úÖ VALIDADA' if defense_validated else '‚ö†Ô∏è FALHOU'
rule = "=" * 40

print(rule)
print("üèÜ RESULTADO GERAL DA PESQUISA")
print(rule)
# Hypotheses 1 and 2 are reported as fixed text; only hypothesis 3 is
# computed in this notebook.
print("HIP√ìTESE 1 (Horizonte):   ‚úÖ VALIDADA (r > 0.90)")
print("HIP√ìTESE 2 (Colapso):     ‚úÖ VALIDADA (Acur√°cia cai >50%)")
print(f"HIP√ìTESE 3 (Defesa):      {verdict} (F1: {f1:.2%})")

# Persist the metrics as JSON next to the figures.
metrics = {'precision': precision, 'recall': recall, 'f1': f1}
final_res = {
    'experiment': 'SpectralGuard',
    'metrics': metrics,
    'status': 'SUCCESS' if defense_validated else 'FAIL',
}
with open('exp3_results/final_results.json', 'w') as f:
    json.dump(final_res, f)

# Bundle the results directory and hand the archive to the Colab browser session.
shutil.make_archive('exp3_spectral_guard', 'zip', 'exp3_results')
files.download('exp3_spectral_guard.zip')