# 🧪 EXPERIMENTO 1 (v2): Horizon vs Accuracy

## Objetivo
**Provar que o raio espectral ρ(Ā) prediz a capacidade de raciocínio.**

## Correções v2
- Removemos `pd.cut` completamente para evitar erros de binning
- Range de ρ corrigido (0.5 a 0.999)
- Gráficos otimizados

---
**Runtime: GPU recomendado, ~3 min**

In [None]:
#@title 1️⃣ Setup e Imports
%%capture
!pip install torch numpy scipy matplotlib seaborn pandas

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import os
import json
import shutil
from google.colab import files

# Output folder that receives every figure and the JSON written later on
os.makedirs('exp1_v2_results', exist_ok=True)
# Global seaborn look for all plots in the notebook
sns.set_theme(style="whitegrid")

# Select the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Device: {DEVICE}")

In [None]:
#@title 2️⃣ Funções de Simulação

def memory_retention(rho, distance):
    """Return the fraction of memory retained after `distance` steps.

    Implements the geometric decay model M(t) = rho ** t, where `rho` is the
    spectral radius and `distance` is the number of steps (tokens).
    """
    return pow(rho, distance)

def expected_accuracy(rho, distance):
    """Map memory retention to an expected accuracy with a steep logistic curve.

    The curve is a logistic function of log10(retention): it returns 0.5
    when the retention equals 1e-2 and uses a slope factor of 20 (both
    constants were tuned empirically).
    """
    # The 1e-10 offset keeps log10 finite when the retention underflows to 0
    log_retention = np.log10(memory_retention(rho, distance) + 1e-10)
    logit = 20 * (log_retention + 2)
    return 1 / (1 + np.exp(-logit))

# Quick sanity check: at d=100 a rho close to 1 must score far higher than
# rho=0.80. The rho symbol in the printed text was mis-encoded and is restored.
print("Teste de sanidade:")
for sanity_rho in (0.99, 0.80):
    print(f"  ρ={sanity_rho:.2f}, d=100 -> Acc: {expected_accuracy(sanity_rho, 100):.1%}")

In [None]:
#@title 3️⃣ Executar Experimento

# Experiment parameters: distances (in tokens) at which accuracy is evaluated
distances = [10, 25, 50, 100, 200, 500, 1000]

# IMPORTANT: the rho grid covers every scenario (stable to unstable).
# The three linspace segments share their endpoints (0.80 and 0.95), so
# np.unique drops the repeated values; otherwise those two rho values would
# be sampled twice and carry double weight in the summary statistics.
rho_values = np.unique(np.concatenate([
    np.linspace(0.50, 0.80, 20),   # Low performance
    np.linspace(0.80, 0.95, 20),   # Transition
    np.linspace(0.95, 0.999, 20),  # High performance
]))

# One record per (rho, distance) pair
data_points = [
    {'rho': rho, 'distance': dist, 'accuracy': expected_accuracy(rho, dist)}
    for rho in rho_values
    for dist in distances
]

df = pd.DataFrame(data_points)
print("✅ Experimento concluído!")
print(f"   Total de pontos: {len(df)}")
print(f"   Média Accuracy: {df['accuracy'].mean():.2%}")

In [None]:
#@title 4️⃣ GRÁFICO 1: Heatmap (SEM ERROS)

# Bucket rho into fixed bins by rounding to two decimals, so no bin edges
# have to be computed (this sidesteps the "duplicate bins" binning error).
df['rho_group'] = df['rho'].round(2)

# Mean accuracy for every (distance, rho bin) cell
heatmap_data = df.pivot_table(values='accuracy', index='distance', columns='rho_group', aggfunc='mean')

fig, ax = plt.subplots(figsize=(12, 6))
# Draw on the axes created above explicitly instead of relying on the
# implicit "current axes"
sns.heatmap(heatmap_data, cmap='RdYlGn', annot=False, cbar_kws={'label': 'Accuracy Esperada'}, ax=ax)

# Labels had mis-encoded accented characters and rho symbol; restored here
ax.set_title('Mapa de Calor: Accuracy por Distância e Raio Espectral', fontsize=14)
ax.set_ylabel('Distância (tokens)')
ax.set_xlabel('Raio Espectral ρ')

fig.tight_layout()
fig.savefig('exp1_v2_results/heatmap.png', dpi=150)
plt.show()

In [None]:
#@title 5️⃣ GRÁFICO 2: Curvas de Desempenho

fig, ax = plt.subplots(figsize=(10, 6))

# One accuracy-vs-rho curve per distance, coloured along the viridis colormap
colors = plt.cm.viridis(np.linspace(0, 1, len(distances)))

for dist, color in zip(distances, colors):
    subset = df[df['distance'] == dist].sort_values('rho')
    ax.plot(subset['rho'], subset['accuracy'], label=f'd={dist}', color=color, linewidth=2)

# Reference line at 50% accuracy
ax.axhline(0.5, color='red', linestyle='--', label='Threshold 50%')
# Labels had a mis-encoded rho symbol and accented characters; restored here
ax.set_xlabel('Raio Espectral ρ')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy vs Raio Espectral')
ax.legend(title='Distância')
ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig('exp1_v2_results/curves.png', dpi=150)
plt.show()

In [None]:
#@title 6️⃣ Resultados Estatísticos & Download

# Pearson correlation between rho and accuracy over every simulated point
correlation, p_value = stats.pearsonr(df['rho'], df['accuracy'])

# Evaluate the acceptance criterion once, so the printed verdict and the
# saved JSON can never disagree (the JSON used to say "Hypothesis Confirmed"
# unconditionally).
hypothesis_confirmed = correlation > 0.8

print("="*40)
print("📊 RESULTADOS FINAIS")
print("="*40)
print(f"Correlação Pearson: {correlation:.4f}")
print(f"P-value: {p_value:.4e}")
print(f"Conclusão: {'HIPÓTESE CONFIRMADA ✅' if hypothesis_confirmed else 'INCONCLUSIVO ⚠️'}")

# Save the summary as JSON; values are cast to plain floats for serialisation
results = {
    'correlation': float(correlation),
    'p_value': float(p_value),
    'conclusion': 'Hypothesis Confirmed' if hypothesis_confirmed else 'Inconclusive',
}
with open('exp1_v2_results/stats.json', 'w', encoding='utf-8') as f:
    json.dump(results, f)

# Zip the results folder and hand it to the browser (Colab only)
shutil.make_archive('exp1_v2_results', 'zip', 'exp1_v2_results')
files.download('exp1_v2_results.zip')