In [None]:
# Save results and push to GitHub
import json

# Save numerical results as JSON (not .pt, so it's readable on GitHub)
results_01 = {
    'r2_ours': r2_ours.tolist(),
    'r2_baseline': r2_base.tolist(),
    'mcc_dims': dims,
    'mcc_values': mcc_results,
    'b_true_sparsity': float(1 - B_true.float().mean()),
    'b_est_sparsity': float(1 - B_estimated.float().mean()),
}

os.makedirs('results', exist_ok=True)
with open('results/01_synthetic_results.json', 'w') as f:
    json.dump(results_01, f, indent=2)

# Copy figures to results/
!cp fig3_r2_matrix.png results/
!cp fig4_mcc.png results/
!cp b_matrix_comparison.png results/

# Git add, commit, push
!git add results/
!git commit -m "Add Notebook 01 results: synthetic experiment (Fig 3, 4)"
!git push

print('Results pushed to GitHub!')

# Notebook 1: Synthetic Experiment (Section 5.1)

Validates the identifiability theory from the paper.
- No LLM required
- Generates synthetic data with known latent structure
- Trains sparsity-regularized AE and baseline AE
- Reproduces Fig 3 (R² matrix) and Fig 4 (MCC across dimensions)

**Estimated time: ~15 minutes on Colab T4**

In [None]:
# Setup: Clone repo and install
# To push results back to GitHub, store a Personal Access Token in Colab Secrets:
#   1. Go to https://github.com/settings/tokens → Generate new token (classic) → repo scope
#   2. In Colab left sidebar → Secrets (key icon) → Add: name="GITHUB_TOKEN", value=<your token>
#   3. Toggle "Notebook access" ON

import os
try:
    from google.colab import userdata
    GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')
    REPO_URL = f'https://{GITHUB_TOKEN}@github.com/AUMEZAK/thoughtcomm.git'
except Exception:
    GITHUB_TOKEN = None
    REPO_URL = 'https://github.com/AUMEZAK/thoughtcomm.git'

!git clone {REPO_URL} thoughtcomm 2>/dev/null || echo 'Already cloned'
%cd thoughtcomm
!pip install -e . -q

# Configure git for pushing results
!git config user.email "colab@thoughtcomm.dev"
!git config user.name "ThoughtComm Colab"

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from configs.config import ThoughtCommConfig
from data.synthetic import generate_synthetic_data, generate_multi_setup_data
from models.autoencoder import SparsityRegularizedAE
from training.train_autoencoder import train_autoencoder, train_autoencoder_baseline
from training.jacobian_utils import compute_binary_pattern
from evaluation.synthetic_eval import compute_r2_matrix, compute_mcc_fast

# Seed the global NumPy and PyTorch RNGs so that "Restart & Run All" is repeatable.
# NOTE(review): this only helps as far as the project's training code draws from
# these global generators — confirm against training/train_autoencoder.py.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Device: {device}')

## 1. Basic Setup: R² Matrix (Fig 3)

In [None]:
# Build the basic synthetic dataset (dim=128) with a fixed seed
DIM = 128
data_kwargs = dict(dim=DIM, num_samples=10000, seed=42)
X, Z, B_true, group_indices, mixing_fn = generate_synthetic_data(**data_kwargs)

# Short report on what was generated
nonzero_fraction = B_true.float().mean()
print(f'X shape: {X.shape}, Z shape: {Z.shape}')
print(f'Group indices: {group_indices}')
print(f'B_true non-zero fraction: {nonzero_fraction:.3f}')

In [None]:
# Hyperparameters for the synthetic experiment, collected in one place
ae_settings = dict(
    n_z=DIM,
    ae_hidden=256,
    ae_num_layers=3,
    ae_epochs=200,
    ae_batch_size=128,
    ae_lr=1e-3,
    jacobian_l1_weight=0.01,
    jacobian_sample_rows=32,
    device=device,
)
config = ThoughtCommConfig(**ae_settings)

# The synthetic setup has 2 observed variables (not 3 agents), so the per-agent
# size and the agent count are overridden after construction.
config.hidden_size = DIM // 2  # each 'agent' has DIM//2 dims
config.num_agents = 2

In [None]:
# Fit the sparsity-regularized autoencoder (our method) on the synthetic observations
print('Training sparsity-regularized AE...')
ours_fit = train_autoencoder(X, config, verbose=True)
ae_ours, loss_ours = ours_fit

In [None]:
# Fit the baseline autoencoder (trained without the sparsity term) on the same data
print('Training baseline AE...')
baseline_fit = train_autoencoder_baseline(X, config, verbose=True)
ae_base, loss_base = baseline_fit

In [None]:
def _encode_to_cpu(model):
    """Encode the full dataset X with `model` under no_grad and return the result on the CPU."""
    with torch.no_grad():
        return model.encode(X.float().to(device)).cpu()


# Latent estimates from both models
Z_hat_ours = _encode_to_cpu(ae_ours)
Z_hat_base = _encode_to_cpu(ae_base)

# R² matrices between estimated and true latents
r2_ours, names = compute_r2_matrix(Z_hat_ours, Z, group_indices)
r2_base, _ = compute_r2_matrix(Z_hat_base, Z, group_indices)

# Print both matrices rounded to three decimals
for heading, matrix in (('Ours R² matrix:', r2_ours), ('\nBaseline R² matrix:', r2_base)):
    print(heading)
    print(np.round(matrix, 3))

In [None]:
# Plot Fig 3: R² matrices — one annotated heat-map per model (ours vs. baseline)
# Tick labels for the three latent groups; built once instead of once per panel.
labels = ['$Z_A \\setminus Z_B$', '$Z_A \\cap Z_B$', '$Z_B \\setminus Z_A$']

fig, axes = plt.subplots(1, 2, figsize=(10, 4))

for ax, r2, title in [(axes[0], r2_ours, 'Ours'), (axes[1], r2_base, 'Baseline')]:
    # Size everything from the matrix itself rather than a hard-coded 3, so a
    # different number of groups does not raise an IndexError while annotating.
    n_rows, n_cols = r2.shape
    im = ax.imshow(r2, cmap='Blues', vmin=0, vmax=0.8)
    ax.set_title(title, fontsize=14)
    ax.set_xticks(range(n_cols))
    ax.set_yticks(range(n_rows))
    # The symbolic labels only apply when their count matches the ticks;
    # otherwise the default numeric tick labels are kept.
    if n_cols == len(labels):
        ax.set_xticklabels(labels, fontsize=10)
    if n_rows == len(labels):
        ax.set_yticklabels(labels, fontsize=10)
    # Write each R² value into its cell
    for i in range(n_rows):
        for j in range(n_cols):
            ax.text(j, i, f'{r2[i,j]:.2f}', ha='center', va='center', fontsize=12)
    plt.colorbar(im, ax=ax)

plt.suptitle('Figure 3: R² of two models', fontsize=14)
plt.tight_layout()
plt.savefig('fig3_r2_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

## 2. MCC Across Dimensions (Fig 4)

In [None]:
# Sweep across dimensions: train one sparsity-regularized AE per latent size
# and record its MCC against the ground-truth latents.
dims = [128, 256, 384, 512, 640, 768, 896, 1024]
mcc_results = []

for dim in dims:
    print(f'\nDimension: {dim}')
    # Explicit seed, matching the basic setup, so the sweep data is repeatable
    X_d, Z_d, _, gi_d, _ = generate_synthetic_data(dim=dim, num_samples=10000, seed=42)

    cfg = ThoughtCommConfig(
        n_z=dim, ae_hidden=max(256, dim//2), ae_num_layers=3,
        ae_epochs=200, ae_batch_size=128, ae_lr=1e-3,
        jacobian_l1_weight=0.01, jacobian_sample_rows=32,
        device=device,
    )
    # Same override as the basic setup: 2 observed variables of dim//2 each
    cfg.hidden_size = dim // 2
    cfg.num_agents = 2

    ae_d, _ = train_autoencoder(X_d, cfg, verbose=False)

    with torch.no_grad():
        Z_hat_d = ae_d.encode(X_d.float().to(device)).cpu()

    mcc, _ = compute_mcc_fast(Z_hat_d, Z_d)
    # Store a built-in float so the list can be plotted and JSON-serialized
    # regardless of the scalar type the helper returns.
    mcc = float(mcc)
    mcc_results.append(mcc)
    print(f'  MCC: {mcc:.4f}')

In [None]:
# Plot Fig 4: MCC across dimensions
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(dims, mcc_results, 'b-o', linewidth=2, markersize=8)
ax.axhline(y=0.75, color='r', linestyle='--', label='Identifiability threshold')
ax.set_xlabel('Dimension', fontsize=12)
ax.set_ylabel('MCC', fontsize=12)
ax.set_title('Figure 4: MCC across setups', fontsize=14)
ax.legend(fontsize=11)
ax.set_xticks(dims)

# Keep the usual 0.7–1.0 window, but widen it downwards when a run scores below
# 0.7 so that a failing dimension stays visible instead of being cropped away.
y_floor = min([0.7] + [float(m) - 0.02 for m in mcc_results])
ax.set_ylim(y_floor, 1.0)

ax.grid(True, alpha=0.3)
fig.savefig('fig4_mcc.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Visualize B matrix structure: ground-truth pattern next to the pattern
# estimated from the trained decoder on a small batch of encoded samples.
with torch.no_grad():
    Z_sample = ae_ours.encode(X[:64].float().to(device))
B_estimated = compute_binary_pattern(ae_ours.decoder, Z_sample, threshold=0.01, sub_batch=8, device=device)
# Bring the result to host memory: Tensor.numpy() raises for CUDA tensors, and
# the helper is called with device=device (its output device is not visible here).
B_estimated = B_estimated.detach().cpu()

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = [(axes[0], B_true, 'Ground Truth B'), (axes[1], B_estimated, 'Estimated B (ours)')]
for ax, pattern, title in panels:
    ax.imshow(pattern.detach().cpu().numpy(), cmap='Blues', aspect='auto')
    ax.set_title(title, fontsize=12)
    ax.set_xlabel('Latent dims')
    ax.set_ylabel('Observed dims')

plt.tight_layout()
plt.savefig('b_matrix_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

# Sparsity = fraction of zero entries in each pattern
print(f'B_true sparsity: {1 - B_true.float().mean():.3f}')
print(f'B_est sparsity: {1 - B_estimated.float().mean():.3f}')

## Summary

Expected results:
- **Fig 3 (left/Ours)**: High R² on diagonal (~0.6-0.8), low off-diagonal (~0-0.1)
- **Fig 3 (right/Baseline)**: Poorly disentangled, high off-diagonal R²
- **Fig 4**: MCC > 0.75 (red threshold) across all dimensions
- **B matrix**: Estimated B shows block-sparse structure similar to ground truth

## Push Results to GitHub