<a href="https://colab.research.google.com/github/Domusgpt/PPP-Market-Analog-Computer/blob/claude%2Fanalyze-project-continuation-ZKCoW/domain_diverse_training_ipynb_(1)_txt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HEMOC Domain-Diverse Training (Phase C)

**Goal**: Fix cross-domain transfer failure by training on ALL domains simultaneously.

**Background**: Music-trained CNN achieves 0.916 correlation on music data but FAILS on market/sensor data (correlation -0.027). This experiment trains on mixed domains (25% music + 25% market + 25% sensor + 25% random) to produce a domain-agnostic decoder.

**Important**: This uses `HybridEncoder` from `demos/dual_decoder.py` — NOT the old `OpticalKirigamiEncoder` from `main.py`.

**Requirements**: GPU runtime (Runtime > Change runtime type > T4 GPU)

## 1. Setup: Clone HEMOC repo and install dependencies

In [None]:
# Clone the HEMOC repo — use the jules branch which has experiments 1-24 + all results
# NOTE(review): re-running this cell after a successful clone will presumably fail,
# since git refuses to clone into an existing non-empty directory — confirm before re-running.
!git clone --branch jules-7629987832936421389-695948c7 \
    https://github.com/Domusgpt/HEMOC-Stain-Glass-Flower.git

# Verify we're on the right branch, then list the two demo scripts this notebook
# depends on (`ls` reports an error for any file that is missing)
!cd HEMOC-Stain-Glass-Flower && git branch && echo "---" && ls demos/dual_decoder.py demos/domain_diverse_training.py

Cloning into 'HEMOC-Stain-Glass-Flower'...
remote: Enumerating objects: 7037, done.
remote: Counting objects: 100% (319/319), done.
remote: Compressing objects: 100% (92/92), done.
remote: Total 7037 (delta 273), reused 231 (delta 225), pack-reused 6718 (from 3)
Receiving objects: 100% (7037/7037), 77.80 MiB | 16.84 MiB/s, done.
Resolving deltas: 100% (1776/1776), done.
* jules-7629987832936421389-695948c7
---
demos/domain_diverse_training.py  demos/dual_decoder.py


In [None]:
# Check GPU availability and report the device name and its total memory
import torch
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # The device-properties attribute is `total_memory` (in bytes);
    # `total_mem` does not exist and raises AttributeError.
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("WARNING: No GPU detected! Go to Runtime > Change runtime type > T4 GPU")

PyTorch: 2.9.0+cu128
CUDA available: True
GPU: Tesla T4


AttributeError: 'torch._C._CudaDeviceProperties' object has no attribute 'total_mem'

## 2. Verify the Working Pipeline (HybridEncoder)

In [None]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

# Put the cloned repo's demos/ folder at the front of the module search path
sys.path.insert(0, 'HEMOC-Stain-Glass-Flower/demos')

from dual_decoder import HybridEncoder

# Instantiate the encoder at size 64
encoder = HybridEncoder(size=64)

# Print the banner plus the input/output summary, one line at a time
for banner_line in (
    "HybridEncoder initialized (64x64)",
    "  Input: 6 angles in [-pi, pi]",
    "  Output: (3, 64, 64) RGB pattern",
    "  Angles 0-2: Linear path (phase, rotation, tilt)",
    "  Angles 3-5: Hypercube path (non-linear mixing)",
):
    print(banner_line)

In [None]:
# Quick sanity check: render eight random angle vectors on a 2x4 grid
fig, axes = plt.subplots(2, 4, figsize=(16, 8))

np.random.seed(42)
# axes.flat walks the grid row by row, i.e. the same order as axes[i // 4, i % 4]
for i, ax in enumerate(axes.flat):
    angles = np.random.uniform(-np.pi, np.pi, 6).astype(np.float32)
    pattern = encoder.generate(angles)
    # Reorder from channel-first (3, H, W) to channel-last (H, W, 3) for imshow
    rgb_image = np.transpose(pattern, (1, 2, 0))
    ax.imshow(rgb_image)
    ax.set_title(f'Sample {i+1}', fontsize=10)
    ax.axis('off')

plt.suptitle('HybridEncoder: Random Angle Samples', fontsize=14)
plt.tight_layout()
plt.show()

# Determinism: encoding the same angles twice must give identical patterns
test_angles = np.array([0.5, -0.3, 1.2, -0.8, 0.1, -1.5], dtype=np.float32)
p1 = encoder.generate(test_angles)
p2 = encoder.generate(test_angles)
max_diff = np.max(np.abs(p1 - p2))
lowest, highest = p1.min(), p1.max()
print(f"Determinism check: max diff = {max_diff:.2e} (should be 0)")
print(f"Pattern shape: {p1.shape} (should be (3, 64, 64))")
print(f"Pattern range: [{lowest:.3f}, {highest:.3f}] (should be [0, 1])")

## 3. Run Domain-Diverse Training

This runs the full experiment:
- **Experiment A**: Train CNN on ALL 4 domains (25% each: music/market/sensor/random)
- **Experiment B**: Train CNN on music-only (same total data, for comparison)
- **Evaluation**: Test both models on held-out data from each domain separately

- **On GPU**: ~60K training samples, 80 epochs (~15-30 min)
- **On CPU**: ~16K training samples, 50 epochs (~60+ min, not recommended)

In [None]:
# Change to the HEMOC repo directory so the relative paths used below resolve.
# The chdir is guarded: the path is relative, so re-running this cell while
# already inside the repo would otherwise raise FileNotFoundError.
repo_dir = 'HEMOC-Stain-Glass-Flower'
if os.path.basename(os.getcwd()) != repo_dir:
    os.chdir(repo_dir)
print(f"Working directory: {os.getcwd()}")

# Verify key files exist (paths are relative to the repo root)
for f in ['demos/dual_decoder.py', 'demos/domain_diverse_training.py', 'demos/option_e_scaled_cnn.py']:
    exists = os.path.exists(f)
    print(f"  {f}: {'OK' if exists else 'MISSING!'}")

In [None]:
# Run the domain-diverse training experiment
# This is the main experiment — takes 15-30 min on GPU, 60+ min on CPU
#
# It runs as a script (not import) so __file__-based imports work correctly.
# NOTE: `!` commands run in a subshell, so this `cd` does not change the
# notebook's own working directory.
!cd demos && python domain_diverse_training.py

## 4. Analyze Results

In [None]:
import json

# Load the saved results file
results_path = 'results/domain_diverse_results.json'
with open(results_path, 'r') as f:
    results = json.load(f)

# Top-level metadata fields, printed as "Label: value"
for field_label, field_key in (
    ("Experiment", 'experiment'),
    ("Timestamp", 'timestamp'),
    ("Device", 'device'),
):
    print(f"{field_label}: {results[field_key]}")

total_train = results['config']['total_train']
print(f"Total training samples: {total_train:,}")
print(f"\nVerdict: {results['verdict']}")

# Signed deltas reported under the "comparison" section
comparison = results['comparison']
print(f"\nCross-domain improvement: {comparison['improvement']:+.4f}")
print(f"In-domain cost: {comparison['in_domain_cost']:+.4f}")

In [None]:
# Visualize: Diverse vs Music-Only across domains (grouped bar chart)
domains = ['music', 'market', 'sensor', 'random']

diverse_corrs = [results['diverse_model'][d]['avg_corr'] for d in domains]
music_corrs = [results['music_model'][d]['avg_corr'] for d in domains]

x = np.arange(len(domains))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
bars1 = ax.bar(x - width/2, diverse_corrs, width, label='Diverse Model', color='steelblue')
bars2 = ax.bar(x + width/2, music_corrs, width, label='Music-Only Model', color='coral')

ax.set_xlabel('Test Domain')
ax.set_ylabel('Average Correlation')
ax.set_title('Domain-Diverse vs Music-Only: Cross-Domain Transfer')
ax.set_xticks(x)
ax.set_xticklabels(domains)
ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5, label='Pass threshold')
# legend() only picks up artists that exist when it is called, so it must
# come after axhline() for the 'Pass threshold' entry to be listed.
ax.legend()
ax.set_ylim(-0.2, 1.0)

# Add value labels just above the top of every bar in both groups
for bar in (*bars1, *bars2):
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
            f'{height:.3f}', ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.show()

In [None]:
# Per-angle heatmap: one panel per model, rows = test domains, columns = angles.
# constrained_layout replaces a trailing plt.tight_layout(): a colorbar attached
# to several axes is not compatible with tight_layout (it warns and the
# colorbar can end up overlapping the panels).
fig, axes = plt.subplots(1, 2, figsize=(16, 5), constrained_layout=True)

for idx, (model_key, title) in enumerate([('diverse_model', 'Diverse Model'), ('music_model', 'Music-Only Model')]):
    data = np.array([results[model_key][d]['per_angle_corr'] for d in domains])
    # Take the grid size from the data rather than hard-coding 4 x 6
    n_domains, n_angles = data.shape
    im = axes[idx].imshow(data, cmap='RdYlGn', vmin=-0.5, vmax=1.0, aspect='auto')
    axes[idx].set_xticks(range(n_angles))
    axes[idx].set_xticklabels([f'a{i}\n({"lin" if i < 3 else "hyp"})' for i in range(n_angles)])
    axes[idx].set_yticks(range(n_domains))
    axes[idx].set_yticklabels(domains)
    axes[idx].set_title(title)

    # Add text annotations; white text on the low-correlation cells
    for i in range(n_domains):
        for j in range(n_angles):
            color = 'white' if data[i, j] < 0.3 else 'black'
            axes[idx].text(j, i, f'{data[i,j]:.2f}', ha='center', va='center',
                          color=color, fontsize=9)

# Both panels share vmin/vmax, so one colorbar serves the whole figure
plt.colorbar(im, ax=axes, label='Correlation')
plt.suptitle('Per-Angle Correlation by Domain', fontsize=14)
plt.show()

In [None]:
# Summary table: one row per model, one column per test domain plus the
# average over the non-music domains
rule = "=" * 70
print(rule)
print("FINAL SUMMARY")
print(rule)

header_cells = [f"{'Model':<20}"]
header_cells += [f"{name:<10}" for name in ('Music', 'Market', 'Sensor', 'Random', 'Cross-Avg')]
print("\n" + " ".join(header_cells))
print("-" * 70)

for model_key, label in [('diverse_model', 'Diverse'), ('music_model', 'Music-Only')]:
    corrs = [results[model_key][d]['avg_corr'] for d in domains]
    cross_avg = np.mean(corrs[1:])  # market + sensor + random
    row_cells = [f"{label:<20}"]
    row_cells += [f"{value:<10.4f}" for value in (*corrs, cross_avg)]
    print(" ".join(row_cells))

print("\nPrevious results:")
print("  Music pipeline (Exp 13): 0.916 avg corr (music only)")
print("  Cross-domain (Exp 14-15): -0.027 to 0.077 (FAIL)")
print(f"\nVerdict: {results['verdict']}")

## 5. Save Results for PPP Integration

Download the results JSON for the evidence table.

In [None]:
# Download the results JSON via Colab's file helper
from google.colab import files

results_json = 'results/domain_diverse_results.json'
files.download(results_json)
print("Results downloaded. Add to PPP repo as evidence artifact.")