# 🔍 Architecture Explorer

Load and explore saved architectures from previous training runs.

**Use this to:**
- Load saved models (.pth files)
- Explore architecture structures (.json files)
- Visualize architectures
- Compare different architectures
- Use models for inference

---

## 📦 Setup

In [None]:
import torch
import json
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, Image

# Import modules
from architecture import ArchitectureState, OPERATION_POOL
from evaluation import ConvNet
from utils import load_architecture_json, save_model_pth
from visualize import visualize_architecture

print("‚úÖ Modules loaded")

## 📂 Select Results Directory

In [None]:
# === CONFIGURE THIS ===
# Path to the results directory of a previous run. The cells below read
# summary.json, results.jsonl, architectures/ and models/ from it.
RESULTS_DIR = 'demo_results/mnist_20241203_143022'  # Change this to your results folder

# Verify the directory exists; otherwise list candidate run folders so the
# path above can be corrected before running the remaining cells.
if not os.path.isdir(RESULTS_DIR):
    print(f"‚ùå Directory not found: {RESULTS_DIR}")
    print(f"\nAvailable directories:")
    for root in ('demo_results', 'results'):
        if not os.path.isdir(root):
            continue
        # Only sub-directories can be run folders: skip stray files, and sort
        # because os.listdir() returns entries in arbitrary order.
        for d in sorted(os.listdir(root)):
            if os.path.isdir(os.path.join(root, d)):
                print(f"  - {root}/{d}")
else:
    print(f"‚úÖ Using results from: {RESULTS_DIR}")

## 📊 Load Results Summary

In [None]:
# Load the run-level summary stored in the results directory.
summary_path = os.path.join(RESULTS_DIR, 'summary.json')
# Read as UTF-8 explicitly: open() otherwise uses the platform's default
# encoding, which is not UTF-8 everywhere (e.g. legacy Windows code pages).
with open(summary_path, encoding='utf-8') as f:
    summary = json.load(f)

rule = "=" * 60

print("üìã Summary")
print(rule)
print(f"Dataset: {summary['dataset']}")
print(f"Total Episodes: {summary['total_episodes']}")
print(f"Architectures Explored: {summary['architectures_explored']}")
print(f"Architectures Evaluated: {summary['architectures_evaluated']}")
# The two accuracy fields are formatted as floats with four decimals.
print(f"Best Accuracy: {summary['best_accuracy']:.4f}")
print(f"Average Accuracy: {summary['avg_accuracy']:.4f}")
print(rule)

## 📖 Load All Results

In [None]:
# Load every per-architecture record from the JSONL file (one JSON object
# per line).
results_path = os.path.join(RESULTS_DIR, 'results.jsonl')

with open(results_path, encoding='utf-8') as f:
    # Skip blank lines (e.g. an empty line at the end of the file); passing
    # them to json.loads() would raise JSONDecodeError.
    results = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(results)} architectures")

# Build a DataFrame ranked by final accuracy, best first. The index is reset
# so that row 0 is the best architecture.
df = pd.DataFrame(results)
df = df.sort_values('final_accuracy', ascending=False).reset_index(drop=True)

print("\nTop 10 Architectures:")
display(df[['arch_id', 'final_accuracy', 'search_reward', 'depth', 'avg_width', 'total_params']].head(10))

## 🏆 Explore Best Architecture

In [None]:
# Row 0 of df is taken as the best architecture (df was sorted by
# final_accuracy, descending, when it was built).
best_result = df.iloc[0]
best_arch_id = best_result['arch_id']

separator = "=" * 60
# Every recorded field except the id itself, which is shown in the header.
detail_fields = [(name, val) for name, val in best_result.items() if name != 'arch_id']

print(f"üèÜ Best Architecture: {best_arch_id}")
print(separator)
for name, val in detail_fields:
    print(f"{name:20s}: {val}")
print(separator)

## 🔍 Load Architecture Structure

In [None]:
from collections import Counter

# Load the best architecture's saved structure from its JSON file.
arch_path = os.path.join(RESULTS_DIR, 'architectures', f'{best_arch_id}.json')
best_arch = load_architecture_json(arch_path)

print("Architecture Details:")
print(f"  Nodes: {best_arch.nodes}")
print(f"  Edges: {best_arch.edges}")
print(f"  Operations: {best_arch.operations}")
print(f"  Channels: {best_arch.channels}")
print(f"  Positions (depth): {best_arch.positions}")

# Tally how often each operation appears among the values of `operations`.
# Counter replaces the hand-rolled get()/increment loop.
op_counts = Counter(best_arch.operations.values())

print(f"\nOperation Distribution:")
# Sorted by operation name so the listing is stable.
for op, count in sorted(op_counts.items()):
    print(f"  {op:15s}: {count}")

## 🎨 Visualize Best Architecture

In [None]:
# Render the best architecture to an image file, then show that file inline.
viz_path = 'temp_visualization.png'
accuracy_text = f"{best_result['final_accuracy']:.4f}"
plot_title = f"{best_arch_id} - Accuracy: {accuracy_text}"

visualize_architecture(best_arch, save_path=viz_path, title=plot_title)

display(Image(filename=viz_path))

## 🔄 Load Trained Model

In [None]:
# Pick the device first so the checkpoint can be mapped straight onto it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load model weights.
# map_location matters: without it, a checkpoint saved from a CUDA run cannot
# be deserialized on a CPU-only machine.
model_path = os.path.join(RESULTS_DIR, 'models', f'{best_arch_id}.pth')
checkpoint = torch.load(model_path, map_location=device)

print("Checkpoint contents:")
print(f"  Keys: {checkpoint.keys()}")
print(f"  Timestamp: {checkpoint['timestamp']}")
print(f"  Metadata: {checkpoint['metadata']}")

# Rebuild the network from the architecture and restore the trained weights.
# NOTE(review): num_classes is hard-coded to 10 — confirm it matches the
# dataset of the selected run.
model = ConvNet(best_arch, num_classes=10).to(device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # inference mode for layers such as dropout / batch norm

print(f"\n‚úÖ Model loaded with {sum(p.numel() for p in model.parameters()):,} parameters")

## 🧪 Test Inference

In [None]:
# Smoke-test the loaded model with a single random input tensor.
# NOTE(review): the input shape is hard-coded to (1, 3, 32, 32) — confirm this
# matches what ConvNet expects for the selected run's dataset.
test_input = torch.randn(1, 3, 32, 32).to(device)

# No gradients are needed for a forward-only pass.
with torch.no_grad():
    output = model(test_input)
    probabilities = torch.softmax(output, dim=1)
    predicted_class = output.argmax(dim=1).item()

confidence = probabilities[0, predicted_class].item()

print("Test Inference:")
print(f"  Input shape: {test_input.shape}")
print(f"  Output shape: {output.shape}")
print(f"  Predicted class: {predicted_class}")
print(f"  Confidence: {confidence:.4f}")
print(f"\nTop 3 predictions:")
top3 = torch.topk(probabilities[0], 3)
for class_idx, class_prob in zip(top3.indices.tolist(), top3.values.tolist()):
    print(f"  Class {class_idx}: {class_prob:.4f}")

## 📊 Compare Multiple Architectures

In [None]:
# Select (up to) the top 5 architectures for comparison.
top_5 = df.head(5)
# Size the x positions from the data: a run with fewer than five results
# would otherwise make bar() fail on mismatched x/height lengths.
ranks = range(len(top_5))

fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()

# Panels 0-3: one simple bar chart per metric -> (column, title, y-label).
bar_panels = [
    ('final_accuracy', 'Final Accuracy', 'Accuracy'),
    ('depth', 'Architecture Depth', 'Depth'),
    ('avg_width', 'Average Width', 'Channels'),
    ('total_params', 'Total Parameters', 'Parameters'),
]
for ax, (column, title, ylabel) in zip(axes, bar_panels):
    ax.bar(ranks, top_5[column])
    ax.set_title(title)
    ax.set_xlabel('Rank')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)

# Panel 4: both novelty scores drawn at the same x positions; the alpha
# keeps the bar underneath visible through the one on top.
axes[4].bar(ranks, top_5['topological_novelty'], alpha=0.7, label='Topological')
axes[4].bar(ranks, top_5['scale_novelty'], alpha=0.7, label='Scale')
axes[4].set_title('Novelty Scores')
axes[4].set_xlabel('Rank')
axes[4].set_ylabel('Novelty')
axes[4].legend()
axes[4].grid(True, alpha=0.3)

# Panel 5: accuracy vs parameter count over ALL architectures, not just the
# top ones.
axes[5].scatter(df['total_params'], df['final_accuracy'], alpha=0.6)
axes[5].set_title('Accuracy vs Parameters')
axes[5].set_xlabel('Parameters')
axes[5].set_ylabel('Accuracy')
axes[5].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 🔬 Analyze Architecture Patterns

In [None]:
from collections import Counter

# Load every saved architecture and collect its operations, depth and width.
all_ops = []
all_depths = []
all_widths = []

arch_dir = os.path.join(RESULTS_DIR, 'architectures')
for arch_file in os.listdir(arch_dir):
    if arch_file.endswith('.json'):
        arch_path = os.path.join(arch_dir, arch_file)
        arch = load_architecture_json(arch_path)

        all_ops.extend(arch.operations.values())
        all_depths.append(arch.depth)
        all_widths.append(arch.avg_width)

# Operation frequency across all loaded architectures.
op_freq = Counter(all_ops)

print("üî¨ Architecture Patterns Analysis")
print("="*60)

if not all_depths:
    # Without this guard, min()/max() raise ValueError and the mean divides
    # by zero when the directory holds no architecture files.
    print(f"\nNo architecture .json files found in: {arch_dir}")
else:
    print(f"\nOperation Frequency:")
    # most_common() orders by count, highest first.
    for op, count in op_freq.most_common():
        print(f"  {op:15s}: {count:4d} ({count/len(all_ops)*100:.1f}%)")

    print(f"\nDepth Statistics:")
    print(f"  Min: {min(all_depths)}")
    print(f"  Max: {max(all_depths)}")
    print(f"  Mean: {sum(all_depths)/len(all_depths):.2f}")
    # pandas' Series.std() is the sample standard deviation (ddof=1), so it
    # is NaN when only one architecture is present.
    print(f"  Std: {pd.Series(all_depths).std():.2f}")

    print(f"\nWidth Statistics:")
    print(f"  Min: {min(all_widths):.1f}")
    print(f"  Max: {max(all_widths):.1f}")
    print(f"  Mean: {sum(all_widths)/len(all_widths):.2f}")
    print(f"  Std: {pd.Series(all_widths).std():.2f}")

## 🎯 Find Interesting Architectures

In [None]:
print("üéØ Interesting Architectures")
print("="*60)

# Most novel topologically
most_novel_topo = df.loc[df['topological_novelty'].idxmax()]
print(f"\nüìê Most Topologically Novel:")
print(f"  ID: {most_novel_topo['arch_id']}")
print(f"  Accuracy: {most_novel_topo['final_accuracy']:.4f}")
print(f"  Topo Novelty: {most_novel_topo['topological_novelty']:.4f}")

# Best depth/width ratio
df['depth_width_ratio'] = df['depth'] / (df['avg_width'] + 1)
extreme_ratio = df.loc[df['depth_width_ratio'].idxmax()]
print(f"\nüìè Most Extreme Depth/Width Ratio:")
print(f"  ID: {extreme_ratio['arch_id']}")
print(f"  Accuracy: {extreme_ratio['final_accuracy']:.4f}")
print(f"  Depth/Width: {extreme_ratio['depth_width_ratio']:.4f}")

# Most efficient (best accuracy per parameter)
df['efficiency'] = df['final_accuracy'] / (df['total_params'] + 1)
most_efficient = df.loc[df['efficiency'].idxmax()]
print(f"\n‚ö° Most Efficient (Accuracy/Params):")
print(f"  ID: {most_efficient['arch_id']}")
print(f"  Accuracy: {most_efficient['final_accuracy']:.4f}")
print(f"  Parameters: {most_efficient['total_params']:,}")
print(f"  Efficiency: {most_efficient['efficiency']:.2e}")

print("="*60)

## 💾 Export Specific Architecture

In [None]:
# Select an architecture to export.
EXPORT_ARCH_ID = best_arch_id  # Change this to any arch_id you want

# Load the architecture structure.
export_path = os.path.join(RESULTS_DIR, 'architectures', f'{EXPORT_ARCH_ID}.json')
export_arch = load_architecture_json(export_path)

# Create the model. It is never moved to another device in this cell.
# NOTE(review): num_classes is hard-coded to 10 — confirm it matches the
# dataset of the selected run.
export_model = ConvNet(export_arch, num_classes=10)

# Load the trained weights.
# map_location='cpu' lets a checkpoint saved from a CUDA run load on a
# CPU-only machine; the export is then written with CPU tensors.
model_path = os.path.join(RESULTS_DIR, 'models', f'{EXPORT_ARCH_ID}.pth')
checkpoint = torch.load(model_path, map_location='cpu')
# Loading into a freshly built model also checks that the weights fit the
# architecture.
export_model.load_state_dict(checkpoint['model_state_dict'])

# Embed the architecture definition itself so the exported file still works
# after it is moved away from RESULTS_DIR; a bare path would dangle.
with open(export_path, encoding='utf-8') as f:
    architecture_spec = json.load(f)

# Save to the current directory.
export_filename = f'{EXPORT_ARCH_ID}_exported.pth'
torch.save({
    'model_state_dict': export_model.state_dict(),
    'architecture_json': export_path,  # original path, kept for backward compatibility
    'architecture': architecture_spec,  # parsed contents of the architecture JSON
    'metadata': checkpoint['metadata']
}, export_filename)

print(f"‚úÖ Exported model to: {export_filename}")
print(f"\nTo load this model:")
print(f"  checkpoint = torch.load('{export_filename}')")
print(f"  model.load_state_dict(checkpoint['model_state_dict'])")

## ✅ Summary

You've explored:
- ✅ Loaded all saved architectures
- ✅ Visualized best architectures
- ✅ Loaded trained model weights
- ✅ Tested inference
- ✅ Compared multiple architectures
- ✅ Analyzed patterns across all results
- ✅ Exported specific models

**Next steps:**
- Use exported models in your applications
- Fine-tune models on new datasets
- Analyze which architectural patterns work best
- Run more training to discover better architectures