# 🎨 Creative Neural Architecture Search - Interactive Demo

This notebook provides an interactive way to discover creative neural network architectures.

**Estimated time:** 3-10 minutes depending on settings

---

## 📦 Setup & Imports

In [None]:
# Install dependencies (run once)
# !pip install torch torchvision torch-geometric networkx scipy matplotlib tqdm

In [None]:
import torch
import os
from datetime import datetime
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device: use CUDA whenever PyTorch reports a usable GPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print("üñ•Ô∏è  Using device: {}".format(device))

# Tell CPU-only users up front that the search will be slower.
if device != 'cuda':
    print("‚ö†Ô∏è  No GPU detected. Training will be slower. Recommend using MNIST with fewer episodes.")

## ⚙️ Configuration

**Customize your demo here:**

In [None]:
# ===== DEMO CONFIGURATION =====

# Which dataset to search on. Accepted values: 'mnist', 'fashion', 'cifar10'.
#   mnist   -> fastest  (~2-3 min for 50 episodes)
#   fashion -> fast     (~3-5 min for 50 episodes)
#   cifar10 -> slower   (~5-10 min for 50 episodes)
DATASET = 'mnist'

# How many search episodes the agent runs (50-200 is a sensible demo range).
# Raising this explores more architectures at the cost of wall-clock time.
EPISODES = 100

# Training epochs spent on each candidate during the search (2-5 for a demo).
# Higher values give steadier accuracy estimates but slow every evaluation.
EVAL_EPOCHS = 3

# How many of the best candidates get the full final evaluation (5-20).
TOP_K = 10

# Results land in a per-run folder named after the dataset and a start timestamp.
OUTPUT_DIR = 'demo_results/{}_{}'.format(
    DATASET, datetime.now().strftime("%Y%m%d_%H%M%S")
)

# Echo the chosen settings as one block of text.
print("\n".join([
    "üìã Configuration:",
    "  Dataset: {}".format(DATASET),
    "  Episodes: {}".format(EPISODES),
    "  Eval Epochs: {}".format(EVAL_EPOCHS),
    "  Top K: {}".format(TOP_K),
    "  Output: {}".format(OUTPUT_DIR),
]))

# Rough runtime estimate in minutes: a per-episode cost plus a per-finalist cost,
# with coefficients that depend on the dataset.
time_estimates = {
    name: EPISODES * per_episode + TOP_K * per_finalist
    for name, (per_episode, per_finalist) in {
        'mnist': (0.02, 0.15),
        'fashion': (0.025, 0.2),
        'cifar10': (0.04, 0.4),
    }.items()
}
print("\n‚è±Ô∏è  Estimated time: ~{:.1f} minutes".format(time_estimates[DATASET]))

## 🏗️ Load Core Components

In [None]:
# Import all modules
# NOTE(review): these are imported by bare name, so they are presumably project-local
# modules that must be importable from the notebook's working directory — confirm.
from architecture import ArchitectureState, ActionSpace, OPERATION_POOL
from gnn_models import ArchitectureEncoder, DQNetwork
from novelty import TopologicalNovelty, ScaleNovelty, RewardFunction
# `train_architecture` is bound to this notebook-level name at import time; reassigning
# `evaluation.train_architecture` later does not rebind the name imported here.
from evaluation import ConvNet, train_architecture
from agent import CreativityDQN
from utils import save_all_results, load_architecture_json
from visualize import visualize_architecture, create_results_report

# Reaching this line means every import above succeeded.
print("‚úÖ All modules loaded successfully!")

## 🔧 Configure Dataset

In [None]:
import evaluation
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

def _make_grayscale_loaders(dataset_cls, mean, std):
    """Build a loader factory for a single-channel torchvision dataset.

    The returned function has the same signature as the loaders it replaces
    (``batch_size=128, subset_size=None``) and returns ``(train_loader, test_loader)``.
    Images are resized to 32 pixels, expanded to three identical channels and
    normalized with ``mean``/``std`` repeated per channel.

    Args:
        dataset_cls: torchvision dataset class accepting ``root``, ``train``,
            ``download`` and ``transform`` (e.g. ``torchvision.datasets.MNIST``).
        mean: per-channel mean used for normalization.
        std: per-channel standard deviation used for normalization.
    """
    def get_loaders(batch_size=128, subset_size=None):
        transform = transforms.Compose([
            transforms.Resize(32),
            # Grayscale(3) replicates the single channel into r == g == b. Unlike a
            # `transforms.Lambda(lambda ...)`, it can be pickled, which DataLoader
            # workers need on platforms that start worker processes with "spawn".
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize((mean,) * 3, (std,) * 3),
        ])

        trainset = dataset_cls(
            root='./data', train=True, download=True, transform=transform
        )
        testset = dataset_cls(
            root='./data', train=False, download=True, transform=transform
        )

        if subset_size:
            # Clamp so an oversized request uses the whole training set instead of
            # making np.random.choice raise (replace=False cannot oversample).
            subset_size = min(subset_size, len(trainset))
            indices = np.random.choice(len(trainset), subset_size, replace=False)
            trainset = Subset(trainset, indices)

        return (DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2),
                DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2))

    return get_loaders


# Remember the untouched CIFAR-10 loader the first time this cell runs, so that
# switching DATASET back to 'cifar10' without restarting the kernel really restores it.
if not hasattr(evaluation, '_demo_original_cifar10_loaders'):
    evaluation._demo_original_cifar10_loaders = getattr(
        evaluation, 'get_cifar10_loaders', None
    )

if DATASET == 'mnist':
    get_mnist_loaders = _make_grayscale_loaders(torchvision.datasets.MNIST, 0.1307, 0.3081)
    # The evaluation module is pointed at the replacement through its loader attribute.
    evaluation.get_cifar10_loaders = get_mnist_loaders
    print("üìä Dataset: MNIST (handwritten digits)")

elif DATASET == 'fashion':
    get_fashion_loaders = _make_grayscale_loaders(
        torchvision.datasets.FashionMNIST, 0.2860, 0.3530
    )
    evaluation.get_cifar10_loaders = get_fashion_loaders
    print("üìä Dataset: Fashion-MNIST (clothing items)")

else:
    # Undo any patch left behind by an earlier run of this cell in the same kernel.
    if evaluation._demo_original_cifar10_loaders is not None:
        evaluation.get_cifar10_loaders = evaluation._demo_original_cifar10_loaders
    print("üìä Dataset: CIFAR-10 (natural images)")

# Override epochs for demo
# If this cell already ran, `evaluation.train_architecture` is a previous `fast_train`.
# Unwrap it so `original_train` always refers to the real training function; otherwise
# the later "restore" step and explicit `epochs=` arguments would be silently ignored.
original_train = getattr(
    evaluation.train_architecture, '_demo_original', evaluation.train_architecture
)


def fast_train(arch, epochs=None, device='cuda', subset_size=10000):
    """Train ``arch`` with the demo's short epoch budget.

    Args:
        arch: architecture passed straight through to the original trainer.
        epochs: accepted for signature compatibility but ignored; ``EVAL_EPOCHS``
            from the configuration cell is always used.
        device: device string forwarded to the original trainer.
        subset_size: training-subset size forwarded to the original trainer.

    Returns:
        Whatever the original ``train_architecture`` returns.
    """
    return original_train(arch, epochs=EVAL_EPOCHS, device=device, subset_size=subset_size)


# Tag the wrapper so a re-run of this cell can find the real function again.
fast_train._demo_original = original_train
# NOTE(review): code that did `from evaluation import train_architecture` before this
# line keeps the unpatched function — confirm how the agent resolves the trainer.
evaluation.train_architecture = fast_train

## 🎯 Initialize Agent

In [None]:
# Build the search agent on the device selected during setup.
print("üì¶ Initializing DQN agent...")
agent = CreativityDQN(device=device)
print("‚úÖ Agent ready!")

# Report the Q-network size as a thousands-separated parameter count.
print("   Q-Network parameters: {:,}".format(
    sum(map(lambda param: param.numel(), agent.q_network.parameters()))
))

## 🚀 Train Agent

This will take a few minutes. Watch the progress bar!

In [None]:
# Announce the run, then hand control to the agent's training loop.
print("\nüé¨ Starting training for {} episodes...\n".format(EPISODES))

# The agent hands back the architectures it kept and the per-episode statistics.
best_archs, stats = agent.train(
    num_episodes=EPISODES,
    update_freq=5,
    eval_freq=25,
)

print("\n‚úÖ Training complete!")
print("   Found {} interesting architectures".format(len(best_archs)))

## 📊 Training Statistics

In [None]:
import matplotlib.pyplot as plt

# One row of three panels: reward curve, epsilon curve, reward histogram.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
reward_ax, epsilon_ax, hist_ax = axes

episode_rewards = stats['episode_rewards']

# Left panel: reward obtained in each episode.
reward_ax.plot(episode_rewards)
reward_ax.set_title('Episode Rewards')
reward_ax.set_xlabel('Episode')
reward_ax.set_ylabel('Reward')

# Middle panel: exploration rate recorded over the run.
epsilon_ax.plot(stats['epsilons'])
epsilon_ax.set_title('Exploration (Epsilon)')
epsilon_ax.set_xlabel('Episode')
epsilon_ax.set_ylabel('Epsilon')

# Right panel: how the episode rewards are distributed.
hist_ax.hist(episode_rewards, bins=30)
hist_ax.set_title('Reward Distribution')
hist_ax.set_xlabel('Reward')
hist_ax.set_ylabel('Frequency')

# Every panel gets a grid.
for panel in (reward_ax, epsilon_ax, hist_ax):
    panel.grid(True)

plt.tight_layout()
plt.show()

print("Average reward: {:.3f}".format(sum(episode_rewards) / len(episode_rewards)))

## 🎓 Final Evaluation

Now we'll fully train the top architectures for 10 epochs each, with no training-set subsetting.

In [None]:
# Restore original training function for final evaluation
evaluation.train_architecture = original_train

# Slice once so the header, the progress counter and the loop all agree, even when
# the search returned fewer than TOP_K architectures.
finalists = best_archs[:TOP_K]
num_finalists = len(finalists)

if num_finalists == 0:
    # Later cells take max() over final_results and would fail without explanation.
    print("No architectures to evaluate - increase EPISODES and re-run the training cell.")

print(f"üéì Evaluating top {num_finalists} architectures...\n")

final_results = []
for position, arch_data in enumerate(finalists, start=1):
    arch = arch_data['architecture']
    print(f"[{position}/{num_finalists}] Nodes: {len(arch.nodes)}, Depth: {arch.depth}, Width: {arch.avg_width:.1f}")

    # Call the original trainer directly with the full budget: 10 epochs and
    # subset_size=None, instead of the short demo settings used during the search.
    final_acc = original_train(
        arch,
        epochs=10,
        device=device,
        subset_size=None
    )

    print(f"    ‚úÖ Accuracy: {final_acc:.4f}\n")

    # Keep the search-time reward next to the final accuracy for later comparison.
    final_results.append({
        'architecture': arch,
        'search_reward': arch_data['reward'],
        'final_accuracy': final_acc,
        'trajectory': arch_data.get('trajectory', [])
    })

print("‚úÖ Evaluation complete!")

## 💾 Save Results

In [None]:
# Make sure the run folder exists; an existing folder is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Step 1: write the search output, final evaluations and statistics to the run folder.
print("üíæ Saving results...\n")
save_all_results(
    OUTPUT_DIR,
    best_archs,
    final_results,
    stats,
    DATASET,
)

# Step 2: build the results report from what was just saved.
print("\nüìä Creating visualizations...")
create_results_report(OUTPUT_DIR)

print("\n‚úÖ All results saved to: {}".format(OUTPUT_DIR))

## 🏆 Best Architecture

In [None]:
# The winner is the fully evaluated architecture with the highest final accuracy.
best = max(final_results, key=lambda entry: entry['final_accuracy'])
best_arch = best['architecture']

# Summary card: headline metrics first, then structural properties.
print("üèÜ BEST ARCHITECTURE")
print("=" * 50)
print("Final Accuracy:  {:.4f}".format(best['final_accuracy']))
print("Search Reward:   {:.4f}".format(best['search_reward']))
print("Nodes:           {}".format(len(best_arch.nodes)))
print("Edges:           {}".format(len(best_arch.edges)))
print("Depth:           {}".format(best_arch.depth))
print("Avg Width:       {:.1f}".format(best_arch.avg_width))
print("Parameters:      {:,}".format(best_arch.total_params))
print("Skip Connections: {}".format(best_arch.num_skip_connections))
print("=" * 50)

# Render the winner to a PNG in the run folder, then show that file inline.
viz_path = os.path.join(OUTPUT_DIR, 'best_architecture.png')
visualize_architecture(
    best_arch,
    save_path=viz_path,
    title="Best Architecture (Acc: {:.4f})".format(best['final_accuracy']),
)

from IPython.display import Image, display
display(Image(filename=viz_path))

## 📊 Top 10 Results

In [None]:
import pandas as pd

# Rank the evaluated architectures by final accuracy and keep at most ten rows.
ranked_results = sorted(
    final_results, key=lambda entry: entry['final_accuracy'], reverse=True
)[:10]

# One table row per architecture; numeric fields are pre-formatted as strings.
results_data = [
    {
        'Rank': rank,
        'Accuracy': "{:.4f}".format(entry['final_accuracy']),
        'Reward': "{:.3f}".format(entry['search_reward']),
        'Nodes': len(entry['architecture'].nodes),
        'Depth': entry['architecture'].depth,
        'Avg Width': "{:.1f}".format(entry['architecture'].avg_width),
        'Parameters': "{:,}".format(entry['architecture'].total_params),
    }
    for rank, entry in enumerate(ranked_results, start=1)
]

df = pd.DataFrame(results_data)
display(df)

## 🎨 Visualize Top 5 Architectures

In [None]:
from IPython.display import Image, display
import json
import os

viz_dir = os.path.join(OUTPUT_DIR, 'visualizations')
results_file = os.path.join(OUTPUT_DIR, 'results.jsonl')

# Parse the saved results once instead of re-reading the file for every architecture.
# Blank lines are skipped so a trailing newline cannot break json.loads.
with open(results_file) as f:
    saved_records = [json.loads(line) for line in f if line.strip()]

top_5 = sorted(final_results, key=lambda x: x['final_accuracy'], reverse=True)[:5]

# In-memory results are matched to saved records by final accuracy. Each saved record
# may be claimed only once, so architectures that tie on accuracy get distinct images
# rather than all showing the first match.
claimed_records = set()

for i, result in enumerate(top_5):
    print(f"\n{'='*60}")
    print(f"Rank #{i+1} - Accuracy: {result['final_accuracy']:.4f}")
    print(f"{'='*60}")

    # Find corresponding visualization
    shown = False
    for record_index, data in enumerate(saved_records):
        if record_index in claimed_records:
            continue
        if abs(data['final_accuracy'] - result['final_accuracy']) < 0.0001:
            claimed_records.add(record_index)
            arch_id = data['arch_id']
            img_path = os.path.join(viz_dir, f"{arch_id}.png")
            if os.path.exists(img_path):
                display(Image(filename=img_path, width=600))
                shown = True
            break

    if not shown:
        # Say so explicitly instead of leaving an empty section under the heading.
        print("(no saved visualization found for this architecture)")

## 📂 View Full Report

In [None]:
# The HTML report path is built inside the run folder.
report_path = os.path.join(OUTPUT_DIR, 'report.html')

# Print the report path and an inventory of the run folder in a single block.
print("\n".join([
    "üìä Full interactive report available at:",
    "   {}".format(report_path),
    "\nüìÅ All files saved to:",
    "   {}".format(OUTPUT_DIR),
    "\nüì¶ Includes:",
    "   - PyTorch models (.pth): {}/models/".format(OUTPUT_DIR),
    "   - Architecture JSON: {}/architectures/".format(OUTPUT_DIR),
    "   - Visualizations: {}/visualizations/".format(OUTPUT_DIR),
    "   - Results: {}/results.jsonl".format(OUTPUT_DIR),
]))

# Display link to report
from IPython.display import display, HTML
display(HTML(
    '<a href="{}" target="_blank">üîó Open Full Report</a>'.format(report_path)
))

## 🔍 Load & Explore Saved Architectures

In [None]:
# Example: reload one saved architecture from disk and rebuild a model from it.
arch_dir = os.path.join(OUTPUT_DIR, 'architectures')

# Only the .json files in the folder are considered, in sorted name order.
arch_files = sorted(
    filter(lambda name: name.endswith('.json'), os.listdir(arch_dir))
)

if len(arch_files) > 0:
    # Use the first file in sorted order as the example.
    first_file = arch_files[0]
    arch_path = os.path.join(arch_dir, first_file)
    loaded_arch = load_architecture_json(arch_path)

    print("Loaded: {}".format(first_file))
    print("  Nodes: {}".format(len(loaded_arch.nodes)))
    print("  Edges: {}".format(len(loaded_arch.edges)))
    print("  Depth: {}".format(loaded_arch.depth))
    # De-duplicate the operation values before listing them.
    print("  Operations: {}".format(list(set(loaded_arch.operations.values()))))

    # Build a ConvNet from the loaded architecture with 10 output classes.
    model = ConvNet(loaded_arch, num_classes=10)
    print("\n  Model created with {:,} parameters".format(
        sum(param.numel() for param in model.parameters())
    ))

## 🎉 Demo Complete!

### What you got:
- ✅ Trained DQN agent
- ✅ Top architectures discovered
- ✅ Models saved as .pth files
- ✅ Architectures saved as .json files
- ✅ Interactive HTML report
- ✅ Visualizations of all architectures

### Next steps:
1. Open the HTML report to explore all results
2. Load saved models and use them for inference
3. Run again with different settings for better results
4. Use `train.py` for longer training runs

**Happy Architecture Hunting! 🎨✨**