# 🎨 Creative Neural Architecture Search - Complete System

This notebook contains the full implementation organized into logical sections.

**Use this for:**
- Understanding the complete system
- Customizing components
- Running longer training sessions

---

## 📦 Dependencies & Setup

In [None]:
# Install if needed (uncomment the line below and run this cell once).
# Prefer %pip over !pip so the packages are installed into the running kernel's environment.
# pandas is listed because the Results Summary cell imports it.
# %pip install torch torchvision torch-geometric networkx scipy matplotlib tqdm pandas

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool, global_max_pool
from torch_geometric.data import Data
from torch.utils.data import DataLoader, Subset

import torchvision
import torchvision.transforms as transforms

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import entropy

import os
import json
import copy
import random
from collections import deque
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
from typing import List, Dict, Tuple, Optional

import warnings
# NOTE(review): this suppresses every warning category for the rest of the session,
# including deprecation and resource warnings raised by later cells. Consider
# narrowing the filter to specific categories/modules if warnings need to be seen.
warnings.filterwarnings('ignore')

# Reaching this line means every import in this cell succeeded.
print("‚úÖ All imports successful!")

## 1️⃣ Architecture Representation

Defines how we represent neural architectures as graphs.

In [None]:
# Fixed operation pool: the position of a name in this list is its integer index.
OPERATION_POOL = [
    'conv3x3', 'conv5x5',
    'sep_conv3x3', 'sep_conv5x5',
    'max_pool3x3', 'avg_pool3x3',
    'skip_connect',
]

# Reverse lookup: operation name -> position in OPERATION_POOL.
OP_TO_IDX = dict(zip(OPERATION_POOL, range(len(OPERATION_POOL))))


# Action space
class ActionSpace:
    """Named integer codes (0-6) for the available actions, plus their count."""

    (ADD_NODE, REMOVE_NODE,
     ADD_EDGE, REMOVE_EDGE,
     INCREASE_CHANNELS, DECREASE_CHANNELS,
     STOP_BUILDING) = range(7)

    # Total number of action codes defined above.
    NUM_ACTIONS = 7


print(f"Operation pool: {OPERATION_POOL}")
print(f"Action space size: {ActionSpace.NUM_ACTIONS}")

In [None]:
# Full ArchitectureState class from architecture.py
# (Paste the complete class here or import from module)

# For the notebook, the module's source is executed directly in the notebook's
# global namespace, so every name it defines becomes available to later cells.
# The file is expected in the current working directory.
#  - `with` closes the file handle as soon as the source has been read.
#  - encoding='utf-8' matches Python's default source encoding instead of
#    depending on the platform locale.
#  - compile() attaches the real filename so tracebacks point at architecture.py
#    rather than "<string>".
with open('architecture.py', encoding='utf-8') as source_file:
    exec(compile(source_file.read(), 'architecture.py', 'exec'))

print("‚úÖ ArchitectureState class loaded")

### Test Architecture Creation

In [None]:
# Build the starter architecture and print a short summary of its attributes.
test_arch = ArchitectureState.initialize_starter()

for summary_line in (
    "Test Architecture:",
    f"  Nodes: {len(test_arch.nodes)}",
    f"  Edges: {len(test_arch.edges)}",
    f"  Depth: {test_arch.depth}",
    f"  Operations: {list(test_arch.operations.values())}",
    f"  Valid actions: {len(test_arch.get_valid_actions())}",
):
    print(summary_line)

## 2️⃣ GNN Models

Graph neural network for encoding architectures.

In [None]:
# Load GNN models by executing gnn_models.py in the notebook's global namespace.
# The context manager closes the file handle promptly, the source is decoded as
# UTF-8 regardless of platform locale, and compile() keeps the filename in tracebacks.
with open('gnn_models.py', encoding='utf-8') as source_file:
    exec(compile(source_file.read(), 'gnn_models.py', 'exec'))

print("‚úÖ GNN models loaded")

# Test GNN encoder: pick the GPU when one is available, otherwise fall back to CPU.
# `device` is reused by later cells.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
encoder = ArchitectureEncoder().to(device)
print(f"   Encoder parameters: {sum(p.numel() for p in encoder.parameters()):,}")

## 3️⃣ Novelty Metrics

Measures for topological and scale creativity.

In [None]:
# Load novelty metrics by executing novelty.py in the notebook's global namespace.
# The context manager closes the file handle promptly, the source is decoded as
# UTF-8 regardless of platform locale, and compile() keeps the filename in tracebacks.
with open('novelty.py', encoding='utf-8') as source_file:
    exec(compile(source_file.read(), 'novelty.py', 'exec'))

print("‚úÖ Novelty metrics loaded")

# Test novelty computation on the starter architecture with a fixed performance value.
# NOTE(review): alpha/beta/gamma look like component weights (they sum to 1.0) —
# confirm their meaning against RewardFunction in novelty.py.
reward_fn = RewardFunction(alpha=0.5, beta=0.35, gamma=0.15)
test_reward, components = reward_fn.compute_reward(test_arch, performance=0.85)

print(f"\nTest reward components:")
for key, value in components.items():
    print(f"  {key}: {value:.4f}")

## 4️⃣ Architecture Evaluation

Convert architectures to PyTorch models and train them.

In [None]:
# Load evaluation code by executing evaluation.py in the notebook's global namespace.
# The context manager closes the file handle promptly, the source is decoded as
# UTF-8 regardless of platform locale, and compile() keeps the filename in tracebacks.
with open('evaluation.py', encoding='utf-8') as source_file:
    exec(compile(source_file.read(), 'evaluation.py', 'exec'))

print("‚úÖ Evaluation code loaded")

### Test Model Creation

In [None]:
# Create a model from the test architecture (10 output classes) on the selected device.
test_model = ConvNet(test_arch, num_classes=10).to(device)

print(f"Model created:")
print(f"  Parameters: {sum(p.numel() for p in test_model.parameters()):,}")

# Smoke-test the forward pass with a random tensor of shape (2, 3, 32, 32).
# No gradients are needed for this check, so autograd tracking is disabled to
# avoid building (and holding on to) a computation graph.
test_input = torch.randn(2, 3, 32, 32).to(device)
with torch.no_grad():
    test_output = test_model(test_input)
print(f"  Output shape: {test_output.shape}")

## 5️⃣ DQN Agent

Reinforcement learning agent that discovers architectures.

In [None]:
# Load agent code by executing agent.py in the notebook's global namespace.
# The context manager closes the file handle promptly, the source is decoded as
# UTF-8 regardless of platform locale, and compile() keeps the filename in tracebacks.
with open('agent.py', encoding='utf-8') as source_file:
    exec(compile(source_file.read(), 'agent.py', 'exec'))

print("‚úÖ DQN agent loaded")

## 6️⃣ Utilities & Visualization

Functions for saving/loading and visualization.

In [None]:
# Load utils and visualization by executing each file, in order, in the notebook's
# global namespace. The loop stays at cell level (not inside a function) so the
# names the files define land in the notebook globals.
# The context manager closes each file handle promptly, the source is decoded as
# UTF-8 regardless of platform locale, and compile() keeps the filename in tracebacks.
for module_file in ('utils.py', 'visualize.py'):
    with open(module_file, encoding='utf-8') as source_file:
        exec(compile(source_file.read(), module_file, 'exec'))

print("‚úÖ Utilities loaded")

## 🚀 Training Configuration

In [None]:
# ===== TRAINING CONFIGURATION =====

CONFIG = dict(
    dataset='cifar10',   # 'mnist', 'fashion', 'cifar10'
    episodes=1000,       # Number of training episodes
    eval_epochs=3,       # Epochs per architecture during search
    final_epochs=20,     # Epochs for final evaluation
    top_k=20,            # Number of top architectures to evaluate
    device=device,
    seed=42,
)

# Seed every random number generator used by the notebook with the same value.
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(CONFIG['seed'])
if torch.cuda.is_available():
    torch.cuda.manual_seed(CONFIG['seed'])

# Output directory: one timestamped folder per run of this cell.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
OUTPUT_DIR = f'results/{CONFIG["dataset"]}_{timestamp}'
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

print("Configuration:")
print("\n".join(f"  {key}: {value}" for key, value in CONFIG.items()))
print(f"\nOutput: {OUTPUT_DIR}")

## 🎯 Initialize Agent & Train

In [None]:
# Create the agent on the configured device and report the size of its Q-network.
agent = CreativityDQN(device=CONFIG['device'])

q_network_param_count = sum(param.numel() for param in agent.q_network.parameters())
print(f"Agent initialized with {q_network_param_count:,} parameters")

In [None]:
# Run the search for the configured number of episodes.
print(f"\nTraining for {CONFIG['episodes']} episodes...\n")

# NOTE(review): update_freq / eval_freq are presumably measured in episodes —
# confirm against CreativityDQN.train.
train_kwargs = {
    'num_episodes': CONFIG['episodes'],
    'update_freq': 10,
    'eval_freq': 50,
}
best_archs, stats = agent.train(**train_kwargs)

print(f"\n‚úÖ Training complete! Found {len(best_archs)} architectures")

## 📊 Visualize Training

In [None]:
# 2x2 dashboard of the statistics returned by training.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
rewards_ax, loss_ax = axes[0]
epsilon_ax, hist_ax = axes[1]

# Top-left: reward per episode.
rewards_ax.plot(stats['episode_rewards'])
rewards_ax.set(title='Episode Rewards', xlabel='Episode', ylabel='Reward')
rewards_ax.grid(True)

# Top-right: training loss; the panel is left untouched when no losses were recorded.
if stats['losses']:
    loss_ax.plot(stats['losses'])
    loss_ax.set(title='Training Loss', xlabel='Update Step', ylabel='Loss')
    loss_ax.grid(True)

# Bottom-left: epsilon values over training.
epsilon_ax.plot(stats['epsilons'])
epsilon_ax.set(title='Epsilon Decay', xlabel='Episode', ylabel='Epsilon')
epsilon_ax.grid(True)

# Bottom-right: histogram of episode rewards.
hist_ax.hist(stats['episode_rewards'], bins=50)
hist_ax.set(title='Reward Distribution', xlabel='Reward', ylabel='Frequency')
hist_ax.grid(True)

fig.tight_layout()
fig.savefig(os.path.join(OUTPUT_DIR, 'training_stats.png'), dpi=150)
plt.show()

## 🎓 Final Evaluation

In [None]:
# Fully train the first `top_k` entries of best_archs and record their accuracy.
# The search may return fewer than `top_k` architectures, so the header and the
# progress counter use the number actually evaluated, not the configured limit.
# NOTE(review): assumes best_archs is ordered best-first — confirm against agent.train.
top_archs = best_archs[:CONFIG['top_k']]
num_to_evaluate = len(top_archs)

print(f"Evaluating top {num_to_evaluate} architectures...\n")

final_results = []

for position, arch_data in enumerate(top_archs, start=1):
    arch = arch_data['architecture']
    search_reward = arch_data['reward']

    print(f"[{position}/{num_to_evaluate}]")
    print(f"  Nodes: {len(arch.nodes)}, Depth: {arch.depth}, Width: {arch.avg_width:.1f}")

    # Full training: subset_size=None is passed instead of a subset size.
    # NOTE(review): CONFIG['dataset'] is not forwarded here — confirm which dataset
    # train_architecture uses by default.
    final_acc = train_architecture(
        arch,
        epochs=CONFIG['final_epochs'],
        device=CONFIG['device'],
        subset_size=None
    )

    print(f"  Accuracy: {final_acc:.4f}\n")

    final_results.append({
        'architecture': arch,
        'search_reward': search_reward,
        'final_accuracy': final_acc,
        # Entries without a recorded trajectory get an empty list.
        'trajectory': arch_data.get('trajectory', [])
    })

print("‚úÖ Evaluation complete!")

## 💾 Save All Results

In [None]:
# Write the search and evaluation results into the run's output directory.
save_all_results(OUTPUT_DIR, best_archs, final_results, stats, CONFIG['dataset'])

# Create visualizations
create_results_report(OUTPUT_DIR)

# Save the agent next to the results.
agent_checkpoint_path = os.path.join(OUTPUT_DIR, 'agent.pt')
agent.save(agent_checkpoint_path)

print(f"\n‚úÖ All results saved to: {OUTPUT_DIR}")

## 🏆 Results Summary

In [None]:
import pandas as pd
# Imported explicitly so this cell does not depend on IPython injecting `display`
# as a builtin (or on a later cell having been run first).
from IPython.display import display

# Create summary table: one row per evaluated architecture, ranked by final
# accuracy (highest first, rank starting at 1).
ranked_results = sorted(final_results, key=lambda x: x['final_accuracy'], reverse=True)
summary_data = [
    {
        'Rank': rank,
        'Accuracy': result['final_accuracy'],
        'Reward': result['search_reward'],
        'Nodes': len(result['architecture'].nodes),
        'Depth': result['architecture'].depth,
        'Avg_Width': result['architecture'].avg_width,
        'Parameters': result['architecture'].total_params,
        'Skip_Connections': result['architecture'].num_skip_connections
    }
    for rank, result in enumerate(ranked_results, start=1)
]

df = pd.DataFrame(summary_data)

print("\n" + "="*80)
print("RESULTS SUMMARY")
print("="*80)

if df.empty:
    # With no rows the frame has no columns, so the column lookups below would
    # raise KeyError; report the situation instead.
    print("\nNo architectures were evaluated; nothing to summarize.")
else:
    print(f"\nBest Accuracy: {df['Accuracy'].max():.4f}")
    print(f"Average Accuracy: {df['Accuracy'].mean():.4f}")
    print(f"Std Accuracy: {df['Accuracy'].std():.4f}")
    print(f"\nAverage Depth: {df['Depth'].mean():.1f}")
    print(f"Average Width: {df['Avg_Width'].mean():.1f}")
    print(f"Average Parameters: {df['Parameters'].mean():,.0f}")

    print("\nTop 10 Architectures:")
    display(df.head(10))

## 🎨 Visualize Best Architecture

In [None]:
from IPython.display import Image, display

# Select the evaluated entry with the highest final accuracy.
best = max(final_results, key=lambda result: result['final_accuracy'])
best_arch = best['architecture']

banner = "=" * 60
report_lines = [
    "üèÜ BEST ARCHITECTURE",
    banner,
    f"Accuracy: {best['final_accuracy']:.4f}",
    f"Reward: {best['search_reward']:.4f}",
    f"Nodes: {len(best_arch.nodes)}",
    f"Depth: {best_arch.depth}",
    f"Width: {best_arch.avg_width:.1f}",
    f"Parameters: {best_arch.total_params:,}",
    banner,
]
print("\n".join(report_lines))

# Render the architecture to a PNG in the output directory, then show that file inline.
viz_path = os.path.join(OUTPUT_DIR, 'best_architecture.png')
viz_title = f"Best Architecture (Acc: {best['final_accuracy']:.4f})"
visualize_architecture(best_arch, save_path=viz_path, title=viz_title)
display(Image(filename=viz_path))

## ✅ Done!

All results are saved to the directory stored in `OUTPUT_DIR` (`results/<dataset>_<timestamp>/`).

Open `report.html` to see the full interactive report!