# NAS from Scratch — Analysis Notebook

This notebook analyses the results of the Evolutionary Neural Architecture Search
for CIFAR-10. It loads outputs from `scripts/run_search.py` and provides:

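1. **Fitness History** — best / mean / worst accuracy per generation
2. **Architecture Analysis** — inspect the best discovered genome
3. **Diversity Metrics** — depth, fitness, and parameter-count distributions
4. **Generation Statistics** — per-generation table with the change in best accuracy
5. **Layer Type Usage** — layer-type frequencies overall and in the top 25% of architectures
6. **Saved Visualisations** — fitness curves, evolution tree, diversity plots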

In [None]:
import json
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch

# Put the project root (the parent of the current working directory) at the
# front of the import path so the `src` package can be imported.
PROJECT = Path(".").resolve().parent
_project_dir = str(PROJECT)
if _project_dir not in sys.path:
    sys.path.insert(0, _project_dir)

from src.genome import Genome
from src.builder import build_model, count_params
from src.search_space import SearchSpace

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Notebook-wide plot defaults: default figure size of (10, 5) and font size 12.
plt.rcParams.update({"figure.figsize": (10, 5), "font.size": 12})

## 1. Load Results

In [None]:
# Directory holding the search outputs that the rest of the notebook reads.
OUT = PROJECT / "outputs"

# JSON text is UTF-8; the encoding is passed explicitly so that reading the
# logs does not depend on the platform's locale default.

# Fitness history
with open(OUT / "logs" / "fitness_history.json", encoding="utf-8") as f:
    fitness_history = json.load(f)

# All genomes (full lineage)
with open(OUT / "logs" / "all_genomes.json", encoding="utf-8") as f:
    all_genomes_raw = json.load(f)
all_genomes = [Genome.from_dict(g) for g in all_genomes_raw]

# Best genome
best_genome = Genome.load(OUT / "best_genome.json")

# Quick sanity summary of what was loaded.
print(f"Generations logged: {len(fitness_history)}")
print(f"Total genomes evaluated: {len(all_genomes)}")
print(f"Best genome: {best_genome.summary()}")

## 2. Fitness Curve

In [None]:
# Pull one series per logged field out of the per-generation records.
gens, bests, means, worsts = (
    [record[field] for record in fitness_history]
    for field in ("generation", "best", "mean", "worst")
)

fig, ax = plt.subplots()

# (values, line format, legend label, colour, line width) for each curve,
# drawn in this order.
curve_specs = (
    (bests, "o-", "Best", "#2ecc71", 2),
    (means, "s--", "Mean", "#3498db", 1.5),
    (worsts, "^:", "Worst", "#e74c3c", 1),
)
for values, line_fmt, legend_label, colour, width in curve_specs:
    ax.plot(gens, values, line_fmt, label=legend_label, color=colour, lw=width)

# Shade the band between the worst and best curves.
ax.fill_between(gens, worsts, bests, alpha=0.12, color="#3498db")

ax.set(
    xlabel="Generation",
    ylabel="Validation Accuracy",
    title="Evolutionary NAS — Fitness Curve",
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 3. Best Architecture

In [None]:
# Print the best genome's metadata followed by a per-layer listing.
#
# Other cells in this notebook treat fitness as optional (`g.fitness or 0.0`),
# so the numeric format is guarded here rather than raising TypeError when the
# loaded genome has no recorded fitness.
fitness_text = (
    f"{best_genome.fitness:.4f}" if best_genome.fitness is not None else "n/a"
)

print("Best discovered architecture:\n")
print(f"  ID:         {best_genome.id}")
print(f"  Generation: {best_genome.generation}")
print(f"  Fitness:    {fitness_text}")
print(f"  Depth:      {best_genome.depth} layers")
print(f"  Skips:      {len(best_genome.skip_connections)}")
print()
print("Layers:")
for i, layer in enumerate(best_genome.layers):
    # Render the layer's parameters as "key=value" pairs on one line.
    params_str = ", ".join(f"{k}={v}" for k, v in layer.params.items())
    print(f"  [{i}] {layer.layer_type:10s}  {params_str}")

In [None]:
# Instantiate the best genome as a 10-class model, then report the value
# returned by count_params and the model's printed representation.
model = build_model(best_genome, num_classes=10)
n_params = count_params(model)

print(f"Total trainable parameters: {n_params:,}")
print("\nModel architecture:\n")
print(model)

## 4. Population Diversity

In [None]:
# Analyse depth, fitness and parameter-count distributions across all
# evaluated architectures.
depths = [g.depth for g in all_genomes]
# Genomes without a recorded fitness are plotted as 0.0.
fitnesses = [g.fitness or 0.0 for g in all_genomes]

# Count parameters by building every genome. A genome that cannot be built is
# recorded as NaN, so it is left off the scatter plot instead of appearing as
# a zero-parameter model, and the failure is reported below rather than
# silently swallowed.
param_counts = []
build_failures = []
for g in all_genomes:
    try:
        m = build_model(g, num_classes=10)
        param_counts.append(count_params(m))
    except Exception as exc:  # best-effort: one bad genome must not stop the analysis
        param_counts.append(np.nan)
        build_failures.append((g, exc))

if build_failures:
    print(
        f"Warning: {len(build_failures)} of {len(all_genomes)} genomes could not "
        f"be built and are omitted from the size plot "
        f"(first error: {build_failures[0][1]!r})"
    )

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Depth distribution: one bin per integer depth. The bin edges run from
# min(depths) to max(depths) + 1, hence the "+ 2" on the exclusive range end.
# Skipped when there are no genomes, because min()/max() raise on an empty list.
if depths:
    axes[0].hist(depths, bins=range(min(depths), max(depths) + 2),
                 color="#3498db", edgecolor="white", alpha=0.8)
axes[0].set_xlabel("Depth (# layers)")
axes[0].set_ylabel("Count")
axes[0].set_title("Architecture Depth Distribution")

# Fitness distribution
axes[1].hist(fitnesses, bins=20, color="#2ecc71", edgecolor="white", alpha=0.8)
axes[1].set_xlabel("Validation Accuracy")
axes[1].set_ylabel("Count")
axes[1].set_title("Fitness Distribution")

# Fitness vs params scatter
axes[2].scatter(param_counts, fitnesses, alpha=0.5, s=20, c="#e74c3c")
axes[2].set_xlabel("Parameter Count")
axes[2].set_ylabel("Validation Accuracy")
axes[2].set_title("Fitness vs. Model Size")

for ax in axes:
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Generation-by-Generation Statistics

In [None]:
import pandas as pd

# One row per logged generation, plus the generation-over-generation change
# in best accuracy (0 for the first row, which has no predecessor).
df = pd.DataFrame(fitness_history).assign(
    improvement=lambda frame: frame["best"].diff().fillna(0)
)

# Four decimals for the accuracy columns; the improvement column is signed.
column_formats = dict.fromkeys(("best", "mean", "worst"), "{:.4f}")
column_formats["improvement"] = "{:+.4f}"

# Left as the cell's final expression so the notebook displays the styled table.
df.style.format(column_formats)

## 6. Layer Type Usage

In [None]:
from collections import Counter


def _fitness_or_zero(genome):
    """Sort key: the genome's fitness, with a missing fitness counted as 0.0."""
    return genome.fitness or 0.0


# Layer type frequency across ALL architectures
type_counts = Counter(
    layer.layer_type for genome in all_genomes for layer in genome.layers
)

# Layer type frequency in the TOP 25% of architectures (always at least one
# slot, even for very small populations)
sorted_genomes = sorted(all_genomes, key=_fitness_or_zero, reverse=True)
top_quarter_size = max(1, len(sorted_genomes) // 4)
top_25 = sorted_genomes[:top_quarter_size]
top_counts = Counter(
    layer.layer_type for genome in top_25 for layer in genome.layers
)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Both panels share one label order so their bars line up row by row.
labels = sorted(type_counts)
panels = (
    (ax1, type_counts, "#3498db", "All Architectures — Layer Type Usage"),
    (ax2, top_counts, "#2ecc71", "Top 25% — Layer Type Usage"),
)
for axis, counts, colour, title in panels:
    # A Counter returns 0 for layer types it has not seen.
    axis.barh(labels, [counts[name] for name in labels], color=colour, alpha=0.8)
    axis.set_title(title)
    axis.set_xlabel("Count")

plt.tight_layout()
plt.show()

## 7. Saved Plots

The search script also saved these visualisations:

In [None]:
from IPython.display import Image, display

# Show every PNG in the outputs' "plots" directory, in sorted path order,
# each preceded by a header line with its file name.
plot_dir = OUT / "plots"
plots = [*plot_dir.glob("*.png")]
for p in sorted(plots):
    print(f"\n── {p.name} ──")
    display(Image(filename=str(p), width=700))

## 8. Next Steps

- **Full training**: Run `python scripts/train_best.py` to fully train the best architecture (100+ epochs)
- **ResNet comparison**: Run `python scripts/compare_resnet.py` to compare NAS-best vs ResNet-18
- **Longer search**: Try `python scripts/run_search.py` with a larger population and/or more generations
- **Enable features**: Turn on `weight_inheritance` and `predictor` in configs for better search efficiency