# Layer Outputs PCA Visualization

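This notebook visualizes the layer outputs from trained neural networks using PCA.
It expects the project's `output_layers/<combination>/models_*/` directory layout, where each `models_*` folder contains `x_test.csv`, `y_test.csv`, `model_predict.npy`, and optionally `accuracy.npy`.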

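The input data and each hidden layer output are reduced to 2D using Principal Component Analysis (PCA)
and plotted with the classes shown in different colors (class 0 in yellow, class 1 in blue).
PCA is skipped for data that already has two or fewer features.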

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import pandas as pd

# --- Notebook configuration ---------------------------------------------------

# A model is preferred for plotting when its accuracy exceeds this value.
ACC_THRESHOLD = 0.98

# Scatter colour per class label: 0 -> yellow, 1 -> blue.
color_map = {0: '#FDD835', 1: '#536DFE'}

# Layer dumps are read from <current working directory>/output_layers.
BASE_DIR = os.getcwd()
OUTPUT_LAYERS_DIR = os.path.join(BASE_DIR, "output_layers")

# Figures go to a folder relative to the working directory; create it up
# front so that saving a figure later cannot fail on a missing directory.
VISUALIZATION_DIR = "layer_visualizations"
os.makedirs(VISUALIZATION_DIR, exist_ok=True)

print(f"Output layers directory: {OUTPUT_LAYERS_DIR}")
print(f"Visualization directory: {VISUALIZATION_DIR}")

In [None]:
def get_available_combinations():
    """Return the sorted names of combination folders that hold model dumps.

    An entry of ``OUTPUT_LAYERS_DIR`` qualifies when it is a directory, its
    name does not begin with a dot, and at least one of its own entries has a
    name starting with ``models_``. An empty list is returned when
    ``OUTPUT_LAYERS_DIR`` does not exist.
    """
    if not os.path.exists(OUTPUT_LAYERS_DIR):
        return []

    def _has_models_entry(folder):
        # any() stops scanning at the first matching entry.
        return any(entry.startswith('models_') for entry in os.listdir(folder))

    found = [
        name
        for name in os.listdir(OUTPUT_LAYERS_DIR)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(OUTPUT_LAYERS_DIR, name))
        and _has_models_entry(os.path.join(OUTPUT_LAYERS_DIR, name))
    ]
    found.sort()
    return found

# Discover the combinations once and echo each with its positional index,
# which is the number the selection cell accepts.
combinations = get_available_combinations()
print(f"Found {len(combinations)} architecture-dataset combinations:")
for position, name in enumerate(combinations):
    print(f"  {position}: {name}")

In [None]:
def load_test_data(combo_dir):
    """Load test features, labels and per-sample plot colors for one combination.

    The models directory is the first entry of ``combo_dir``, in sorted name
    order, that is a directory whose name starts with ``models_``
    (e.g. ``models_b70``). It must contain the header-less CSV files
    ``x_test.csv`` and ``y_test.csv``.

    Args:
        combo_dir: Path of the combination folder to read from.

    Returns:
        Tuple ``(X_test, y_test, colors, model_dir)`` where ``X_test`` is the
        feature array read from ``x_test.csv``, ``y_test`` is the flattened
        integer label array, ``colors`` is a list holding the ``color_map``
        entry of every label, and ``model_dir`` is the models directory path.

    Raises:
        FileNotFoundError: If ``combo_dir`` has no ``models_*`` directory or a
            CSV file is missing.
        ValueError: If the number of feature rows and labels differ.
        KeyError: If a label has no entry in ``color_map``.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # choice reproducible when several models_* folders exist, and the isdir
    # check skips stray files that merely share the prefix.
    model_dir = next(
        (
            os.path.join(combo_dir, entry)
            for entry in sorted(os.listdir(combo_dir))
            if entry.startswith('models_')
            and os.path.isdir(os.path.join(combo_dir, entry))
        ),
        None,
    )

    if model_dir is None:
        raise FileNotFoundError(f"No models directory found in {combo_dir}")

    # Load test data
    x_test_path = os.path.join(model_dir, "x_test.csv")
    y_test_path = os.path.join(model_dir, "y_test.csv")

    X_test = pd.read_csv(x_test_path, header=None).values
    y_test = pd.read_csv(y_test_path, header=None).values.flatten().astype(int)

    # Fail here with a clear message instead of deep inside the scatter call.
    if X_test.shape[0] != y_test.shape[0]:
        raise ValueError(
            f"x_test.csv has {X_test.shape[0]} rows but y_test.csv yields "
            f"{y_test.shape[0]} labels in {model_dir}"
        )

    # Report every unmapped label at once rather than a bare KeyError.
    unknown_labels = sorted(set(y_test.tolist()) - set(color_map))
    if unknown_labels:
        raise KeyError(f"Labels {unknown_labels} have no entry in color_map")

    colors = [color_map[label] for label in y_test]

    print(f"Test set shape: {X_test.shape}")
    print(f"Test labels shape: {y_test.shape}")
    print(f"Test labels distribution: {np.bincount(y_test)}")

    return X_test, y_test, colors, model_dir


def load_layer_outputs(model_dir, acc_threshold=ACC_THRESHOLD):
    """Load the per-layer outputs of one model stored in ``model_predict.npy``.

    When ``accuracy.npy`` exists next to it, the first model whose accuracy is
    strictly greater than ``acc_threshold`` is selected; if none qualifies, the
    model with the highest accuracy is used. Without an accuracy file (or with
    an empty one) model 0 is used.

    Args:
        model_dir: Directory containing ``model_predict.npy`` and optionally
            ``accuracy.npy``.
        acc_threshold: Accuracy a model must exceed to be selected directly.

    Returns:
        Dict mapping the 1-based layer number to that layer's output as a
        NumPy array (presumably ``(n_samples, n_units)`` -- confirm against
        the code that writes ``model_predict.npy``).

    Raises:
        FileNotFoundError: If ``model_predict.npy`` is missing.
        ValueError: If ``model_predict.npy`` contains no models.
        IndexError: If the accuracy file selects a model that is not present
            in ``model_predict.npy``.
    """
    model_predict_path = os.path.join(model_dir, "model_predict.npy")
    accuracy_path = os.path.join(model_dir, "accuracy.npy")

    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects and
    # can execute code; only load files produced by this project.
    model_predict = np.load(model_predict_path, allow_pickle=True)
    if len(model_predict) == 0:
        raise ValueError(f"{model_predict_path} contains no models")

    # Load accuracy if available to select best model
    selected_model_idx = 0
    if os.path.exists(accuracy_path):
        # ravel() so a column-shaped or scalar accuracy array indexes like a
        # flat list of per-model accuracies.
        accuracies = np.asarray(np.load(accuracy_path)).ravel()
        if accuracies.size == 0:
            print(f"{accuracy_path} is empty; falling back to model #0")
        else:
            # Find first model that passes threshold, or use best model
            passing = np.flatnonzero(accuracies > acc_threshold)
            if passing.size > 0:
                selected_model_idx = int(passing[0])
            else:
                selected_model_idx = int(np.argmax(accuracies))
            print(f"Selected model #{selected_model_idx} with accuracy: {accuracies[selected_model_idx]:.4f}")

    # A stale accuracy file could point past the stored models; say so
    # explicitly instead of surfacing a bare IndexError.
    if selected_model_idx >= len(model_predict):
        raise IndexError(
            f"Selected model #{selected_model_idx} but {model_predict_path} "
            f"only holds {len(model_predict)} models"
        )

    # Coerce every layer to an ndarray because downstream plotting relies on
    # ``.shape``; keys are 1-indexed so that 0 can stand for the input layer.
    return {
        layer_num: np.asarray(layer_data)
        for layer_num, layer_data in enumerate(model_predict[selected_model_idx], start=1)
    }

print("Helper functions defined!")

In [None]:
def visualize_layers(layer_outputs, colors, X_test, combo_name, save_filename=None, output_dir=None):
    """Plot a 2-D view of the input data and of every layer output.

    One subplot is drawn per layer, three per row. Data with more than two
    features is projected onto its first two principal components; data with
    exactly two features is plotted as-is, and data with fewer than two
    features is padded with zero columns so it can still be drawn.

    Args:
        layer_outputs: Dict mapping a layer number to that layer's output
            array (one row per sample).
        colors: Per-sample colors passed to ``Axes.scatter``.
        X_test: Input data, shown as layer 0 ("Input Layer").
        combo_name: Combination name used in the log line and figure title.
        save_filename: If given, the figure is saved under this file name.
        output_dir: Directory for the saved figure; defaults to
            ``VISUALIZATION_DIR``. It is created when missing.

    Returns:
        None. The figure is shown and then closed.
    """
    if output_dir is None:
        output_dir = VISUALIZATION_DIR

    # Include input layer (layer 0) and all hidden layers
    all_layers = [0] + sorted(layer_outputs.keys())
    n_layers = len(all_layers)

    print(f"{combo_name}: {n_layers} layers to visualize")

    # Subplot layout
    cols = 3
    rows = (n_layers + cols - 1) // cols

    # squeeze=False always returns a 2-D axes array, so a single row needs no
    # special handling before flattening.
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    for ax, layer_num in zip(axes, all_layers):
        # Get data for this layer
        if layer_num == 0:
            raw_layer = X_test
            layer_label = "Input Layer"
        else:
            raw_layer = layer_outputs[layer_num]
            layer_label = f"Hidden Layer {layer_num}"

        # Normalise to a (n_samples, n_features) array: a flat vector becomes
        # one column, higher-rank outputs are flattened per sample.
        X_layer = np.asarray(raw_layer)
        if X_layer.ndim == 1:
            X_layer = X_layer[:, np.newaxis]
        elif X_layer.ndim > 2:
            X_layer = X_layer.reshape(len(X_layer), -1)
        n_features = X_layer.shape[1]

        # Apply PCA if needed
        if n_features > 2:
            pca = PCA(n_components=2, random_state=42)
            X_pca = pca.fit_transform(X_layer)
            var_ratio = pca.explained_variance_ratio_.sum()
            x_label, y_label = 'PC 1', 'PC 2'
        else:
            # Nothing to reduce. Pad with zero columns when fewer than two
            # features exist so that column 1 is always available to plot.
            padding = np.zeros((X_layer.shape[0], 2 - n_features))
            X_pca = np.hstack([X_layer, padding])
            var_ratio = 1.0
            # These axes are raw features, not principal components.
            x_label, y_label = 'Feature 1', 'Feature 2'

        # Scatter plot
        ax.scatter(X_pca[:, 0], X_pca[:, 1], c=colors, s=10, alpha=0.7, edgecolors='none')

        ax.set_xlabel(x_label, fontsize=10)
        ax.set_ylabel(y_label, fontsize=10)
        ax.set_title(f'{layer_label} (dim: {n_features})', fontsize=11, fontweight='bold')
        ax.grid(True, alpha=0.3)

        # Explained variance ratio
        ax.text(0.02, 0.98, f'Explained variance: {var_ratio:.2%}',
                transform=ax.transAxes, fontsize=9, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    # Remove extra subplots
    for unused_ax in axes[n_layers:]:
        fig.delaxes(unused_ax)

    title = combo_name.replace('_', ' ').title()
    fig.suptitle(f'{title} - Layer Visualizations', fontsize=14, fontweight='bold', y=1.02)
    fig.tight_layout()

    if save_filename:
        # A caller-supplied output_dir may not exist yet.
        os.makedirs(output_dir, exist_ok=True)
        save_path = os.path.join(output_dir, save_filename)
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"✅ Visualization saved: {save_path}")

    plt.show()
    # Release the figure so repeated calls (e.g. a loop over all
    # combinations) do not accumulate open figures in memory.
    plt.close(fig)

print("Visualization function defined!")

## Select and Visualize a Combination

Choose which architecture-dataset combination to visualize by setting the `SELECTED_COMBO` variable below.

In [None]:
# === SELECT WHICH COMBINATION TO VISUALIZE ===
# Set to an index from the list above, or set to a specific name
# Examples:
#   SELECTED_COMBO = 0  (first combination)
#   SELECTED_COMBO = "narrow_5_synthetic_a"

SELECTED_COMBO = 0  # Change this to select different combinations

# Resolve the selection. Fail early with a clear message rather than letting
# a later cell crash on a missing directory.
if not combinations:
    raise ValueError(f"No architecture-dataset combinations found in {OUTPUT_LAYERS_DIR}")

if isinstance(SELECTED_COMBO, int):
    # Negative indices count from the end, as with ordinary list indexing;
    # anything outside the list is reported instead of raising IndexError.
    if -len(combinations) <= SELECTED_COMBO < len(combinations):
        combo_name = combinations[SELECTED_COMBO]
    else:
        raise ValueError(f"Index {SELECTED_COMBO} out of range. Max: {len(combinations)-1}")
elif SELECTED_COMBO in combinations:
    combo_name = SELECTED_COMBO
else:
    raise ValueError(f"Unknown combination {SELECTED_COMBO!r}. Available: {combinations}")

print(f"Selected: {combo_name}")

In [None]:
# Read the test set and the per-layer outputs of the chosen combination
combo_dir = os.path.join(OUTPUT_LAYERS_DIR, combo_name)

(X_test, y_test, colors, model_dir) = load_test_data(combo_dir=combo_dir)
layer_outputs = load_layer_outputs(model_dir=model_dir)

print(f"\nLoaded {len(layer_outputs)} hidden layers")

In [None]:
# Render the figure for the selected combination and save it as a PNG
visualize_layers(
    layer_outputs=layer_outputs,
    colors=colors,
    X_test=X_test,
    combo_name=combo_name,
    save_filename=f"{combo_name}_visualization.png",
)

## Visualize All Combinations

Run the cell below to generate visualizations for ALL available combinations.

In [None]:
# Visualize all combinations
def _visualize_every_combination(combo_names):
    """Render and save one figure per combination, continuing past failures.

    The work happens inside a function so the loop does not overwrite the
    notebook-level ``combo_name``, ``X_test``, ``colors`` and
    ``layer_outputs`` that belong to the single selected combination.

    Args:
        combo_names: Names of the combination folders to process.

    Returns:
        List of ``(name, exception)`` pairs for combinations that failed.
    """
    failures = []
    for name in combo_names:
        print(f"\n{'='*60}")
        print(f"Processing: {name}")
        print(f"{'='*60}")

        try:
            folder = os.path.join(OUTPUT_LAYERS_DIR, name)
            features, _labels, point_colors, models_folder = load_test_data(folder)
            outputs = load_layer_outputs(models_folder)

            visualize_layers(
                outputs,
                point_colors,
                features,
                name,
                save_filename=f"{name}_visualization.png"
            )
        except Exception as e:
            # Best effort: keep going, but remember the failure so the final
            # summary does not claim that everything was saved.
            print(f"Error processing {name}: {type(e).__name__}: {e}")
            failures.append((name, e))
    return failures


batch_failures = _visualize_every_combination(combinations)

print(f"\n{'='*60}")
if batch_failures:
    failed_names = ", ".join(name for name, _ in batch_failures)
    print(f"Done with errors: {len(batch_failures)} of {len(combinations)} combinations failed ({failed_names}).")
else:
    print("Done! All visualizations saved.")
print(f"{'='*60}")