# GeoSparse Mixed-Activation vs ReLU (psann==0.12.3)

This notebook compares a **mixed-activation GeoSparseRegressor**, a **GeoSparseRegressor with `relu_sigmoid_psann`**, and a **dense PSANNRegressor with ReLU** on a synthetic regression task.


In [None]:
# Install PyPI packages for Colab.
# psann is pinned to 0.12.3 to match the notebook title. numpy, pandas and torch are
# imported in the next cell but are not installed here -- presumably the runtime
# already ships them (TODO confirm when running outside Colab).
%pip install -q psann==0.12.3 scikit-learn matplotlib


In [None]:
import random
import time
import numpy as np
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from psann import GeoSparseRegressor, PSANNRegressor, __version__ as psann_version

# --- settings ---
# NOTE(review): SEED is not referenced in the cells visible here; the runs are
# driven by SEEDS below.
SEED = 2026
# Train on the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Default size of the synthetic dataset (rows, columns).
N_SAMPLES = 5000
N_FEATURES = 64
# Passed as `shape=` to the GeoSparse models; 8 * 8 equals N_FEATURES --
# presumably the two must stay in sync (TODO confirm against psann docs).
SHAPE = (8, 8)
# Fractions of the full dataset held out for testing and validation.
TEST_SIZE = 0.2
VAL_SIZE = 0.1
# Optimisation settings forwarded to every model through COMMON_TRAIN_CFG.
EPOCHS = 160
BATCH_SIZE = 256
# One full benchmark run (data generation, split, training) is done per seed.
SEEDS = [0, 1]

# Record the library versions and device alongside the results.
print('psann', psann_version, '| torch', torch.__version__, '| device', DEVICE)

def set_seed(seed: int) -> None:
    """Seed the global Python, NumPy and PyTorch random generators.

    Args:
        seed: Value handed to each generator's seeding function.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    # Only touch the CUDA generators when a CUDA device is actually present.
    torch.cuda.manual_seed_all(seed)

def synthetic_regression(seed: int, n_samples: int = N_SAMPLES, n_features: int = N_FEATURES):
    """Generate a noisy nonlinear regression dataset.

    Features are standard-normal draws. The target combines sine, quadratic,
    pairwise-product, cosine, linear and gated-sum terms of columns 0-12, plus
    Gaussian noise scaled by 0.1. Because column 12 is indexed, ``n_features``
    must be at least 13.

    Args:
        seed: Seed for the NumPy ``Generator`` that draws features and noise.
        n_samples: Number of rows to generate.
        n_features: Number of feature columns to generate.

    Returns:
        ``(X, y)`` as float32 arrays of shape ``(n_samples, n_features)`` and
        ``(n_samples,)``.
    """
    gen = np.random.default_rng(seed)
    X = gen.normal(size=(n_samples, n_features)).astype(np.float32)

    # Individual signal components; they are summed left to right below.
    sine_part = 1.7 * np.sin(2.2 * X[:, 0])
    quadratic_part = 0.9 * (X[:, 1] ** 2 - 0.5)
    product_part = 0.65 * X[:, 2] * X[:, 3]
    cosine_part = 0.4 * np.cos(3.1 * X[:, 4])
    linear_part = 0.2 * X[:, 5:8].sum(axis=1)
    gated_part = 0.15 * X[:, 8:12].sum(axis=1) * X[:, 12]

    signal = (
        sine_part + quadratic_part + product_part + cosine_part + linear_part + gated_part
    ).astype(np.float32)

    # Noise is drawn after the features so the generator is consumed in a fixed order.
    noise = gen.standard_normal(n_samples).astype(np.float32)
    return X, signal + 0.1 * noise

def split_and_scale(X, y, seed: int):
    """Split into train/validation/test parts and standardise the features.

    The hold-out share (``TEST_SIZE + VAL_SIZE``) is carved off first and then
    divided into validation and test parts. The scaler is fitted on the
    training features only and applied to all three feature sets.

    Args:
        X: Feature matrix.
        y: Target vector aligned with ``X``.
        seed: ``random_state`` used for both splits.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``, all cast to float32.
    """
    holdout = TEST_SIZE + VAL_SIZE
    X_train, X_rest, y_train, y_rest = train_test_split(
        X, y, test_size=holdout, random_state=seed
    )

    # Share of the hold-out part that goes to the test set.
    val_frac = VAL_SIZE / holdout
    X_val, X_test, y_val, y_test = train_test_split(
        X_rest, y_rest, test_size=1.0 - val_frac, random_state=seed
    )

    scaler = StandardScaler().fit(X_train)
    scaled_features = [
        scaler.transform(part).astype(np.float32) for part in (X_train, X_val, X_test)
    ]
    targets = [part.astype(np.float32) for part in (y_train, y_val, y_test)]
    return (*scaled_features, *targets)

def regression_metrics(y_true, y_pred):
    """Compute MSE, RMSE, MAE and R^2 for a set of predictions.

    Args:
        y_true: Ground-truth targets.
        y_pred: Predicted targets.

    Returns:
        Dict with keys ``mse``, ``rmse``, ``mae`` and ``r2``, each a Python float.
    """
    squared_error = float(mean_squared_error(y_true, y_pred))
    metrics = {
        'mse': squared_error,
        # RMSE is derived from the MSE computed above rather than recomputed.
        'rmse': float(np.sqrt(squared_error)),
    }
    metrics['mae'] = float(mean_absolute_error(y_true, y_pred))
    metrics['r2'] = float(r2_score(y_true, y_pred))
    return metrics


In [None]:
# Training keyword arguments shared by every model built in build_models, so
# all three models are trained with the same optimiser settings.
COMMON_TRAIN_CFG = {
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'lr': 1e-3,
    'optimizer': 'adam',
    'weight_decay': 1e-4,
    'device': DEVICE,
}

def build_models(seed: int):
    """Create the three untrained models compared in this notebook.

    Args:
        seed: Used as ``random_state`` for every model and, offset by 1000, as
            ``mix_seed`` for the mixed-activation layout.

    Returns:
        Dict mapping model name to estimator, in the order ``geosparse_mixed``,
        ``geosparse_relu_sigmoid_psann``, ``dense_relu``.
    """
    # Keyword arguments common to both GeoSparse variants.
    geo_common = dict(
        shape=SHAPE,
        hidden_layers=4,
        k=8,
        random_state=seed,
        **COMMON_TRAIN_CFG,
    )

    mixed_activation = {
        'activation_types': ['psann', 'relu', 'tanh'],
        'activation_ratios': [0.6, 0.25, 0.15],
        'layout': 'random',
        'mix_seed': 1000 + seed,
    }
    relu_sigmoid_activation = {
        'slope_init': 1.0,
        'clip_max': 1.0,
    }

    models = {}
    models['geosparse_mixed'] = GeoSparseRegressor(
        activation_type='mixed',
        activation=mixed_activation,
        **geo_common,
    )
    models['geosparse_relu_sigmoid_psann'] = GeoSparseRegressor(
        activation_type='relu_sigmoid_psann',
        activation=relu_sigmoid_activation,
        **geo_common,
    )
    models['dense_relu'] = PSANNRegressor(
        hidden_layers=4,
        hidden_units=64,
        activation_type='relu',
        random_state=seed,
        **COMMON_TRAIN_CFG,
    )
    return models

def run_once(seed: int):
    """Train and evaluate every model once for the given seed.

    Seeds the global generators, builds a fresh dataset and split from
    ``seed``, then fits each model from ``build_models`` and scores it on the
    test split. Only the ``fit`` call is timed.

    Args:
        seed: Seed for data generation, splitting and model initialisation.

    Returns:
        List of dicts, one per model, holding the regression metrics plus
        ``seed``, ``model`` and ``fit_time_s``.
    """
    set_seed(seed)
    features, targets = synthetic_regression(seed=seed)
    X_train, X_val, X_test, y_train, y_val, y_test = split_and_scale(features, targets, seed=seed)
    validation_pair = (X_val, y_val)

    seed_rows = []
    for model_name, model in build_models(seed).items():
        t0 = time.perf_counter()
        model.fit(X_train, y_train, validation_data=validation_pair, verbose=0)
        elapsed = time.perf_counter() - t0

        y_hat = np.asarray(model.predict(X_test), dtype=np.float32)
        row = regression_metrics(y_test, y_hat)
        row['seed'] = seed
        row['model'] = model_name
        row['fit_time_s'] = float(elapsed)
        seed_rows.append(row)
        print(model_name, '| MSE:', f"{row['mse']:.4f}", '| R2:', f"{row['r2']:.4f}", '| fit(s):', f"{elapsed:.1f}")
    return seed_rows

# Run the full benchmark once per seed and pool the per-model metric rows.
all_rows = []
for s in SEEDS:
    all_rows += run_once(s)


In [None]:
# One row per (seed, model) pair, built from the metric dicts returned by run_once.
results = pd.DataFrame(all_rows)
results  # last expression in the cell, so the notebook renders the table


In [None]:
# Average every metric (and the fit time) over seeds, one row per model.
summary = results.groupby('model')[['mse', 'rmse', 'mae', 'r2', 'fit_time_s']].mean().round(4)
summary  # last expression in the cell, so the notebook renders the table

In [None]:
# Bars: mean error metrics per model on the left y-axis.
ax = summary[['mse', 'rmse', 'mae']].plot(kind='bar', rot=0, title='Mean test metrics (lower is better)')
ax.set_ylabel('error')
# Line: mean R^2 per model on a secondary (right) y-axis. The bar axes are passed
# explicitly so the overlay does not depend on which axes pyplot treats as current.
r2_ax = summary['r2'].plot(ax=ax, kind='line', marker='o', secondary_y=True, color='black', label='r2')
r2_ax.set_ylabel('R^2')
ax.figure.tight_layout()
ax.figure
